/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright 2002 Andi Kleen, SuSE Labs */

#include <linux/export.h>
#include <linux/linkage.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>

.section .noinstr.text, "ax"

/*
 * ISO C memset - set a memory block to a byte value. This function uses fast
 * string to get better performance than the original function. The code is
 * simpler and shorter than the original function as well.
 *
 * rdi   destination
 * rsi   value (char)
 * rdx   count (bytes)
 *
 * rax   original destination
 *
 * The FSRS alternative should be done inline (avoiding the call and
 * the disgusting return handling), but that would require some help
 * from the compiler for better calling conventions.
 *
 * The 'rep stosb' itself is small enough to replace the call, but all
 * the register moves blow up the code. And two of them are "needed"
 * only for the return value that is the same as the destination input,
 * which the compiler could/should do much better anyway.
 */
SYM_FUNC_START(__memset)
	/*
	 * Default instruction is the jump to the open-coded variant; the
	 * alternative selected by X86_FEATURE_FSRS is empty, so execution
	 * then falls through to the 'rep stosb' sequence below.
	 */
	ALTERNATIVE "jmp memset_orig", "", X86_FEATURE_FSRS

	movq	%rdi,%r9		/* r9 = dst, kept for the return value */
	movb	%sil,%al		/* al = fill byte consumed by stosb */
	movq	%rdx,%rcx		/* rcx = byte count for the rep prefix */
	rep stosb
	movq	%r9,%rax		/* return the original destination */
	RET
SYM_FUNC_END(__memset)
EXPORT_SYMBOL(__memset)

SYM_FUNC_ALIAS_MEMFUNC(memset, __memset)
EXPORT_SYMBOL(memset)

/*
 * memset_orig - open-coded memset, entered by the jump at the top of
 * __memset.
 *
 * In:
 *   rdi   destination
 *   rsi   value (only the low byte, sil, is used)
 *   rdx   count (bytes)
 * Out:
 *   rax   original destination
 *
 * Registers written: rax, rcx, rdx, rdi, r8, r9, r10 and the flags.
 * The stack is not touched.
 */
SYM_FUNC_START_LOCAL(memset_orig)
	movq	%rdi,%r10		/* r10 = dst, kept for the return value */

	/* expand byte value: rax = fill byte replicated into all 8 bytes */
	movzbl	%sil,%ecx
	movabs	$0x0101010101010101,%rax
	imulq	%rcx,%rax

	/* align dst: r9d = dst & 7, non-zero means not 8-byte aligned */
	movl	%edi,%r9d
	andl	$7,%r9d
	jnz	.Lbad_alignment
.Lafter_bad_alignment:

	movq	%rdx,%rcx
	shrq	$6,%rcx			/* rcx = number of whole 64-byte chunks */
	jz	.Lhandle_tail

	/*
	 * Store 64 bytes per iteration. decq sets ZF; the movq and leaq
	 * that follow leave the flags alone, so the closing jnz still
	 * tests the result of decq.
	 */
	.p2align 4
.Lloop_64:
	decq	%rcx
	movq	%rax,(%rdi)
	movq	%rax,8(%rdi)
	movq	%rax,16(%rdi)
	movq	%rax,24(%rdi)
	movq	%rax,32(%rdi)
	movq	%rax,40(%rdi)
	movq	%rax,48(%rdi)
	movq	%rax,56(%rdi)
	leaq	64(%rdi),%rdi
	jnz	.Lloop_64

	/* Handle tail in loops. The loops should be faster than hard
	   to predict jump tables. */
	.p2align 4
.Lhandle_tail:
	movl	%edx,%ecx
	andl	$63&(~7),%ecx		/* bytes left in whole qwords (0..56) */
	jz	.Lhandle_7
	shrl	$3,%ecx			/* ecx = number of qwords left */
	.p2align 4
.Lloop_8:
	decl	%ecx			/* flags survive movq/leaq until jnz */
	movq	%rax,(%rdi)
	leaq	8(%rdi),%rdi
	jnz	.Lloop_8

.Lhandle_7:
	andl	$7,%edx			/* edx = trailing bytes (0..7) */
	jz	.Lende
	.p2align 4
.Lloop_1:
	decl	%edx			/* flags survive movb/leaq until jnz */
	movb	%al,(%rdi)
	leaq	1(%rdi),%rdi
	jnz	.Lloop_1

.Lende:
	movq	%r10,%rax		/* return the original destination */
	RET

	/*
	 * dst is not 8-byte aligned (r9 = dst & 7, 1..7). With 7 bytes or
	 * fewer, go straight to the byte loop. Otherwise count >= 8, so one
	 * unaligned 8-byte store stays inside the buffer; dst is then
	 * advanced by 8 - (dst & 7) to the next 8-byte boundary and count
	 * is reduced by the same amount. Bytes of that first store beyond
	 * the new dst are simply written again with the same value.
	 */
.Lbad_alignment:
	cmpq	$7,%rdx
	jbe	.Lhandle_7
	movq	%rax,(%rdi)		/* unaligned store */
	movq	$8,%r8
	subq	%r9,%r8			/* r8 = 8 - (dst & 7) */
	addq	%r8,%rdi
	subq	%r8,%rdx
	jmp	.Lafter_bad_alignment
SYM_FUNC_END(memset_orig)