/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright 2002 Andi Kleen, SuSE Labs */

#include <linux/linkage.h>
#include <asm/cpufeatures.h>
#include <asm/alternative.h>
#include <asm/export.h>

.section .noinstr.text, "ax"

/*
 * ISO C memset - set a memory block to a byte value. This entry point uses
 * the fast string instructions (REP STOSQ for the 8-byte words, REP STOSB
 * for the remaining 0..7 bytes), which makes it simpler and shorter than
 * the open-coded memset_orig.
 *
 * Syntax: GNU as, AT&T operand order (source, destination).
 *
 * In:
 *   rdi   destination
 *   rsi   value (only the low byte, %sil, is used)
 *   rdx   count (bytes)
 *
 * Out:
 *   rax   original destination
 *
 * Clobbers on this path: rcx, rdx, rsi, rdi, r9, flags.
 */
SYM_FUNC_START(__memset)
	/*
	 * Pick the implementation.  Some CPUs support the enhanced
	 * REP MOVSB/STOSB feature (ERMS); it is recommended to use it when
	 * possible.  If it is not available, use the fast string
	 * instructions below.  Otherwise, use the original memset function.
	 *
	 * As the arguments read: the default instruction is
	 * "jmp memset_orig"; for X86_FEATURE_REP_GOOD it is replaced by
	 * nothing, so execution falls through into the REP STOSQ code; for
	 * X86_FEATURE_ERMS it is replaced by "jmp memset_erms".
	 * NOTE(review): the patching mechanism itself lives in
	 * ALTERNATIVE_2 (<asm/alternative.h>), which is not visible here.
	 */
	ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
		      "jmp memset_erms", X86_FEATURE_ERMS

	movq %rdi,%r9			/* keep dst for the return value */
	movq %rdx,%rcx
	andl $7,%edx			/* edx = count % 8 (tail bytes) */
	shrq $3,%rcx			/* rcx = count / 8 (whole qwords) */
	/*
	 * Expand the byte value: zero-extend it and multiply by
	 * 0x0101010101010101 so that every byte of rax holds the value.
	 */
	movzbl %sil,%esi
	movabs $0x0101010101010101,%rax
	imulq %rsi,%rax
	rep stosq			/* store rcx qwords, advancing rdi */
	movl %edx,%ecx
	rep stosb			/* store the 0..7 tail bytes */
	movq %r9,%rax
	RET
SYM_FUNC_END(__memset)
EXPORT_SYMBOL(__memset)

SYM_FUNC_ALIAS(memset, __memset)
EXPORT_SYMBOL(memset)

/*
 * ISO C memset - set a memory block to a byte value. This variant uses
 * enhanced REP STOSB for the whole block instead of the fast string
 * (REP STOSQ + REP STOSB) sequence, and is simpler and shorter still.
 *
 * In:
 *   rdi   destination
 *   rsi   value (only the low byte, %sil, is used)
 *   rdx   count (bytes)
 *
 * Out:
 *   rax   original destination
 *
 * Clobbers: rcx, rdi, r9.
 */
SYM_FUNC_START_LOCAL(memset_erms)
	movq %rdi,%r9			/* keep dst for the return value */
	movb %sil,%al			/* REP STOSB only reads %al */
	movq %rdx,%rcx
	rep stosb
	movq %r9,%rax
	RET
SYM_FUNC_END(memset_erms)

/*
 * Open-coded memset that uses no string instructions: align the
 * destination to 8 bytes, store 64 bytes per iteration, then finish with
 * an 8-byte loop and a 1-byte loop.
 *
 * In:
 *   rdi   destination
 *   rsi   value (only the low byte, %sil, is used)
 *   rdx   count (bytes)
 *
 * Out:
 *   rax   original destination
 *
 * Clobbers: rcx, rdx, rdi, r8, r9, r10, flags.
 */
SYM_FUNC_START_LOCAL(memset_orig)
	movq %rdi,%r10			/* keep dst for the return value */

	/* expand byte value: replicate it into all 8 bytes of rax */
	movzbl %sil,%ecx
	movabs $0x0101010101010101,%rax
	imulq  %rcx,%rax

	/* align dst: r9d = dst % 8, non-zero means misaligned */
	movl  %edi,%r9d
	andl  $7,%r9d
	jnz  .Lbad_alignment
.Lafter_bad_alignment:

	movq  %rdx,%rcx
	shrq  $6,%rcx			/* rcx = number of 64-byte chunks */
	jz	 .Lhandle_tail

	/*
	 * 64 bytes per iteration.  decq sets ZF for the jnz at the bottom;
	 * the movq and leaq in between leave the flags untouched.
	 */
	.p2align 4
.Lloop_64:
	decq  %rcx
	movq  %rax,(%rdi)
	movq  %rax,8(%rdi)
	movq  %rax,16(%rdi)
	movq  %rax,24(%rdi)
	movq  %rax,32(%rdi)
	movq  %rax,40(%rdi)
	movq  %rax,48(%rdi)
	movq  %rax,56(%rdi)
	leaq  64(%rdi),%rdi
	jnz    .Lloop_64

	/* Handle tail in loops. The loops should be faster than hard
	   to predict jump tables. */
	.p2align 4
.Lhandle_tail:
	movl	%edx,%ecx
	andl    $63&(~7),%ecx		/* bytes left in whole qwords (< 64) */
	jz 		.Lhandle_7
	shrl	$3,%ecx			/* ecx = number of qwords */
	/* 8 bytes per iteration; leaq keeps the flags set by decl */
	.p2align 4
.Lloop_8:
	decl   %ecx
	movq  %rax,(%rdi)
	leaq  8(%rdi),%rdi
	jnz    .Lloop_8

.Lhandle_7:
	andl	$7,%edx			/* edx = trailing bytes (0..7) */
	jz      .Lende
	/* 1 byte per iteration; leaq keeps the flags set by decl */
	.p2align 4
.Lloop_1:
	decl    %edx
	movb 	%al,(%rdi)
	leaq	1(%rdi),%rdi
	jnz     .Lloop_1

.Lende:
	movq	%r10,%rax
	RET

	/*
	 * dst is not 8-byte aligned (r9 = dst % 8, 1..7).  Counts of 7 or
	 * less go straight to the byte loop.  Otherwise write one unaligned
	 * qword at dst, then advance dst to the next 8-byte boundary and
	 * shrink the count by the same 8 - r9 bytes; the part of that qword
	 * beyond the boundary is simply written again by the aligned stores.
	 */
.Lbad_alignment:
	cmpq $7,%rdx
	jbe	.Lhandle_7
	movq %rax,(%rdi)	/* unaligned store */
	movq $8,%r8
	subq %r9,%r8			/* r8 = bytes up to the boundary */
	addq %r8,%rdi
	subq %r8,%rdx
	jmp .Lafter_bad_alignment
SYM_FUNC_END(memset_orig)