/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright 2002 Andi Kleen, SuSE Labs */

#include <linux/linkage.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>
#include <asm/export.h>

/*
 * memset / __memset - ISO C memset: fill a memory block with a byte value.
 *
 * This entry point stores the bulk of the block with rep stosq and the
 * remaining 0..7 bytes with rep stosb, unless the alternative at the top
 * redirects to memset_orig or memset_erms.
 *
 * In:
 *   rdi   destination
 *   rsi   value (char; only the low byte is used)
 *   rdx   count (bytes)
 *
 * Out:
 *   rax   original destination
 *
 * Registers written on this path: rax, rcx, rdx, rsi, rdi, r9, flags.
 */
SYM_FUNC_START_WEAK(memset)
SYM_FUNC_START(__memset)
	/*
	 * Pick the implementation by CPU feature:
	 *   - default:               jump to memset_orig (plain store loops)
	 *   - X86_FEATURE_REP_GOOD:  no jump, fall through to rep stosq below
	 *   - X86_FEATURE_ERMS:      jump to memset_erms (single rep stosb)
	 *
	 * NOTE(review): ALTERNATIVE_2 is defined outside this file; the
	 * description above follows its argument layout - confirm against
	 * asm/alternative-asm.h.
	 */
	ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
		      "jmp memset_erms", X86_FEATURE_ERMS

	movq	%rdi, %r9		/* rep stos advances rdi; keep dst */
	movq	%rdx, %rcx
	andl	$7, %edx		/* edx = count % 8 (byte tail) */
	shrq	$3, %rcx		/* rcx = count / 8 (whole qwords) */

	/* Replicate the fill byte into all eight bytes of rax. */
	movzbl	%sil, %esi
	movabs	$0x0101010101010101, %rax
	imulq	%rsi, %rax

	rep stosq			/* qword part */
	movl	%edx, %ecx
	rep stosb			/* remaining 0..7 bytes */

	movq	%r9, %rax		/* return the original destination */
	ret
SYM_FUNC_END(__memset)
SYM_FUNC_END_ALIAS(memset)
EXPORT_SYMBOL(memset)
EXPORT_SYMBOL(__memset)

/*
 * memset_erms - ISO C memset done with one rep stosb over the whole block.
 *
 * Reached through the "jmp memset_erms" alternative in __memset. No byte
 * expansion or qword/byte split is needed here, which makes this variant
 * shorter than the rep stosq one.
 *
 * In:
 *   rdi   destination
 *   rsi   value (char; only the low byte is used)
 *   rdx   count (bytes)
 *
 * Out:
 *   rax   original destination
 *
 * Registers written: rax, rcx, rdi, r9.
 */
SYM_FUNC_START_LOCAL(memset_erms)
	movq	%rdi, %r9		/* rep stosb advances rdi; keep dst */
	movb	%sil, %al		/* rep stosb stores al */
	movq	%rdx, %rcx		/* full byte count as repeat count */
	rep stosb
	movq	%r9, %rax		/* return the original destination */
	ret
SYM_FUNC_END(memset_erms)

/*
 * memset_orig - ISO C memset built from ordinary mov stores.
 *
 * Reached through the default "jmp memset_orig" alternative in __memset.
 * Layout: optional unaligned 8-byte head store to reach an 8-byte aligned
 * destination, then 64-byte chunks, then whole qwords, then single bytes.
 *
 * In:
 *   rdi   destination
 *   rsi   value (char; only the low byte is used)
 *   rdx   count (bytes)
 *
 * Out:
 *   rax   original destination
 *
 * Register roles:
 *   r10   original destination (for the return value)
 *   rax   fill byte replicated into all eight bytes
 *   r9    destination & 7
 *   r8    8 - (destination & 7), bytes skipped to reach alignment
 *   rcx   loop counter
 *
 * Registers written: rax, rcx, rdx, rdi, r8, r9, r10, flags.
 */
SYM_FUNC_START_LOCAL(memset_orig)
	movq	%rdi, %r10

	/* Replicate the fill byte into all eight bytes of rax. */
	movzbl	%sil, %ecx
	movabs	$0x0101010101010101, %rax
	imulq	%rcx, %rax

	/* r9d = dst & 7; non-zero means dst is not 8-byte aligned. */
	movl	%edi, %r9d
	andl	$7, %r9d
	jnz	.Lalign_dst
.Ldst_aligned:

	movq	%rdx, %rcx
	shrq	$6, %rcx		/* rcx = number of 64-byte chunks */
	jz	.Ltail_qwords

	.p2align 4
.Lstore_64:
	decq	%rcx			/* ZF consumed by jnz; mov/lea keep flags */
	movq	%rax, (%rdi)
	movq	%rax, 8(%rdi)
	movq	%rax, 16(%rdi)
	movq	%rax, 24(%rdi)
	movq	%rax, 32(%rdi)
	movq	%rax, 40(%rdi)
	movq	%rax, 48(%rdi)
	movq	%rax, 56(%rdi)
	leaq	64(%rdi), %rdi
	jnz	.Lstore_64

	/*
	 * The tail is handled with loops; they should be faster than hard to
	 * predict jump tables.
	 */
	.p2align 4
.Ltail_qwords:
	movl	%edx, %ecx
	andl	$63&(~7), %ecx		/* bytes of the tail in whole qwords */
	jz	.Ltail_bytes
	shrl	$3, %ecx		/* ecx = number of qwords */
	.p2align 4
.Lstore_8:
	decl	%ecx			/* ZF consumed by jnz below */
	movq	%rax, (%rdi)
	leaq	8(%rdi), %rdi
	jnz	.Lstore_8

.Ltail_bytes:
	andl	$7, %edx		/* edx = trailing single bytes */
	jz	.Ldone
	.p2align 4
.Lstore_1:
	decl	%edx			/* ZF consumed by jnz below */
	movb	%al, (%rdi)
	leaq	1(%rdi), %rdi
	jnz	.Lstore_1

.Ldone:
	movq	%r10, %rax		/* return the original destination */
	ret

	/*
	 * Misaligned destination. With 7 bytes or fewer only the byte loop
	 * runs. Otherwise one unaligned 8-byte store covers the head, and
	 * dst/count are then moved forward to the next 8-byte boundary; the
	 * bytes past that boundary are simply written again by later stores.
	 */
.Lalign_dst:
	cmpq	$7, %rdx
	jbe	.Ltail_bytes
	movq	%rax, (%rdi)		/* unaligned store */
	movq	$8, %r8
	subq	%r9, %r8		/* r8 = 8 - (dst & 7) */
	addq	%r8, %rdi
	subq	%r8, %rdx
	jmp	.Ldst_aligned
	/* .Lfinal is not referenced by any branch visible in this file. */
.Lfinal:
SYM_FUNC_END(memset_orig)