/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright 2002 Andi Kleen, SuSE Labs */

#include <linux/linkage.h>
#include <asm/cpufeatures.h>
#include <asm/alternative-asm.h>

/*
 * Syntax: GNU as, AT&T operand order (op src, dst), run through cpp.
 * All three routines below share the same register contract:
 *
 * rdi   destination
 * rsi   value (char, only the low byte is used)
 * rdx   count (bytes)
 *
 * rax   original destination (return value)
 */

/*
 * ISO C memset - set a memory block to a byte value. This function uses fast
 * string to get better performance than the original function. The code is
 * simpler and shorter than the original function as well.
 *
 * Scratch registers written: rcx, rdx, rsi, rdi, r9, flags.
 */
ENTRY(__memset)
	/*
	 * memset is a weak alias of __memset: both labels name the same
	 * address.  The binding is declared with .weak alone.  ENTRY() is
	 * expected to emit .globl, and giving one symbol both a .weak and a
	 * .globl directive yields assembler-dependent results (GNU as keeps
	 * it weak, older LLVM integrated assemblers made it global, newer
	 * ones reject the binding change), so memset is defined as a plain
	 * label instead of through ENTRY().
	 */
	.weak memset
memset:
	/*
	 * Some CPUs support enhanced REP MOVSB/STOSB feature. It is recommended
	 * to use it when possible. If not available, use fast string instructions.
	 *
	 * Otherwise, use original memset function.
	 *
	 * As written: the default instruction is "jmp memset_orig"; with
	 * X86_FEATURE_REP_GOOD it is replaced by nothing, so execution falls
	 * through to the rep stosq code below; with X86_FEATURE_ERMS it is
	 * replaced by "jmp memset_erms".
	 */
	ALTERNATIVE_2 "jmp memset_orig", "", X86_FEATURE_REP_GOOD, \
		      "jmp memset_erms", X86_FEATURE_ERMS

	movq %rdi,%r9			/* r9 = original dst, returned at the end */
	movq %rdx,%rcx
	andl $7,%edx			/* edx = count % 8: trailing bytes */
	shrq $3,%rcx			/* rcx = count / 8: whole qwords */
	/* expand byte value */
	movzbl %sil,%esi
	movabs $0x0101010101010101,%rax
	imulq %rsi,%rax			/* rax = fill byte replicated into all 8 bytes */
	rep stosq			/* store rcx qwords of the pattern */
	movl %edx,%ecx
	rep stosb			/* store the remaining 0..7 bytes */
	movq %r9,%rax
	ret
ENDPROC(memset)
ENDPROC(__memset)

/*
 * ISO C memset - set a memory block to a byte value. This function uses
 * enhanced rep stosb to override the fast string function.
 * The code is simpler and shorter than the fast string function as well.
 *
 * Scratch registers written: rcx, rdi, r9 (rax is written as the fill
 * byte and then as the return value).
 */
ENTRY(memset_erms)
	movq %rdi,%r9			/* r9 = original dst, returned at the end */
	movb %sil,%al			/* al = fill byte; rep stosb only reads al */
	movq %rdx,%rcx			/* rcx = byte count */
	rep stosb
	movq %r9,%rax
	ret
ENDPROC(memset_erms)

/*
 * Original memset: open-coded store loops, no string instructions.
 *
 * Register roles inside the routine:
 *   r10  original dst (return value)
 *   rax  fill byte replicated into all 8 bytes
 *   rdi  current store address
 *   rdx  bytes remaining after the optional alignment step
 *   rcx  loop counter
 *   r9   dst & 7 (misalignment), r8 = 8 - misalignment
 *
 * Scratch registers written: rcx, rdx, rdi, r8, r9, r10, flags.
 */
ENTRY(memset_orig)
	movq %rdi,%r10

	/* expand byte value */
	movzbl %sil,%ecx
	movabs $0x0101010101010101,%rax
	imulq  %rcx,%rax

	/* align dst */
	movl  %edi,%r9d
	andl  $7,%r9d			/* r9d = dst & 7; non-zero means misaligned */
	jnz  .Lbad_alignment
.Lafter_bad_alignment:

	movq  %rdx,%rcx
	shrq  $6,%rcx			/* rcx = number of whole 64-byte chunks */
	jz	 .Lhandle_tail

	.p2align 4
.Lloop_64:
	/*
	 * decq sets ZF for the jnz at the bottom; the movq and leaq
	 * instructions in between do not modify flags.
	 */
	decq  %rcx
	movq  %rax,(%rdi)
	movq  %rax,8(%rdi)
	movq  %rax,16(%rdi)
	movq  %rax,24(%rdi)
	movq  %rax,32(%rdi)
	movq  %rax,40(%rdi)
	movq  %rax,48(%rdi)
	movq  %rax,56(%rdi)
	leaq  64(%rdi),%rdi
	jnz    .Lloop_64

	/* Handle tail in loops. The loops should be faster than hard
	   to predict jump tables. */
	.p2align 4
.Lhandle_tail:
	movl	%edx,%ecx
	andl    $63&(~7),%ecx		/* bytes left in whole qwords (0..56) */
	jz 		.Lhandle_7
	shrl	$3,%ecx			/* ecx = qwords left (1..7) */
	.p2align 4
.Lloop_8:
	decl   %ecx			/* ZF consumed by jnz; movq/leaq keep flags */
	movq  %rax,(%rdi)
	leaq  8(%rdi),%rdi
	jnz    .Lloop_8

.Lhandle_7:
	andl	$7,%edx			/* edx = trailing bytes (0..7) */
	jz      .Lende
	.p2align 4
.Lloop_1:
	decl    %edx			/* ZF consumed by jnz; movb/leaq keep flags */
	movb 	%al,(%rdi)
	leaq	1(%rdi),%rdi
	jnz     .Lloop_1

.Lende:
	movq	%r10,%rax
	ret

.Lbad_alignment:
	/*
	 * dst is not 8-byte aligned.  With 7 bytes or fewer there is no room
	 * for a qword store, so use the byte loop.  Otherwise write one
	 * unaligned qword at dst and step dst forward to the next 8-byte
	 * boundary; the aligned stores that follow rewrite the overlapping
	 * bytes with the same pattern.
	 */
	cmpq $7,%rdx
	jbe	.Lhandle_7
	movq %rax,(%rdi)	/* unaligned store */
	movq $8,%r8
	subq %r9,%r8			/* r8 = 8 - (dst & 7): bytes up to the boundary */
	addq %r8,%rdi
	subq %r8,%rdx
	jmp .Lafter_bad_alignment
ENDPROC(memset_orig)