/* Copyright 2002 Andi Kleen, SuSE Labs */

#include <linux/linkage.h>
#include <asm/dwarf2.h>

/*
 * ISO C memset - set a memory block to a byte value.
 *
 * Syntax: GNU as, AT&T operand order (src, dst).
 *
 * In:
 *	rdi	destination
 *	rsi	value (only the low byte, %sil, is used)
 *	rdx	count (bytes), treated as a full 64-bit quantity
 *
 * Out:
 *	rax	original destination
 *
 * Two implementations live in this file:
 *	memset/__memset	open-coded store loops
 *	.Lmemset_c	"rep stos" variant, referenced from the
 *			.altinstructions record at the end of the file
 */

/*
 * String-instruction variant.
 *
 * Registers written: rax, rcx, rdx (by mulq), rsi, rdi, r8, r9, flags.
 */
	.section .altinstr_replacement, "ax", @progbits
.Lmemset_c:
	movq %rdi,%r9			/* r9 = original destination */
	movl %edx,%r8d
	andl $7,%r8d			/* r8d = count % 8, bytes left after the quadwords */
	/*
	 * rep stosq takes its count from the full rcx, so the quadword
	 * count must be derived from all 64 bits of rdx.  Using the
	 * 32-bit forms here would silently drop the high half of the
	 * count for sizes of 4GiB and above.
	 */
	movq %rdx,%rcx
	shrq $3,%rcx			/* rcx = count / 8 */
	/* expand byte value */
	movzbl %sil,%esi
	movabs $0x0101010101010101,%rax
	mulq %rsi			/* with rax, clobbers rdx */
	rep stosq
	movl %r8d,%ecx			/* 32-bit write zero-extends into rcx */
	rep stosb
	movq %r9,%rax			/* return the original destination */
	ret
.Lmemset_e:
	.previous

/*
 * Open-coded variant.
 *
 * Register roles after the setup code:
 *	rax	fill byte replicated into all eight byte lanes
 *	rdi	current store address
 *	r10	original destination (return value)
 *	r11	bytes remaining once the alignment fixup is done
 *	r9	destination & 7 (misalignment of the start address)
 *	rcx	loop counter
 *
 * Registers written: rax, rcx, rdx (by mul), rdi, r8, r9, r10, r11, flags.
 */
ENTRY(memset)
ENTRY(__memset)
	CFI_STARTPROC
	movq %rdi,%r10
	movq %rdx,%r11			/* save count: mul below clobbers rdx */

	/* expand byte value */
	movzbl %sil,%ecx
	movabs $0x0101010101010101,%rax
	mul %rcx			/* with rax, clobbers rdx */

	/* align dst */
	movl %edi,%r9d
	andl $7,%r9d
	jnz .Lbad_alignment
	CFI_REMEMBER_STATE
.Lafter_bad_alignment:

	/*
	 * Number of 64-byte chunks.  This is computed and counted down
	 * in 64 bits so that counts of 4GiB and above are not truncated.
	 */
	movq %r11,%rcx
	shrq $6,%rcx
	jz .Lhandle_tail

	.p2align 4
.Lloop_64:
	/*
	 * The decrement sets ZF; the movq/leaq instructions that follow
	 * leave the flags alone, so the jnz at the bottom still tests
	 * the result of the decrement.
	 */
	decq %rcx
	movq %rax,(%rdi)
	movq %rax,8(%rdi)
	movq %rax,16(%rdi)
	movq %rax,24(%rdi)
	movq %rax,32(%rdi)
	movq %rax,40(%rdi)
	movq %rax,48(%rdi)
	movq %rax,56(%rdi)
	leaq 64(%rdi),%rdi
	jnz .Lloop_64

	/*
	 * Handle the tail in loops.  The loops should be faster than
	 * hard-to-predict jump tables.
	 *
	 * Only bits 3..5 of the count matter here and bits 0..2 in
	 * .Lhandle_7, so 32-bit arithmetic on r11d is sufficient.
	 */
	.p2align 4
.Lhandle_tail:
	movl %r11d,%ecx
	andl $63&(~7),%ecx		/* bytes in whole quadwords below 64 */
	jz .Lhandle_7
	shrl $3,%ecx			/* convert bytes to quadwords */
	.p2align 4
.Lloop_8:
	decl %ecx
	movq %rax,(%rdi)
	leaq 8(%rdi),%rdi
	jnz .Lloop_8

.Lhandle_7:
	movl %r11d,%ecx
	andl $7,%ecx			/* trailing bytes, 0..7 */
	jz .Lende
	.p2align 4
.Lloop_1:
	decl %ecx
	movb %al,(%rdi)
	leaq 1(%rdi),%rdi
	jnz .Lloop_1

.Lende:
	movq %r10,%rax			/* return the original destination */
	ret

	CFI_RESTORE_STATE
.Lbad_alignment:
	/*
	 * Destination is not 8-byte aligned (r9 = dst & 7, non-zero).
	 * With 7 bytes or fewer the byte loop does all the work.
	 * Otherwise one unaligned quadword store covers the bytes up to
	 * the next 8-byte boundary, and rdi/r11 are then adjusted by
	 * that distance (8 - r9) before rejoining the aligned path.
	 */
	cmpq $7,%r11
	jbe .Lhandle_7
	movq %rax,(%rdi)		/* unaligned store */
	movq $8,%r8
	subq %r9,%r8			/* r8 = 8 - (dst & 7) */
	addq %r8,%rdi
	subq %r8,%r11
	jmp .Lafter_bad_alignment
.Lfinal:
	CFI_ENDPROC
ENDPROC(memset)
ENDPROC(__memset)

	/*
	 * Some CPUs run faster using the string instructions.
	 * It is also a lot simpler.  Use this when possible.
	 *
	 * The record below names memset, the .Lmemset_c replacement,
	 * the X86_FEATURE_REP_GOOD feature bit and the byte lengths of
	 * both code sequences.  Each length is emitted as a single
	 * byte, so both sequences must stay shorter than 256 bytes.
	 */

#include <asm/cpufeature.h>

	.section .altinstructions,"a"
	.align 8
	.quad memset			/* code to be replaced */
	.quad .Lmemset_c		/* replacement code */
	.word X86_FEATURE_REP_GOOD	/* feature bit selecting the replacement */
	.byte .Lfinal - memset		/* length of the code to be replaced */
	.byte .Lmemset_e - .Lmemset_c	/* length of the replacement */
	.previous