/*
Copyright (c) 2014, Intel Corporation
All rights reserved.

Redistribution and use in source and binary forms, with or without
modification, are permitted provided that the following conditions are met:

  * Redistributions of source code must retain the above copyright notice,
  * this list of conditions and the following disclaimer.

  * Redistributions in binary form must reproduce the above copyright notice,
  * this list of conditions and the following disclaimer in the documentation
  * and/or other materials provided with the distribution.

  * Neither the name of Intel Corporation nor the names of its contributors
  * may be used to endorse or promote products derived from this software
  * without specific prior written permission.

THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS" AND
ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE IMPLIED
WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE ARE
DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE LIABLE FOR
ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES
(INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES;
LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON
ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
(INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE OF THIS
SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
*/

#include <private/bionic_asm.h>

#include "cache.h"

#ifndef L
# define L(label) .L##label
#endif

#ifndef ALIGN
# define ALIGN(n) .p2align n
#endif


ENTRY(__memset_chk_generic)
        # %rdi = dst, %rsi = byte, %rdx = n, %rcx = dst_len
        cmp     %rcx, %rdx
        ja      __memset_chk_fail
        // Fall through to memset...
END(__memset_chk_generic)
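/*
 * A minimal C sketch of what the fortified entry point above implements,
 * assuming the FORTIFY calling convention noted in the register comment
 * (dst_len is the compiler-known size of the destination buffer); the C
 * names here are illustrative only:
 *
 *   void* __memset_chk(void* dst, int byte, size_t n, size_t dst_len) {
 *       if (n > dst_len) __memset_chk_fail();  // write would overflow dst
 *       return memset(dst, byte, n);           // otherwise plain memset
 *   }
 */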

        .section .text.sse2,"ax",@progbits
ENTRY(memset_generic)
        movq    %rdi, %rax              # return value is the destination pointer
        and     $0xff, %rsi
        mov     $0x0101010101010101, %rcx
        imul    %rsi, %rcx              # broadcast the fill byte into all 8 bytes of %rcx
        cmpq    $16, %rdx
        jae     L(16bytesormore)
        # n < 16: overlapping head/tail stores of the widest size that fits
        testb   $8, %dl
        jnz     L(8_15bytes)
        testb   $4, %dl
        jnz     L(4_7bytes)
        testb   $2, %dl
        jnz     L(2_3bytes)
        testb   $1, %dl
        jz      L(return)
        movb    %cl, (%rdi)
L(return):
        ret

L(8_15bytes):
        movq    %rcx, (%rdi)
        movq    %rcx, -8(%rdi, %rdx)
        ret

L(4_7bytes):
        movl    %ecx, (%rdi)
        movl    %ecx, -4(%rdi, %rdx)
        ret

L(2_3bytes):
        movw    %cx, (%rdi)
        movw    %cx, -2(%rdi, %rdx)
        ret

        ALIGN (4)
L(16bytesormore):
        movd    %rcx, %xmm0
        pshufd  $0, %xmm0, %xmm0        # broadcast the fill byte across %xmm0
        movdqu  %xmm0, (%rdi)           # unaligned stores cover the head and tail
        movdqu  %xmm0, -16(%rdi, %rdx)
        cmpq    $32, %rdx
        jbe     L(32bytesless)
        movdqu  %xmm0, 16(%rdi)
        movdqu  %xmm0, -32(%rdi, %rdx)
        cmpq    $64, %rdx
        jbe     L(64bytesless)
        movdqu  %xmm0, 32(%rdi)
        movdqu  %xmm0, 48(%rdi)
        movdqu  %xmm0, -64(%rdi, %rdx)
        movdqu  %xmm0, -48(%rdi, %rdx)
        cmpq    $128, %rdx
        ja      L(128bytesmore)
L(32bytesless):
L(64bytesless):
        ret

        ALIGN (4)
L(128bytesmore):
        # Fill the 64-byte-aligned middle; the unaligned edges are already set.
        leaq    64(%rdi), %rcx
        andq    $-64, %rcx              # first 64-byte-aligned address to fill
        movq    %rdx, %r8               # keep the length for the cache-size check
        addq    %rdi, %rdx
        andq    $-64, %rdx              # 64-byte-aligned end address
        cmpq    %rcx, %rdx
        je      L(return)

#ifdef SHARED_CACHE_SIZE
        cmp     $SHARED_CACHE_SIZE, %r8
#else
        cmp     __x86_64_shared_cache_size(%rip), %r8
#endif
        ja      L(128bytesmore_nt)      # larger than the shared cache: use non-temporal stores

        ALIGN (4)
L(128bytesmore_normal):
        movdqa  %xmm0, (%rcx)
        movaps  %xmm0, 0x10(%rcx)
        movaps  %xmm0, 0x20(%rcx)
        movaps  %xmm0, 0x30(%rcx)
        addq    $64, %rcx
        cmpq    %rcx, %rdx
        jne     L(128bytesmore_normal)
        ret

        ALIGN (4)
L(128bytesmore_nt):
        movntdq %xmm0, (%rcx)
        movntdq %xmm0, 0x10(%rcx)
        movntdq %xmm0, 0x20(%rcx)
        movntdq %xmm0, 0x30(%rcx)
        leaq    64(%rcx), %rcx
        cmpq    %rcx, %rdx
        jne     L(128bytesmore_nt)
        sfence                          # order the non-temporal stores before returning
        ret

END(memset_generic)