17c478bd9Sstevel@tonic-gate/* 2d0b3732eSbholler * CDDL HEADER START 3d0b3732eSbholler * 4d0b3732eSbholler * The contents of this file are subject to the terms of the 5d0b3732eSbholler * Common Development and Distribution License (the "License"). 6d0b3732eSbholler * You may not use this file except in compliance with the License. 7d0b3732eSbholler * 8d0b3732eSbholler * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9d0b3732eSbholler * or http://www.opensolaris.org/os/licensing. 10d0b3732eSbholler * See the License for the specific language governing permissions 11d0b3732eSbholler * and limitations under the License. 12d0b3732eSbholler * 13d0b3732eSbholler * When distributing Covered Code, include this CDDL HEADER in each 14d0b3732eSbholler * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15d0b3732eSbholler * If applicable, add the following below this CDDL HEADER, with the 16d0b3732eSbholler * fields enclosed by brackets "[]" replaced with your own identifying 17d0b3732eSbholler * information: Portions Copyright [yyyy] [name of copyright owner] 18d0b3732eSbholler * 19d0b3732eSbholler * CDDL HEADER END 207c478bd9Sstevel@tonic-gate */ 217c478bd9Sstevel@tonic-gate 227c478bd9Sstevel@tonic-gate/* 23*fad5204eSbostrovs * Copyright 2009 Sun Microsystems, Inc. All rights reserved. 247257d1b4Sraf * Use is subject to license terms. 257257d1b4Sraf */ 267257d1b4Sraf 277257d1b4Sraf/* 28d0b3732eSbholler * Copyright (c) 2008, Intel Corporation 297c478bd9Sstevel@tonic-gate * All rights reserved. 307c478bd9Sstevel@tonic-gate */ 317257d1b4Sraf 32*fad5204eSbostrovs/* 33*fad5204eSbostrovs * Portions Copyright 2009 Advanced Micro Devices, Inc. 34*fad5204eSbostrovs */ 35*fad5204eSbostrovs 369a70fc3bSMark J. 
Nelson .file "memset.s" 377c478bd9Sstevel@tonic-gate 387c478bd9Sstevel@tonic-gate#include <sys/asm_linkage.h> 397c478bd9Sstevel@tonic-gate 407c478bd9Sstevel@tonic-gate ANSI_PRAGMA_WEAK(memset,function) 417c478bd9Sstevel@tonic-gate 427c478bd9Sstevel@tonic-gate#include "cache.h" 43d0b3732eSbholler#include "proc64_id.h" 447c478bd9Sstevel@tonic-gate 45d0b3732eSbholler#define L(s) .memset/**/s 467c478bd9Sstevel@tonic-gate 47d0b3732eSbholler/* 48d0b3732eSbholler * memset algorithm overview: 49d0b3732eSbholler * 50d0b3732eSbholler * Thresholds used below were determined experimentally. 51d0b3732eSbholler * 52d0b3732eSbholler * Pseudo code: 53d0b3732eSbholler * 54*fad5204eSbostrovs * NOTE: On AMD NO_SSE is always set. Performance on Opteron did not improve 55*fad5204eSbostrovs * using 16-byte stores. Setting NO_SSE on AMD should be re-evaluated on 56*fad5204eSbostrovs * future AMD processors. 57*fad5204eSbostrovs * 58*fad5204eSbostrovs * 59d0b3732eSbholler * If (size < 144 bytes) { 60d0b3732eSbholler * do unrolled code (primarily 8-byte stores) regardless of alignment.
61d0b3732eSbholler * } else { 62d0b3732eSbholler * Align destination to 16-byte boundary 63d0b3732eSbholler * 64d0b3732eSbholler * if (NO_SSE) { 65d0b3732eSbholler * If (size > largest level cache) { 66d0b3732eSbholler * Use 8-byte non-temporal stores (64-bytes/loop) 67d0b3732eSbholler * } else { 68d0b3732eSbholler * if (size >= 2K) { 69d0b3732eSbholler * Use rep sstoq 70d0b3732eSbholler * } else { 71d0b3732eSbholler * Use 8-byte stores (128 bytes per loop) 72d0b3732eSbholler * } 73d0b3732eSbholler * } 74d0b3732eSbholler * 75d0b3732eSbholler * } else { **USE SSE** 76d0b3732eSbholler * If (size < 192 bytes) { 77d0b3732eSbholler * do unrolled code using primarily 16-byte stores (SSE2) 78d0b3732eSbholler * } else { 79d0b3732eSbholler * If (size > largest level cache) { 80d0b3732eSbholler * Use 16-byte non-temporal stores (128-bytes/loop) 81d0b3732eSbholler * } else { 82d0b3732eSbholler * Use 16-byte stores (128 bytes per loop) 83d0b3732eSbholler * } 84d0b3732eSbholler * } 85d0b3732eSbholler * } 86d0b3732eSbholler * 87d0b3732eSbholler * Finish any remaining bytes via unrolled code above.
88d0b3732eSbholler * } 89d0b3732eSbholler */ 907c478bd9Sstevel@tonic-gate 91d0b3732eSbholler ENTRY(memset) # (void *, const void*, size_t) 92d0b3732eSbholler cmp $0x1,%rdx 93d0b3732eSbholler mov %rdi,%rax # memset returns the dest address 94d0b3732eSbholler jne L(ck2) 957c478bd9Sstevel@tonic-gate mov %sil,(%rdi) 96d0b3732eSbholler ret 97d0b3732eSbhollerL(ck2): 98d0b3732eSbholler mov $0x0101010101010101,%r9 99d0b3732eSbholler mov %rdx,%r8 100d0b3732eSbholler movzbq %sil,%rdx 101d0b3732eSbholler imul %r9,%rdx # clone value 8 times 1027c478bd9Sstevel@tonic-gate 103d0b3732eSbholler cmp $0x90,%r8 # 144 104d0b3732eSbholler jge L(ck_align) 1057c478bd9Sstevel@tonic-gate 106d0b3732eSbholler lea L(setPxQx)(%rip),%r11 107d0b3732eSbholler add %r8,%rdi 1087c478bd9Sstevel@tonic-gate 109d0b3732eSbholler movslq (%r11,%r8,4),%rcx 110d0b3732eSbholler lea (%rcx,%r11,1),%r11 111d0b3732eSbholler jmpq *%r11 1127c478bd9Sstevel@tonic-gate 113d0b3732eSbholler .balign 16 114d0b3732eSbhollerL(setPxQx): .int L(P0Q0)-L(setPxQx) 115d0b3732eSbholler .int L(P1Q0)-L(setPxQx) 116d0b3732eSbholler .int L(P2Q0)-L(setPxQx) 117d0b3732eSbholler .int L(P3Q0)-L(setPxQx) 118d0b3732eSbholler .int L(P4Q0)-L(setPxQx) 119d0b3732eSbholler .int L(P5Q0)-L(setPxQx) 120d0b3732eSbholler .int L(P6Q0)-L(setPxQx) 121d0b3732eSbholler .int L(P7Q0)-L(setPxQx) 1227c478bd9Sstevel@tonic-gate 123d0b3732eSbholler .int L(P0Q1)-L(setPxQx) 124d0b3732eSbholler .int L(P1Q1)-L(setPxQx) 125d0b3732eSbholler .int L(P2Q1)-L(setPxQx) 126d0b3732eSbholler .int L(P3Q1)-L(setPxQx) 127d0b3732eSbholler .int L(P4Q1)-L(setPxQx) 128d0b3732eSbholler .int L(P5Q1)-L(setPxQx) 129d0b3732eSbholler .int L(P6Q1)-L(setPxQx) 130d0b3732eSbholler .int L(P7Q1)-L(setPxQx) 1317c478bd9Sstevel@tonic-gate 132d0b3732eSbholler .int L(P0Q2)-L(setPxQx) 133d0b3732eSbholler .int L(P1Q2)-L(setPxQx) 134d0b3732eSbholler .int L(P2Q2)-L(setPxQx) 135d0b3732eSbholler .int L(P3Q2)-L(setPxQx) 136d0b3732eSbholler .int L(P4Q2)-L(setPxQx) 137d0b3732eSbholler .int L(P5Q2)-L(setPxQx) 
138d0b3732eSbholler .int L(P6Q2)-L(setPxQx) 139d0b3732eSbholler .int L(P7Q2)-L(setPxQx) 1407c478bd9Sstevel@tonic-gate 141d0b3732eSbholler .int L(P0Q3)-L(setPxQx) 142d0b3732eSbholler .int L(P1Q3)-L(setPxQx) 143d0b3732eSbholler .int L(P2Q3)-L(setPxQx) 144d0b3732eSbholler .int L(P3Q3)-L(setPxQx) 145d0b3732eSbholler .int L(P4Q3)-L(setPxQx) 146d0b3732eSbholler .int L(P5Q3)-L(setPxQx) 147d0b3732eSbholler .int L(P6Q3)-L(setPxQx) 148d0b3732eSbholler .int L(P7Q3)-L(setPxQx) 1497c478bd9Sstevel@tonic-gate 150d0b3732eSbholler .int L(P0Q4)-L(setPxQx) 151d0b3732eSbholler .int L(P1Q4)-L(setPxQx) 152d0b3732eSbholler .int L(P2Q4)-L(setPxQx) 153d0b3732eSbholler .int L(P3Q4)-L(setPxQx) 154d0b3732eSbholler .int L(P4Q4)-L(setPxQx) 155d0b3732eSbholler .int L(P5Q4)-L(setPxQx) 156d0b3732eSbholler .int L(P6Q4)-L(setPxQx) 157d0b3732eSbholler .int L(P7Q4)-L(setPxQx) 1587c478bd9Sstevel@tonic-gate 159d0b3732eSbholler .int L(P0Q5)-L(setPxQx) 160d0b3732eSbholler .int L(P1Q5)-L(setPxQx) 161d0b3732eSbholler .int L(P2Q5)-L(setPxQx) 162d0b3732eSbholler .int L(P3Q5)-L(setPxQx) 163d0b3732eSbholler .int L(P4Q5)-L(setPxQx) 164d0b3732eSbholler .int L(P5Q5)-L(setPxQx) 165d0b3732eSbholler .int L(P6Q5)-L(setPxQx) 166d0b3732eSbholler .int L(P7Q5)-L(setPxQx) 1677c478bd9Sstevel@tonic-gate 168d0b3732eSbholler .int L(P0Q6)-L(setPxQx) 169d0b3732eSbholler .int L(P1Q6)-L(setPxQx) 170d0b3732eSbholler .int L(P2Q6)-L(setPxQx) 171d0b3732eSbholler .int L(P3Q6)-L(setPxQx) 172d0b3732eSbholler .int L(P4Q6)-L(setPxQx) 173d0b3732eSbholler .int L(P5Q6)-L(setPxQx) 174d0b3732eSbholler .int L(P6Q6)-L(setPxQx) 175d0b3732eSbholler .int L(P7Q6)-L(setPxQx) 1767c478bd9Sstevel@tonic-gate 177d0b3732eSbholler .int L(P0Q7)-L(setPxQx) 178d0b3732eSbholler .int L(P1Q7)-L(setPxQx) 179d0b3732eSbholler .int L(P2Q7)-L(setPxQx) 180d0b3732eSbholler .int L(P3Q7)-L(setPxQx) 181d0b3732eSbholler .int L(P4Q7)-L(setPxQx) 182d0b3732eSbholler .int L(P5Q7)-L(setPxQx) 183d0b3732eSbholler .int L(P6Q7)-L(setPxQx) 184d0b3732eSbholler .int L(P7Q7)-L(setPxQx) 
1857c478bd9Sstevel@tonic-gate 186d0b3732eSbholler .int L(P0Q8)-L(setPxQx) 187d0b3732eSbholler .int L(P1Q8)-L(setPxQx) 188d0b3732eSbholler .int L(P2Q8)-L(setPxQx) 189d0b3732eSbholler .int L(P3Q8)-L(setPxQx) 190d0b3732eSbholler .int L(P4Q8)-L(setPxQx) 191d0b3732eSbholler .int L(P5Q8)-L(setPxQx) 192d0b3732eSbholler .int L(P6Q8)-L(setPxQx) 193d0b3732eSbholler .int L(P7Q8)-L(setPxQx) 1947c478bd9Sstevel@tonic-gate 195d0b3732eSbholler .int L(P0Q9)-L(setPxQx) 196d0b3732eSbholler .int L(P1Q9)-L(setPxQx) 197d0b3732eSbholler .int L(P2Q9)-L(setPxQx) 198d0b3732eSbholler .int L(P3Q9)-L(setPxQx) 199d0b3732eSbholler .int L(P4Q9)-L(setPxQx) 200d0b3732eSbholler .int L(P5Q9)-L(setPxQx) 201d0b3732eSbholler .int L(P6Q9)-L(setPxQx) 202d0b3732eSbholler .int L(P7Q9)-L(setPxQx) 203d0b3732eSbholler 204d0b3732eSbholler .int L(P0QA)-L(setPxQx) 205d0b3732eSbholler .int L(P1QA)-L(setPxQx) 206d0b3732eSbholler .int L(P2QA)-L(setPxQx) 207d0b3732eSbholler .int L(P3QA)-L(setPxQx) 208d0b3732eSbholler .int L(P4QA)-L(setPxQx) 209d0b3732eSbholler .int L(P5QA)-L(setPxQx) 210d0b3732eSbholler .int L(P6QA)-L(setPxQx) 211d0b3732eSbholler .int L(P7QA)-L(setPxQx) 212d0b3732eSbholler 213d0b3732eSbholler .int L(P0QB)-L(setPxQx) 214d0b3732eSbholler .int L(P1QB)-L(setPxQx) 215d0b3732eSbholler .int L(P2QB)-L(setPxQx) 216d0b3732eSbholler .int L(P3QB)-L(setPxQx) 217d0b3732eSbholler .int L(P4QB)-L(setPxQx) 218d0b3732eSbholler .int L(P5QB)-L(setPxQx) 219d0b3732eSbholler .int L(P6QB)-L(setPxQx) 220d0b3732eSbholler .int L(P7QB)-L(setPxQx) 221d0b3732eSbholler 222d0b3732eSbholler .int L(P0QC)-L(setPxQx) 223d0b3732eSbholler .int L(P1QC)-L(setPxQx) 224d0b3732eSbholler .int L(P2QC)-L(setPxQx) 225d0b3732eSbholler .int L(P3QC)-L(setPxQx) 226d0b3732eSbholler .int L(P4QC)-L(setPxQx) 227d0b3732eSbholler .int L(P5QC)-L(setPxQx) 228d0b3732eSbholler .int L(P6QC)-L(setPxQx) 229d0b3732eSbholler .int L(P7QC)-L(setPxQx) 230d0b3732eSbholler 231d0b3732eSbholler .int L(P0QD)-L(setPxQx) 232d0b3732eSbholler .int L(P1QD)-L(setPxQx) 
233d0b3732eSbholler .int L(P2QD)-L(setPxQx) 234d0b3732eSbholler .int L(P3QD)-L(setPxQx) 235d0b3732eSbholler .int L(P4QD)-L(setPxQx) 236d0b3732eSbholler .int L(P5QD)-L(setPxQx) 237d0b3732eSbholler .int L(P6QD)-L(setPxQx) 238d0b3732eSbholler .int L(P7QD)-L(setPxQx) 239d0b3732eSbholler 240d0b3732eSbholler .int L(P0QE)-L(setPxQx) # 112 241d0b3732eSbholler .int L(P1QE)-L(setPxQx) 242d0b3732eSbholler .int L(P2QE)-L(setPxQx) 243d0b3732eSbholler .int L(P3QE)-L(setPxQx) 244d0b3732eSbholler .int L(P4QE)-L(setPxQx) 245d0b3732eSbholler .int L(P5QE)-L(setPxQx) 246d0b3732eSbholler .int L(P6QE)-L(setPxQx) 247d0b3732eSbholler .int L(P7QE)-L(setPxQx) 248d0b3732eSbholler 249d0b3732eSbholler .int L(P0QF)-L(setPxQx) #120 250d0b3732eSbholler .int L(P1QF)-L(setPxQx) 251d0b3732eSbholler .int L(P2QF)-L(setPxQx) 252d0b3732eSbholler .int L(P3QF)-L(setPxQx) 253d0b3732eSbholler .int L(P4QF)-L(setPxQx) 254d0b3732eSbholler .int L(P5QF)-L(setPxQx) 255d0b3732eSbholler .int L(P6QF)-L(setPxQx) 256d0b3732eSbholler .int L(P7QF)-L(setPxQx) 257d0b3732eSbholler 258d0b3732eSbholler .int L(P0QG)-L(setPxQx) #128 259d0b3732eSbholler .int L(P1QG)-L(setPxQx) 260d0b3732eSbholler .int L(P2QG)-L(setPxQx) 261d0b3732eSbholler .int L(P3QG)-L(setPxQx) 262d0b3732eSbholler .int L(P4QG)-L(setPxQx) 263d0b3732eSbholler .int L(P5QG)-L(setPxQx) 264d0b3732eSbholler .int L(P6QG)-L(setPxQx) 265d0b3732eSbholler .int L(P7QG)-L(setPxQx) 266d0b3732eSbholler 267d0b3732eSbholler .int L(P0QH)-L(setPxQx) #136 268d0b3732eSbholler .int L(P1QH)-L(setPxQx) 269d0b3732eSbholler .int L(P2QH)-L(setPxQx) 270d0b3732eSbholler .int L(P3QH)-L(setPxQx) 271d0b3732eSbholler .int L(P4QH)-L(setPxQx) 272d0b3732eSbholler .int L(P5QH)-L(setPxQx) 273d0b3732eSbholler .int L(P6QH)-L(setPxQx) 274d0b3732eSbholler .int L(P7QH)-L(setPxQx) #143 275d0b3732eSbholler 276d0b3732eSbholler .balign 16 277d0b3732eSbhollerL(P1QH): mov %rdx,-0x89(%rdi) 278d0b3732eSbhollerL(P1QG): mov %rdx,-0x81(%rdi) 279d0b3732eSbholler .balign 16 280d0b3732eSbhollerL(P1QF): mov 
%rdx,-0x79(%rdi) 281d0b3732eSbhollerL(P1QE): mov %rdx,-0x71(%rdi) 282d0b3732eSbhollerL(P1QD): mov %rdx,-0x69(%rdi) 283d0b3732eSbhollerL(P1QC): mov %rdx,-0x61(%rdi) 284d0b3732eSbhollerL(P1QB): mov %rdx,-0x59(%rdi) 285d0b3732eSbhollerL(P1QA): mov %rdx,-0x51(%rdi) 286d0b3732eSbhollerL(P1Q9): mov %rdx,-0x49(%rdi) 287d0b3732eSbhollerL(P1Q8): mov %rdx,-0x41(%rdi) 288d0b3732eSbhollerL(P1Q7): mov %rdx,-0x39(%rdi) 289d0b3732eSbhollerL(P1Q6): mov %rdx,-0x31(%rdi) 290d0b3732eSbhollerL(P1Q5): mov %rdx,-0x29(%rdi) 291d0b3732eSbhollerL(P1Q4): mov %rdx,-0x21(%rdi) 292d0b3732eSbhollerL(P1Q3): mov %rdx,-0x19(%rdi) 293d0b3732eSbhollerL(P1Q2): mov %rdx,-0x11(%rdi) 294d0b3732eSbhollerL(P1Q1): mov %rdx,-0x9(%rdi) 295d0b3732eSbhollerL(P1Q0): mov %dl,-0x1(%rdi) 2967c478bd9Sstevel@tonic-gate ret 2977c478bd9Sstevel@tonic-gate 298d0b3732eSbholler .balign 16 299d0b3732eSbhollerL(P0QH): mov %rdx,-0x88(%rdi) 300d0b3732eSbholler .balign 16 301d0b3732eSbhollerL(P0QG): mov %rdx,-0x80(%rdi) 302d0b3732eSbhollerL(P0QF): mov %rdx,-0x78(%rdi) 303d0b3732eSbhollerL(P0QE): mov %rdx,-0x70(%rdi) 304d0b3732eSbhollerL(P0QD): mov %rdx,-0x68(%rdi) 305d0b3732eSbhollerL(P0QC): mov %rdx,-0x60(%rdi) 306d0b3732eSbhollerL(P0QB): mov %rdx,-0x58(%rdi) 307d0b3732eSbhollerL(P0QA): mov %rdx,-0x50(%rdi) 308d0b3732eSbhollerL(P0Q9): mov %rdx,-0x48(%rdi) 309d0b3732eSbhollerL(P0Q8): mov %rdx,-0x40(%rdi) 310d0b3732eSbhollerL(P0Q7): mov %rdx,-0x38(%rdi) 311d0b3732eSbhollerL(P0Q6): mov %rdx,-0x30(%rdi) 312d0b3732eSbhollerL(P0Q5): mov %rdx,-0x28(%rdi) 313d0b3732eSbhollerL(P0Q4): mov %rdx,-0x20(%rdi) 314d0b3732eSbhollerL(P0Q3): mov %rdx,-0x18(%rdi) 315d0b3732eSbhollerL(P0Q2): mov %rdx,-0x10(%rdi) 316d0b3732eSbhollerL(P0Q1): mov %rdx,-0x8(%rdi) 317d0b3732eSbhollerL(P0Q0): ret 3187c478bd9Sstevel@tonic-gate 319d0b3732eSbholler .balign 16 320d0b3732eSbhollerL(P2QH): mov %rdx,-0x8a(%rdi) 321d0b3732eSbhollerL(P2QG): mov %rdx,-0x82(%rdi) 322d0b3732eSbholler .balign 16 323d0b3732eSbhollerL(P2QF): mov %rdx,-0x7a(%rdi) 
324d0b3732eSbhollerL(P2QE): mov %rdx,-0x72(%rdi) 325d0b3732eSbhollerL(P2QD): mov %rdx,-0x6a(%rdi) 326d0b3732eSbhollerL(P2QC): mov %rdx,-0x62(%rdi) 327d0b3732eSbhollerL(P2QB): mov %rdx,-0x5a(%rdi) 328d0b3732eSbhollerL(P2QA): mov %rdx,-0x52(%rdi) 329d0b3732eSbhollerL(P2Q9): mov %rdx,-0x4a(%rdi) 330d0b3732eSbhollerL(P2Q8): mov %rdx,-0x42(%rdi) 331d0b3732eSbhollerL(P2Q7): mov %rdx,-0x3a(%rdi) 332d0b3732eSbhollerL(P2Q6): mov %rdx,-0x32(%rdi) 333d0b3732eSbhollerL(P2Q5): mov %rdx,-0x2a(%rdi) 334d0b3732eSbhollerL(P2Q4): mov %rdx,-0x22(%rdi) 335d0b3732eSbhollerL(P2Q3): mov %rdx,-0x1a(%rdi) 336d0b3732eSbhollerL(P2Q2): mov %rdx,-0x12(%rdi) 337d0b3732eSbhollerL(P2Q1): mov %rdx,-0xa(%rdi) 338d0b3732eSbhollerL(P2Q0): mov %dx,-0x2(%rdi) 3397c478bd9Sstevel@tonic-gate ret 3407c478bd9Sstevel@tonic-gate 341d0b3732eSbholler .balign 16 342d0b3732eSbhollerL(P3QH): mov %rdx,-0x8b(%rdi) 343d0b3732eSbhollerL(P3QG): mov %rdx,-0x83(%rdi) 344d0b3732eSbholler .balign 16 345d0b3732eSbhollerL(P3QF): mov %rdx,-0x7b(%rdi) 346d0b3732eSbhollerL(P3QE): mov %rdx,-0x73(%rdi) 347d0b3732eSbhollerL(P3QD): mov %rdx,-0x6b(%rdi) 348d0b3732eSbhollerL(P3QC): mov %rdx,-0x63(%rdi) 349d0b3732eSbhollerL(P3QB): mov %rdx,-0x5b(%rdi) 350d0b3732eSbhollerL(P3QA): mov %rdx,-0x53(%rdi) 351d0b3732eSbhollerL(P3Q9): mov %rdx,-0x4b(%rdi) 352d0b3732eSbhollerL(P3Q8): mov %rdx,-0x43(%rdi) 353d0b3732eSbhollerL(P3Q7): mov %rdx,-0x3b(%rdi) 354d0b3732eSbhollerL(P3Q6): mov %rdx,-0x33(%rdi) 355d0b3732eSbhollerL(P3Q5): mov %rdx,-0x2b(%rdi) 356d0b3732eSbhollerL(P3Q4): mov %rdx,-0x23(%rdi) 357d0b3732eSbhollerL(P3Q3): mov %rdx,-0x1b(%rdi) 358d0b3732eSbhollerL(P3Q2): mov %rdx,-0x13(%rdi) 359d0b3732eSbhollerL(P3Q1): mov %rdx,-0xb(%rdi) 360d0b3732eSbhollerL(P3Q0): mov %dx,-0x3(%rdi) 361d0b3732eSbholler mov %dl,-0x1(%rdi) 3627c478bd9Sstevel@tonic-gate ret 3637c478bd9Sstevel@tonic-gate 364d0b3732eSbholler .balign 16 365d0b3732eSbhollerL(P4QH): mov %rdx,-0x8c(%rdi) 366d0b3732eSbhollerL(P4QG): mov %rdx,-0x84(%rdi) 367d0b3732eSbholler .balign 16 
368d0b3732eSbhollerL(P4QF): mov %rdx,-0x7c(%rdi) 369d0b3732eSbhollerL(P4QE): mov %rdx,-0x74(%rdi) 370d0b3732eSbhollerL(P4QD): mov %rdx,-0x6c(%rdi) 371d0b3732eSbhollerL(P4QC): mov %rdx,-0x64(%rdi) 372d0b3732eSbhollerL(P4QB): mov %rdx,-0x5c(%rdi) 373d0b3732eSbhollerL(P4QA): mov %rdx,-0x54(%rdi) 374d0b3732eSbhollerL(P4Q9): mov %rdx,-0x4c(%rdi) 375d0b3732eSbhollerL(P4Q8): mov %rdx,-0x44(%rdi) 376d0b3732eSbhollerL(P4Q7): mov %rdx,-0x3c(%rdi) 377d0b3732eSbhollerL(P4Q6): mov %rdx,-0x34(%rdi) 378d0b3732eSbhollerL(P4Q5): mov %rdx,-0x2c(%rdi) 379d0b3732eSbhollerL(P4Q4): mov %rdx,-0x24(%rdi) 380d0b3732eSbhollerL(P4Q3): mov %rdx,-0x1c(%rdi) 381d0b3732eSbhollerL(P4Q2): mov %rdx,-0x14(%rdi) 382d0b3732eSbhollerL(P4Q1): mov %rdx,-0xc(%rdi) 383d0b3732eSbhollerL(P4Q0): mov %edx,-0x4(%rdi) 3847c478bd9Sstevel@tonic-gate ret 3857c478bd9Sstevel@tonic-gate 386d0b3732eSbholler .balign 16 387d0b3732eSbhollerL(P5QH): mov %rdx,-0x8d(%rdi) 388d0b3732eSbhollerL(P5QG): mov %rdx,-0x85(%rdi) 389d0b3732eSbholler .balign 16 390d0b3732eSbhollerL(P5QF): mov %rdx,-0x7d(%rdi) 391d0b3732eSbhollerL(P5QE): mov %rdx,-0x75(%rdi) 392d0b3732eSbhollerL(P5QD): mov %rdx,-0x6d(%rdi) 393d0b3732eSbhollerL(P5QC): mov %rdx,-0x65(%rdi) 394d0b3732eSbhollerL(P5QB): mov %rdx,-0x5d(%rdi) 395d0b3732eSbhollerL(P5QA): mov %rdx,-0x55(%rdi) 396d0b3732eSbhollerL(P5Q9): mov %rdx,-0x4d(%rdi) 397d0b3732eSbhollerL(P5Q8): mov %rdx,-0x45(%rdi) 398d0b3732eSbhollerL(P5Q7): mov %rdx,-0x3d(%rdi) 399d0b3732eSbhollerL(P5Q6): mov %rdx,-0x35(%rdi) 400d0b3732eSbhollerL(P5Q5): mov %rdx,-0x2d(%rdi) 401d0b3732eSbhollerL(P5Q4): mov %rdx,-0x25(%rdi) 402d0b3732eSbhollerL(P5Q3): mov %rdx,-0x1d(%rdi) 403d0b3732eSbhollerL(P5Q2): mov %rdx,-0x15(%rdi) 404d0b3732eSbhollerL(P5Q1): mov %rdx,-0xd(%rdi) 405d0b3732eSbhollerL(P5Q0): mov %edx,-0x5(%rdi) 406d0b3732eSbholler mov %dl,-0x1(%rdi) 4077c478bd9Sstevel@tonic-gate ret 4087c478bd9Sstevel@tonic-gate 409d0b3732eSbholler .balign 16 410d0b3732eSbhollerL(P6QH): mov %rdx,-0x8e(%rdi) 411d0b3732eSbhollerL(P6QG): 
mov %rdx,-0x86(%rdi) 412d0b3732eSbholler .balign 16 413d0b3732eSbhollerL(P6QF): mov %rdx,-0x7e(%rdi) 414d0b3732eSbhollerL(P6QE): mov %rdx,-0x76(%rdi) 415d0b3732eSbhollerL(P6QD): mov %rdx,-0x6e(%rdi) 416d0b3732eSbhollerL(P6QC): mov %rdx,-0x66(%rdi) 417d0b3732eSbhollerL(P6QB): mov %rdx,-0x5e(%rdi) 418d0b3732eSbhollerL(P6QA): mov %rdx,-0x56(%rdi) 419d0b3732eSbhollerL(P6Q9): mov %rdx,-0x4e(%rdi) 420d0b3732eSbhollerL(P6Q8): mov %rdx,-0x46(%rdi) 421d0b3732eSbhollerL(P6Q7): mov %rdx,-0x3e(%rdi) 422d0b3732eSbhollerL(P6Q6): mov %rdx,-0x36(%rdi) 423d0b3732eSbhollerL(P6Q5): mov %rdx,-0x2e(%rdi) 424d0b3732eSbhollerL(P6Q4): mov %rdx,-0x26(%rdi) 425d0b3732eSbhollerL(P6Q3): mov %rdx,-0x1e(%rdi) 426d0b3732eSbhollerL(P6Q2): mov %rdx,-0x16(%rdi) 427d0b3732eSbhollerL(P6Q1): mov %rdx,-0xe(%rdi) 428d0b3732eSbhollerL(P6Q0): mov %edx,-0x6(%rdi) 429d0b3732eSbholler mov %dx,-0x2(%rdi) 430d0b3732eSbholler ret 431d0b3732eSbholler 432d0b3732eSbholler .balign 16 433d0b3732eSbhollerL(P7QH): mov %rdx,-0x8f(%rdi) 434d0b3732eSbhollerL(P7QG): mov %rdx,-0x87(%rdi) 435d0b3732eSbholler .balign 16 436d0b3732eSbhollerL(P7QF): mov %rdx,-0x7f(%rdi) 437d0b3732eSbhollerL(P7QE): mov %rdx,-0x77(%rdi) 438d0b3732eSbhollerL(P7QD): mov %rdx,-0x6f(%rdi) 439d0b3732eSbhollerL(P7QC): mov %rdx,-0x67(%rdi) 440d0b3732eSbhollerL(P7QB): mov %rdx,-0x5f(%rdi) 441d0b3732eSbhollerL(P7QA): mov %rdx,-0x57(%rdi) 442d0b3732eSbhollerL(P7Q9): mov %rdx,-0x4f(%rdi) 443d0b3732eSbhollerL(P7Q8): mov %rdx,-0x47(%rdi) 444d0b3732eSbhollerL(P7Q7): mov %rdx,-0x3f(%rdi) 445d0b3732eSbhollerL(P7Q6): mov %rdx,-0x37(%rdi) 446d0b3732eSbhollerL(P7Q5): mov %rdx,-0x2f(%rdi) 447d0b3732eSbhollerL(P7Q4): mov %rdx,-0x27(%rdi) 448d0b3732eSbhollerL(P7Q3): mov %rdx,-0x1f(%rdi) 449d0b3732eSbhollerL(P7Q2): mov %rdx,-0x17(%rdi) 450d0b3732eSbhollerL(P7Q1): mov %rdx,-0xf(%rdi) 451d0b3732eSbhollerL(P7Q0): mov %edx,-0x7(%rdi) 452d0b3732eSbholler mov %dx,-0x3(%rdi) 453d0b3732eSbholler mov %dl,-0x1(%rdi) 454d0b3732eSbholler ret 455d0b3732eSbholler 
456d0b3732eSbholler .balign 16 457d0b3732eSbhollerL(ck_align): 458d0b3732eSbholler /* 459d0b3732eSbholler * Align to 16 byte boundary first 460d0b3732eSbholler */ 461d0b3732eSbholler lea L(AliPxQx)(%rip),%r11 462d0b3732eSbholler mov $0x10,%r10 463d0b3732eSbholler mov %rdi,%r9 464d0b3732eSbholler and $0xf,%r9 465d0b3732eSbholler sub %r9,%r10 466d0b3732eSbholler and $0xf,%r10 467d0b3732eSbholler add %r10,%rdi 468d0b3732eSbholler sub %r10,%r8 469d0b3732eSbholler 470d0b3732eSbholler movslq (%r11,%r10,4),%rcx 471d0b3732eSbholler lea (%rcx,%r11,1),%r11 472d0b3732eSbholler jmpq *%r11 # align dest to 16-byte boundary 473d0b3732eSbholler 474d0b3732eSbholler .balign 16 475d0b3732eSbhollerL(AliPxQx): .int L(aligned_now)-L(AliPxQx) 476d0b3732eSbholler .int L(A1Q0)-L(AliPxQx) 477d0b3732eSbholler .int L(A2Q0)-L(AliPxQx) 478d0b3732eSbholler .int L(A3Q0)-L(AliPxQx) 479d0b3732eSbholler .int L(A4Q0)-L(AliPxQx) 480d0b3732eSbholler .int L(A5Q0)-L(AliPxQx) 481d0b3732eSbholler .int L(A6Q0)-L(AliPxQx) 482d0b3732eSbholler .int L(A7Q0)-L(AliPxQx) 483d0b3732eSbholler 484d0b3732eSbholler .int L(A0Q1)-L(AliPxQx) 485d0b3732eSbholler .int L(A1Q1)-L(AliPxQx) 486d0b3732eSbholler .int L(A2Q1)-L(AliPxQx) 487d0b3732eSbholler .int L(A3Q1)-L(AliPxQx) 488d0b3732eSbholler .int L(A4Q1)-L(AliPxQx) 489d0b3732eSbholler .int L(A5Q1)-L(AliPxQx) 490d0b3732eSbholler .int L(A6Q1)-L(AliPxQx) 491d0b3732eSbholler .int L(A7Q1)-L(AliPxQx) 492d0b3732eSbholler 493d0b3732eSbholler .balign 16 494d0b3732eSbhollerL(A5Q1): mov %dl,-0xd(%rdi) 495d0b3732eSbhollerL(A4Q1): mov %edx,-0xc(%rdi) 496d0b3732eSbhollerL(A0Q1): mov %rdx,-0x8(%rdi) 497d0b3732eSbholler jmp L(aligned_now) 498d0b3732eSbholler 499d0b3732eSbholler .balign 16 500d0b3732eSbhollerL(A1Q1): mov %dl,-0x9(%rdi) 501d0b3732eSbholler mov %rdx,-0x8(%rdi) 502d0b3732eSbholler jmp L(aligned_now) 503d0b3732eSbholler 504d0b3732eSbholler .balign 16 505d0b3732eSbhollerL(A1Q0): mov %dl,-0x1(%rdi) 506d0b3732eSbholler jmp L(aligned_now) 507d0b3732eSbholler 508d0b3732eSbholler 
.balign 16 509d0b3732eSbhollerL(A3Q1): mov %dl,-0xb(%rdi) 510d0b3732eSbhollerL(A2Q1): mov %dx,-0xa(%rdi) 511d0b3732eSbholler mov %rdx,-0x8(%rdi) 512d0b3732eSbholler jmp L(aligned_now) 513d0b3732eSbholler 514d0b3732eSbholler .balign 16 515d0b3732eSbhollerL(A3Q0): mov %dl,-0x3(%rdi) 516d0b3732eSbhollerL(A2Q0): mov %dx,-0x2(%rdi) 517d0b3732eSbholler jmp L(aligned_now) 518d0b3732eSbholler 519d0b3732eSbholler .balign 16 520d0b3732eSbhollerL(A5Q0): mov %dl,-0x5(%rdi) 521d0b3732eSbhollerL(A4Q0): mov %edx,-0x4(%rdi) 522d0b3732eSbholler jmp L(aligned_now) 523d0b3732eSbholler 524d0b3732eSbholler .balign 16 525d0b3732eSbhollerL(A7Q1): mov %dl,-0xf(%rdi) 526d0b3732eSbhollerL(A6Q1): mov %dx,-0xe(%rdi) 527d0b3732eSbholler mov %edx,-0xc(%rdi) 528d0b3732eSbholler mov %rdx,-0x8(%rdi) 529d0b3732eSbholler jmp L(aligned_now) 530d0b3732eSbholler 531d0b3732eSbholler .balign 16 532d0b3732eSbhollerL(A7Q0): mov %dl,-0x7(%rdi) 533d0b3732eSbhollerL(A6Q0): mov %dx,-0x6(%rdi) 534d0b3732eSbholler mov %edx,-0x4(%rdi) 535d0b3732eSbholler #jmp L(aligned_now) # Fall thru... 
536d0b3732eSbholler 537d0b3732eSbholler .balign 16 538d0b3732eSbhollerL(aligned_now): 539d0b3732eSbholler /* 540d0b3732eSbholler * Check memops method 541d0b3732eSbholler */ 542d0b3732eSbholler cmpl $NO_SSE,.memops_method(%rip) 543d0b3732eSbholler je L(Loop8byte_pre) 544d0b3732eSbholler 545d0b3732eSbholler /* 546d0b3732eSbholler * Use SSE2 instructions 547d0b3732eSbholler */ 548d0b3732eSbholler movd %rdx,%xmm0 549d0b3732eSbholler lea L(SSExDx)(%rip),%r9 # after dest alignment 550d0b3732eSbholler punpcklqdq %xmm0,%xmm0 # fill RegXMM0 with the pattern 551d0b3732eSbholler cmp $0xc0,%r8 # 192 552d0b3732eSbholler jge L(byte32sse2_pre) 553d0b3732eSbholler 554d0b3732eSbholler add %r8,%rdi 555d0b3732eSbholler 556d0b3732eSbholler movslq (%r9,%r8,4),%rcx 557d0b3732eSbholler lea (%rcx,%r9,1),%r9 558d0b3732eSbholler jmpq *%r9 559d0b3732eSbholler 560d0b3732eSbholler .balign 16 561d0b3732eSbhollerL(SSE0QB): movdqa %xmm0,-0xb0(%rdi) 562d0b3732eSbhollerL(SSE0QA): movdqa %xmm0,-0xa0(%rdi) 563d0b3732eSbhollerL(SSE0Q9): movdqa %xmm0,-0x90(%rdi) 564d0b3732eSbhollerL(SSE0Q8): movdqa %xmm0,-0x80(%rdi) 565d0b3732eSbhollerL(SSE0Q7): movdqa %xmm0,-0x70(%rdi) 566d0b3732eSbhollerL(SSE0Q6): movdqa %xmm0,-0x60(%rdi) 567d0b3732eSbhollerL(SSE0Q5): movdqa %xmm0,-0x50(%rdi) 568d0b3732eSbhollerL(SSE0Q4): movdqa %xmm0,-0x40(%rdi) 569d0b3732eSbhollerL(SSE0Q3): movdqa %xmm0,-0x30(%rdi) 570d0b3732eSbhollerL(SSE0Q2): movdqa %xmm0,-0x20(%rdi) 571d0b3732eSbhollerL(SSE0Q1): movdqa %xmm0,-0x10(%rdi) 572d0b3732eSbhollerL(SSE0Q0): ret 573d0b3732eSbholler 574d0b3732eSbholler .balign 16 575d0b3732eSbhollerL(SSE1QB): movdqa %xmm0,-0xb1(%rdi) 576d0b3732eSbhollerL(SSE1QA): movdqa %xmm0,-0xa1(%rdi) 577d0b3732eSbhollerL(SSE1Q9): movdqa %xmm0,-0x91(%rdi) 578d0b3732eSbhollerL(SSE1Q8): movdqa %xmm0,-0x81(%rdi) 579d0b3732eSbhollerL(SSE1Q7): movdqa %xmm0,-0x71(%rdi) 580d0b3732eSbhollerL(SSE1Q6): movdqa %xmm0,-0x61(%rdi) 581d0b3732eSbhollerL(SSE1Q5): movdqa %xmm0,-0x51(%rdi) 582d0b3732eSbhollerL(SSE1Q4): movdqa 
%xmm0,-0x41(%rdi) 583d0b3732eSbhollerL(SSE1Q3): movdqa %xmm0,-0x31(%rdi) 584d0b3732eSbhollerL(SSE1Q2): movdqa %xmm0,-0x21(%rdi) 585d0b3732eSbhollerL(SSE1Q1): movdqa %xmm0,-0x11(%rdi) 586d0b3732eSbhollerL(SSE1Q0): mov %dl,-0x1(%rdi) 587d0b3732eSbholler ret 588d0b3732eSbholler 589d0b3732eSbholler .balign 16 590d0b3732eSbhollerL(SSE2QB): movdqa %xmm0,-0xb2(%rdi) 591d0b3732eSbhollerL(SSE2QA): movdqa %xmm0,-0xa2(%rdi) 592d0b3732eSbhollerL(SSE2Q9): movdqa %xmm0,-0x92(%rdi) 593d0b3732eSbhollerL(SSE2Q8): movdqa %xmm0,-0x82(%rdi) 594d0b3732eSbhollerL(SSE2Q7): movdqa %xmm0,-0x72(%rdi) 595d0b3732eSbhollerL(SSE2Q6): movdqa %xmm0,-0x62(%rdi) 596d0b3732eSbhollerL(SSE2Q5): movdqa %xmm0,-0x52(%rdi) 597d0b3732eSbhollerL(SSE2Q4): movdqa %xmm0,-0x42(%rdi) 598d0b3732eSbhollerL(SSE2Q3): movdqa %xmm0,-0x32(%rdi) 599d0b3732eSbhollerL(SSE2Q2): movdqa %xmm0,-0x22(%rdi) 600d0b3732eSbhollerL(SSE2Q1): movdqa %xmm0,-0x12(%rdi) 601d0b3732eSbhollerL(SSE2Q0): mov %dx,-0x2(%rdi) 602d0b3732eSbholler ret 603d0b3732eSbholler 604d0b3732eSbholler .balign 16 605d0b3732eSbhollerL(SSE3QB): movdqa %xmm0,-0xb3(%rdi) 606d0b3732eSbhollerL(SSE3QA): movdqa %xmm0,-0xa3(%rdi) 607d0b3732eSbhollerL(SSE3Q9): movdqa %xmm0,-0x93(%rdi) 608d0b3732eSbhollerL(SSE3Q8): movdqa %xmm0,-0x83(%rdi) 609d0b3732eSbhollerL(SSE3Q7): movdqa %xmm0,-0x73(%rdi) 610d0b3732eSbhollerL(SSE3Q6): movdqa %xmm0,-0x63(%rdi) 611d0b3732eSbhollerL(SSE3Q5): movdqa %xmm0,-0x53(%rdi) 612d0b3732eSbhollerL(SSE3Q4): movdqa %xmm0,-0x43(%rdi) 613d0b3732eSbhollerL(SSE3Q3): movdqa %xmm0,-0x33(%rdi) 614d0b3732eSbhollerL(SSE3Q2): movdqa %xmm0,-0x23(%rdi) 615d0b3732eSbhollerL(SSE3Q1): movdqa %xmm0,-0x13(%rdi) 616d0b3732eSbhollerL(SSE3Q0): mov %dx,-0x3(%rdi) 617d0b3732eSbholler mov %dl,-0x1(%rdi) 618d0b3732eSbholler ret 619d0b3732eSbholler 620d0b3732eSbholler .balign 16 621d0b3732eSbhollerL(SSE4QB): movdqa %xmm0,-0xb4(%rdi) 622d0b3732eSbhollerL(SSE4QA): movdqa %xmm0,-0xa4(%rdi) 623d0b3732eSbhollerL(SSE4Q9): movdqa %xmm0,-0x94(%rdi) 624d0b3732eSbhollerL(SSE4Q8): 
movdqa %xmm0,-0x84(%rdi) 625d0b3732eSbhollerL(SSE4Q7): movdqa %xmm0,-0x74(%rdi) 626d0b3732eSbhollerL(SSE4Q6): movdqa %xmm0,-0x64(%rdi) 627d0b3732eSbhollerL(SSE4Q5): movdqa %xmm0,-0x54(%rdi) 628d0b3732eSbhollerL(SSE4Q4): movdqa %xmm0,-0x44(%rdi) 629d0b3732eSbhollerL(SSE4Q3): movdqa %xmm0,-0x34(%rdi) 630d0b3732eSbhollerL(SSE4Q2): movdqa %xmm0,-0x24(%rdi) 631d0b3732eSbhollerL(SSE4Q1): movdqa %xmm0,-0x14(%rdi) 632d0b3732eSbhollerL(SSE4Q0): mov %edx,-0x4(%rdi) 633d0b3732eSbholler ret 634d0b3732eSbholler 635d0b3732eSbholler .balign 16 636d0b3732eSbhollerL(SSE5QB): movdqa %xmm0,-0xb5(%rdi) 637d0b3732eSbhollerL(SSE5QA): movdqa %xmm0,-0xa5(%rdi) 638d0b3732eSbhollerL(SSE5Q9): movdqa %xmm0,-0x95(%rdi) 639d0b3732eSbhollerL(SSE5Q8): movdqa %xmm0,-0x85(%rdi) 640d0b3732eSbhollerL(SSE5Q7): movdqa %xmm0,-0x75(%rdi) 641d0b3732eSbhollerL(SSE5Q6): movdqa %xmm0,-0x65(%rdi) 642d0b3732eSbhollerL(SSE5Q5): movdqa %xmm0,-0x55(%rdi) 643d0b3732eSbhollerL(SSE5Q4): movdqa %xmm0,-0x45(%rdi) 644d0b3732eSbhollerL(SSE5Q3): movdqa %xmm0,-0x35(%rdi) 645d0b3732eSbhollerL(SSE5Q2): movdqa %xmm0,-0x25(%rdi) 646d0b3732eSbhollerL(SSE5Q1): movdqa %xmm0,-0x15(%rdi) 647d0b3732eSbhollerL(SSE5Q0): mov %edx,-0x5(%rdi) 648d0b3732eSbholler mov %dl,-0x1(%rdi) 649d0b3732eSbholler ret 650d0b3732eSbholler 651d0b3732eSbholler .balign 16 652d0b3732eSbhollerL(SSE6QB): movdqa %xmm0,-0xb6(%rdi) 653d0b3732eSbhollerL(SSE6QA): movdqa %xmm0,-0xa6(%rdi) 654d0b3732eSbhollerL(SSE6Q9): movdqa %xmm0,-0x96(%rdi) 655d0b3732eSbhollerL(SSE6Q8): movdqa %xmm0,-0x86(%rdi) 656d0b3732eSbhollerL(SSE6Q7): movdqa %xmm0,-0x76(%rdi) 657d0b3732eSbhollerL(SSE6Q6): movdqa %xmm0,-0x66(%rdi) 658d0b3732eSbhollerL(SSE6Q5): movdqa %xmm0,-0x56(%rdi) 659d0b3732eSbhollerL(SSE6Q4): movdqa %xmm0,-0x46(%rdi) 660d0b3732eSbhollerL(SSE6Q3): movdqa %xmm0,-0x36(%rdi) 661d0b3732eSbhollerL(SSE6Q2): movdqa %xmm0,-0x26(%rdi) 662d0b3732eSbhollerL(SSE6Q1): movdqa %xmm0,-0x16(%rdi) 663d0b3732eSbhollerL(SSE6Q0): mov %edx,-0x6(%rdi) 664d0b3732eSbholler mov %dx,-0x2(%rdi) 
665d0b3732eSbholler ret 666d0b3732eSbholler 667d0b3732eSbholler .balign 16 668d0b3732eSbhollerL(SSE7QB): movdqa %xmm0,-0xb7(%rdi) 669d0b3732eSbhollerL(SSE7QA): movdqa %xmm0,-0xa7(%rdi) 670d0b3732eSbhollerL(SSE7Q9): movdqa %xmm0,-0x97(%rdi) 671d0b3732eSbhollerL(SSE7Q8): movdqa %xmm0,-0x87(%rdi) 672d0b3732eSbhollerL(SSE7Q7): movdqa %xmm0,-0x77(%rdi) 673d0b3732eSbhollerL(SSE7Q6): movdqa %xmm0,-0x67(%rdi) 674d0b3732eSbhollerL(SSE7Q5): movdqa %xmm0,-0x57(%rdi) 675d0b3732eSbhollerL(SSE7Q4): movdqa %xmm0,-0x47(%rdi) 676d0b3732eSbhollerL(SSE7Q3): movdqa %xmm0,-0x37(%rdi) 677d0b3732eSbhollerL(SSE7Q2): movdqa %xmm0,-0x27(%rdi) 678d0b3732eSbhollerL(SSE7Q1): movdqa %xmm0,-0x17(%rdi) 679d0b3732eSbhollerL(SSE7Q0): mov %edx,-0x7(%rdi) 680d0b3732eSbholler mov %dx,-0x3(%rdi) 681d0b3732eSbholler mov %dl,-0x1(%rdi) 682d0b3732eSbholler ret 683d0b3732eSbholler 684d0b3732eSbholler .balign 16 685d0b3732eSbhollerL(SSE8QB): movdqa %xmm0,-0xb8(%rdi) 686d0b3732eSbhollerL(SSE8QA): movdqa %xmm0,-0xa8(%rdi) 687d0b3732eSbhollerL(SSE8Q9): movdqa %xmm0,-0x98(%rdi) 688d0b3732eSbhollerL(SSE8Q8): movdqa %xmm0,-0x88(%rdi) 689d0b3732eSbhollerL(SSE8Q7): movdqa %xmm0,-0x78(%rdi) 690d0b3732eSbhollerL(SSE8Q6): movdqa %xmm0,-0x68(%rdi) 691d0b3732eSbhollerL(SSE8Q5): movdqa %xmm0,-0x58(%rdi) 692d0b3732eSbhollerL(SSE8Q4): movdqa %xmm0,-0x48(%rdi) 693d0b3732eSbhollerL(SSE8Q3): movdqa %xmm0,-0x38(%rdi) 694d0b3732eSbhollerL(SSE8Q2): movdqa %xmm0,-0x28(%rdi) 695d0b3732eSbhollerL(SSE8Q1): movdqa %xmm0,-0x18(%rdi) 696d0b3732eSbhollerL(SSE8Q0): mov %rdx,-0x8(%rdi) 697d0b3732eSbholler ret 698d0b3732eSbholler 699d0b3732eSbholler .balign 16 700d0b3732eSbhollerL(SSE9QB): movdqa %xmm0,-0xb9(%rdi) 701d0b3732eSbhollerL(SSE9QA): movdqa %xmm0,-0xa9(%rdi) 702d0b3732eSbhollerL(SSE9Q9): movdqa %xmm0,-0x99(%rdi) 703d0b3732eSbhollerL(SSE9Q8): movdqa %xmm0,-0x89(%rdi) 704d0b3732eSbhollerL(SSE9Q7): movdqa %xmm0,-0x79(%rdi) 705d0b3732eSbhollerL(SSE9Q6): movdqa %xmm0,-0x69(%rdi) 706d0b3732eSbhollerL(SSE9Q5): movdqa %xmm0,-0x59(%rdi) 
707d0b3732eSbhollerL(SSE9Q4): movdqa %xmm0,-0x49(%rdi) 708d0b3732eSbhollerL(SSE9Q3): movdqa %xmm0,-0x39(%rdi) 709d0b3732eSbhollerL(SSE9Q2): movdqa %xmm0,-0x29(%rdi) 710d0b3732eSbhollerL(SSE9Q1): movdqa %xmm0,-0x19(%rdi) 711d0b3732eSbhollerL(SSE9Q0): mov %rdx,-0x9(%rdi) 712d0b3732eSbholler mov %dl,-0x1(%rdi) 713d0b3732eSbholler ret 714d0b3732eSbholler 715d0b3732eSbholler .balign 16 716d0b3732eSbhollerL(SSE10QB): movdqa %xmm0,-0xba(%rdi) 717d0b3732eSbhollerL(SSE10QA): movdqa %xmm0,-0xaa(%rdi) 718d0b3732eSbhollerL(SSE10Q9): movdqa %xmm0,-0x9a(%rdi) 719d0b3732eSbhollerL(SSE10Q8): movdqa %xmm0,-0x8a(%rdi) 720d0b3732eSbhollerL(SSE10Q7): movdqa %xmm0,-0x7a(%rdi) 721d0b3732eSbhollerL(SSE10Q6): movdqa %xmm0,-0x6a(%rdi) 722d0b3732eSbhollerL(SSE10Q5): movdqa %xmm0,-0x5a(%rdi) 723d0b3732eSbhollerL(SSE10Q4): movdqa %xmm0,-0x4a(%rdi) 724d0b3732eSbhollerL(SSE10Q3): movdqa %xmm0,-0x3a(%rdi) 725d0b3732eSbhollerL(SSE10Q2): movdqa %xmm0,-0x2a(%rdi) 726d0b3732eSbhollerL(SSE10Q1): movdqa %xmm0,-0x1a(%rdi) 727d0b3732eSbhollerL(SSE10Q0): mov %rdx,-0xa(%rdi) 728d0b3732eSbholler mov %dx,-0x2(%rdi) 729d0b3732eSbholler ret 730d0b3732eSbholler 731d0b3732eSbholler .balign 16 732d0b3732eSbhollerL(SSE11QB): movdqa %xmm0,-0xbb(%rdi) 733d0b3732eSbhollerL(SSE11QA): movdqa %xmm0,-0xab(%rdi) 734d0b3732eSbhollerL(SSE11Q9): movdqa %xmm0,-0x9b(%rdi) 735d0b3732eSbhollerL(SSE11Q8): movdqa %xmm0,-0x8b(%rdi) 736d0b3732eSbhollerL(SSE11Q7): movdqa %xmm0,-0x7b(%rdi) 737d0b3732eSbhollerL(SSE11Q6): movdqa %xmm0,-0x6b(%rdi) 738d0b3732eSbhollerL(SSE11Q5): movdqa %xmm0,-0x5b(%rdi) 739d0b3732eSbhollerL(SSE11Q4): movdqa %xmm0,-0x4b(%rdi) 740d0b3732eSbhollerL(SSE11Q3): movdqa %xmm0,-0x3b(%rdi) 741d0b3732eSbhollerL(SSE11Q2): movdqa %xmm0,-0x2b(%rdi) 742d0b3732eSbhollerL(SSE11Q1): movdqa %xmm0,-0x1b(%rdi) 743d0b3732eSbhollerL(SSE11Q0): mov %rdx,-0xb(%rdi) 744d0b3732eSbholler mov %dx,-0x3(%rdi) 745d0b3732eSbholler mov %dl,-0x1(%rdi) 746d0b3732eSbholler ret 747d0b3732eSbholler 748d0b3732eSbholler .balign 16 
749d0b3732eSbhollerL(SSE12QB): movdqa %xmm0,-0xbc(%rdi) 750d0b3732eSbhollerL(SSE12QA): movdqa %xmm0,-0xac(%rdi) 751d0b3732eSbhollerL(SSE12Q9): movdqa %xmm0,-0x9c(%rdi) 752d0b3732eSbhollerL(SSE12Q8): movdqa %xmm0,-0x8c(%rdi) 753d0b3732eSbhollerL(SSE12Q7): movdqa %xmm0,-0x7c(%rdi) 754d0b3732eSbhollerL(SSE12Q6): movdqa %xmm0,-0x6c(%rdi) 755d0b3732eSbhollerL(SSE12Q5): movdqa %xmm0,-0x5c(%rdi) 756d0b3732eSbhollerL(SSE12Q4): movdqa %xmm0,-0x4c(%rdi) 757d0b3732eSbhollerL(SSE12Q3): movdqa %xmm0,-0x3c(%rdi) 758d0b3732eSbhollerL(SSE12Q2): movdqa %xmm0,-0x2c(%rdi) 759d0b3732eSbhollerL(SSE12Q1): movdqa %xmm0,-0x1c(%rdi) /* Tail of the L(SSE12Qn) ladder: 8 bytes from %rdx + 4 bytes from %edx = 12 bytes ending at %rdi. */ 760d0b3732eSbhollerL(SSE12Q0): mov %rdx,-0xc(%rdi) 761d0b3732eSbholler mov %edx,-0x4(%rdi) 762d0b3732eSbholler ret 763d0b3732eSbholler 764d0b3732eSbholler .balign 16 /* L(SSE13Qn) ladder: n movdqa stores, then a 13-byte tail (8 from %rdx + 4 from %edx + 1 from %dl). */ 765d0b3732eSbhollerL(SSE13QB): movdqa %xmm0,-0xbd(%rdi) 766d0b3732eSbhollerL(SSE13QA): movdqa %xmm0,-0xad(%rdi) 767d0b3732eSbhollerL(SSE13Q9): movdqa %xmm0,-0x9d(%rdi) 768d0b3732eSbhollerL(SSE13Q8): movdqa %xmm0,-0x8d(%rdi) 769d0b3732eSbhollerL(SSE13Q7): movdqa %xmm0,-0x7d(%rdi) 770d0b3732eSbhollerL(SSE13Q6): movdqa %xmm0,-0x6d(%rdi) 771d0b3732eSbhollerL(SSE13Q5): movdqa %xmm0,-0x5d(%rdi) 772d0b3732eSbhollerL(SSE13Q4): movdqa %xmm0,-0x4d(%rdi) 773d0b3732eSbhollerL(SSE13Q3): movdqa %xmm0,-0x3d(%rdi) 774d0b3732eSbhollerL(SSE13Q2): movdqa %xmm0,-0x2d(%rdi) 775d0b3732eSbhollerL(SSE13Q1): movdqa %xmm0,-0x1d(%rdi) 776d0b3732eSbhollerL(SSE13Q0): mov %rdx,-0xd(%rdi) 777d0b3732eSbholler mov %edx,-0x5(%rdi) 778d0b3732eSbholler mov %dl,-0x1(%rdi) 779d0b3732eSbholler ret 780d0b3732eSbholler 781d0b3732eSbholler .balign 16 /* L(SSE14Qn) ladder: 16*n + 14 bytes; entries QB..Q5 are here, the rest follows. */ 782d0b3732eSbhollerL(SSE14QB): movdqa %xmm0,-0xbe(%rdi) 783d0b3732eSbhollerL(SSE14QA): movdqa %xmm0,-0xae(%rdi) 784d0b3732eSbhollerL(SSE14Q9): movdqa %xmm0,-0x9e(%rdi) 785d0b3732eSbhollerL(SSE14Q8): movdqa %xmm0,-0x8e(%rdi) 786d0b3732eSbhollerL(SSE14Q7): movdqa %xmm0,-0x7e(%rdi) 787d0b3732eSbhollerL(SSE14Q6): movdqa %xmm0,-0x6e(%rdi) 788d0b3732eSbhollerL(SSE14Q5): movdqa %xmm0,-0x5e(%rdi) 
789d0b3732eSbhollerL(SSE14Q4): movdqa %xmm0,-0x4e(%rdi) 790d0b3732eSbhollerL(SSE14Q3): movdqa %xmm0,-0x3e(%rdi) 791d0b3732eSbhollerL(SSE14Q2): movdqa %xmm0,-0x2e(%rdi) 792d0b3732eSbhollerL(SSE14Q1): movdqa %xmm0,-0x1e(%rdi) /* Tail of the L(SSE14Qn) ladder: 8 bytes from %rdx + 4 from %edx + 2 from %dx. */ 793d0b3732eSbhollerL(SSE14Q0): mov %rdx,-0xe(%rdi) 794d0b3732eSbholler mov %edx,-0x6(%rdi) 795d0b3732eSbholler mov %dx,-0x2(%rdi) 796d0b3732eSbholler ret 797d0b3732eSbholler 798d0b3732eSbholler .balign 16 /* L(SSE15Qn) ladder: n movdqa stores, then a 15-byte tail (8 + 4 + 2 + 1 bytes from %rdx, %edx, %dx, %dl). */ 799d0b3732eSbhollerL(SSE15QB): movdqa %xmm0,-0xbf(%rdi) 800d0b3732eSbhollerL(SSE15QA): movdqa %xmm0,-0xaf(%rdi) 801d0b3732eSbhollerL(SSE15Q9): movdqa %xmm0,-0x9f(%rdi) 802d0b3732eSbhollerL(SSE15Q8): movdqa %xmm0,-0x8f(%rdi) 803d0b3732eSbhollerL(SSE15Q7): movdqa %xmm0,-0x7f(%rdi) 804d0b3732eSbhollerL(SSE15Q6): movdqa %xmm0,-0x6f(%rdi) 805d0b3732eSbhollerL(SSE15Q5): movdqa %xmm0,-0x5f(%rdi) 806d0b3732eSbhollerL(SSE15Q4): movdqa %xmm0,-0x4f(%rdi) 807d0b3732eSbhollerL(SSE15Q3): movdqa %xmm0,-0x3f(%rdi) 808d0b3732eSbhollerL(SSE15Q2): movdqa %xmm0,-0x2f(%rdi) 809d0b3732eSbhollerL(SSE15Q1): movdqa %xmm0,-0x1f(%rdi) 810d0b3732eSbhollerL(SSE15Q0): mov %rdx,-0xf(%rdi) 811d0b3732eSbholler mov %edx,-0x7(%rdi) 812d0b3732eSbholler mov %dx,-0x3(%rdi) 813d0b3732eSbholler mov %dl,-0x1(%rdi) 814d0b3732eSbholler ret 815d0b3732eSbholler 816d0b3732eSbholler .balign 16 /* L(byte32sse2_pre): load .largest_level_cache_size as a 32-bit value into %r9d and compare it with the byte count in %r8. A larger count (signed jg) goes to the non-temporal loop L(sse2_nt_move); anything else falls through into L(byte32sse2). */ 817d0b3732eSbhollerL(byte32sse2_pre): 818d0b3732eSbholler mov .largest_level_cache_size(%rip),%r9d 819d0b3732eSbholler cmp %r9,%r8 820d0b3732eSbholler jg L(sse2_nt_move) 821d0b3732eSbholler #jmp L(byte32sse2) # Fall thru... 
822d0b3732eSbholler 823d0b3732eSbholler .balign 16 /* L(byte32sse2): fill 128 bytes per pass with eight movdqa stores of %xmm0. %r8 is reduced by 128 and compared with 128 up front; the jge at the bottom consumes those flags, which the movdqa and lea instructions in between do not modify. Once fewer than 128 bytes remain, %rdi is advanced by the remainder and the L(SSExDx) offset table, indexed by %r8, selects the tail ladder entry. */ 824d0b3732eSbhollerL(byte32sse2): 825d0b3732eSbholler lea -0x80(%r8),%r8 # 128 826d0b3732eSbholler cmp $0x80,%r8 827d0b3732eSbholler movdqa %xmm0,(%rdi) 828d0b3732eSbholler movdqa %xmm0,0x10(%rdi) 829d0b3732eSbholler movdqa %xmm0,0x20(%rdi) 830d0b3732eSbholler movdqa %xmm0,0x30(%rdi) 831d0b3732eSbholler movdqa %xmm0,0x40(%rdi) 832d0b3732eSbholler movdqa %xmm0,0x50(%rdi) 833d0b3732eSbholler movdqa %xmm0,0x60(%rdi) 834d0b3732eSbholler movdqa %xmm0,0x70(%rdi) 835d0b3732eSbholler 836d0b3732eSbholler lea 0x80(%rdi),%rdi 837d0b3732eSbholler jge L(byte32sse2) 838d0b3732eSbholler 839d0b3732eSbholler lea L(SSExDx)(%rip),%r11 840d0b3732eSbholler add %r8,%rdi 841d0b3732eSbholler movslq (%r11,%r8,4),%rcx 842d0b3732eSbholler lea (%rcx,%r11,1),%r11 843d0b3732eSbholler jmpq *%r11 844d0b3732eSbholler 845d0b3732eSbholler .balign 16 /* L(sse2_nt_move): the same 128-byte loop using movntdq (non-temporal) stores; an sfence follows the loop before the tail is dispatched through L(SSExDx). */ 846d0b3732eSbhollerL(sse2_nt_move): 847d0b3732eSbholler sub $0x80,%r8 # 128 848d0b3732eSbholler movntdq %xmm0,(%rdi) 849d0b3732eSbholler movntdq %xmm0,0x10(%rdi) 850d0b3732eSbholler movntdq %xmm0,0x20(%rdi) 851d0b3732eSbholler movntdq %xmm0,0x30(%rdi) 852d0b3732eSbholler movntdq %xmm0,0x40(%rdi) 853d0b3732eSbholler movntdq %xmm0,0x50(%rdi) 854d0b3732eSbholler movntdq %xmm0,0x60(%rdi) 855d0b3732eSbholler movntdq %xmm0,0x70(%rdi) 856d0b3732eSbholler add $0x80,%rdi 857d0b3732eSbholler cmp $0x80,%r8 858d0b3732eSbholler jge L(sse2_nt_move) 859d0b3732eSbholler 860d0b3732eSbholler sfence 861d0b3732eSbholler lea L(SSExDx)(%rip),%r11 862d0b3732eSbholler add %r8,%rdi 863d0b3732eSbholler movslq (%r11,%r8,4),%rcx 864d0b3732eSbholler lea (%rcx,%r11,1),%r11 865d0b3732eSbholler jmpq *%r11 866d0b3732eSbholler 867d0b3732eSbholler /* 868d0b3732eSbholler * Don't use SSE 869d0b3732eSbholler */ 870d0b3732eSbholler .balign 16 /* L(Loop8byte_pre): a byte count in %r8 above .largest_level_cache_size (signed jg) goes to L(Loop8byte_nt_move); the cmp that starts here checks the count against 0x800 next. */ 871d0b3732eSbhollerL(Loop8byte_pre): 872d0b3732eSbholler mov .largest_level_cache_size(%rip),%r9d 873d0b3732eSbholler cmp %r9,%r8 874d0b3732eSbholler jg L(Loop8byte_nt_move) 875d0b3732eSbholler cmp 
$0x800,%r8 # Use rep sstoq 876d0b3732eSbholler jge L(use_rep) 877d0b3732eSbholler 878d0b3732eSbholler .balign 16 /* L(Loop8byte): fill 128 bytes per pass with sixteen 8-byte stores of %rdx. The cmp sits in the middle of the stores; its flags reach the jge at the bottom because mov and lea do not modify flags. */ 879d0b3732eSbhollerL(Loop8byte): 880d0b3732eSbholler lea -0x80(%r8),%r8 # 128 881d0b3732eSbholler mov %rdx,(%rdi) 882d0b3732eSbholler mov %rdx,0x8(%rdi) 883d0b3732eSbholler mov %rdx,0x10(%rdi) 884d0b3732eSbholler mov %rdx,0x18(%rdi) 885d0b3732eSbholler mov %rdx,0x20(%rdi) 886d0b3732eSbholler mov %rdx,0x28(%rdi) 887d0b3732eSbholler mov %rdx,0x30(%rdi) 888d0b3732eSbholler mov %rdx,0x38(%rdi) 889d0b3732eSbholler cmp $0x80,%r8 890d0b3732eSbholler mov %rdx,0x40(%rdi) 891d0b3732eSbholler mov %rdx,0x48(%rdi) 892d0b3732eSbholler mov %rdx,0x50(%rdi) 893d0b3732eSbholler mov %rdx,0x58(%rdi) 894d0b3732eSbholler mov %rdx,0x60(%rdi) 895d0b3732eSbholler mov %rdx,0x68(%rdi) 896d0b3732eSbholler mov %rdx,0x70(%rdi) 897d0b3732eSbholler mov %rdx,0x78(%rdi) 898d0b3732eSbholler lea 0x80(%rdi),%rdi 899d0b3732eSbholler jge L(Loop8byte) 900d0b3732eSbholler /* 1: tail dispatch. %rdi is advanced by the remainder in %r8 and control jumps through the L(setPxQx) offset table (defined outside this chunk) indexed by %r8. L(use_rep) also branches back here. */ 901d0b3732eSbholler1: 902d0b3732eSbholler lea L(setPxQx)(%rip),%r11 903d0b3732eSbholler lea (%rdi,%r8,1),%rdi 904d0b3732eSbholler 905d0b3732eSbholler movslq (%r11,%r8,4),%rcx 906d0b3732eSbholler lea (%rcx,%r11,1),%r11 907d0b3732eSbholler jmpq *%r11 908d0b3732eSbholler 909d0b3732eSbholler /* 910d0b3732eSbholler * Use rep sstoq for sizes >= 2K (the 0x800 check above branches with jge). The two xchg instructions move the fill pattern into %rax for the string store and then restore %rax and %rdx; %rcx is the byte count shifted right by 3, and 1..7 leftover bytes are finished at label 1. NOTE(review): sstoq is presumably the Solaris assembler spelling of stosq - confirm. 911d0b3732eSbholler */ 912d0b3732eSbholler .balign 16 913d0b3732eSbhollerL(use_rep): 914d0b3732eSbholler movq %r8,%rcx # get size in bytes 915d0b3732eSbholler xchg %rax,%rdx 916d0b3732eSbholler shrq $3,%rcx 917d0b3732eSbholler rep 918d0b3732eSbholler sstoq 919d0b3732eSbholler xchg %rax,%rdx 920d0b3732eSbholler andq $7,%r8 # remaining bytes 921d0b3732eSbholler jnz 1b 922d0b3732eSbholler ret 923d0b3732eSbholler 924d0b3732eSbholler .balign 16 /* L(Loop8byte_nt_move): same 128-byte loop shape as L(Loop8byte) but with movnti (non-temporal) stores of %rdx. */ 925d0b3732eSbhollerL(Loop8byte_nt_move): 926*fad5204eSbostrovs lea -0x80(%r8),%r8 # 128 927d0b3732eSbholler movnti %rdx,(%rdi) 928d0b3732eSbholler movnti %rdx,0x8(%rdi) 929d0b3732eSbholler movnti %rdx,0x10(%rdi) 930d0b3732eSbholler movnti %rdx,0x18(%rdi) 
931d0b3732eSbholler movnti %rdx,0x20(%rdi) 932d0b3732eSbholler movnti %rdx,0x28(%rdi) 933d0b3732eSbholler movnti %rdx,0x30(%rdi) 934d0b3732eSbholler movnti %rdx,0x38(%rdi) /* Loop test placed mid-body: the flags from this cmp are consumed by the jge after the remaining movnti stores and the lea. */ 935*fad5204eSbostrovs cmp $0x80,%r8 936*fad5204eSbostrovs movnti %rdx,0x40(%rdi) 937*fad5204eSbostrovs movnti %rdx,0x48(%rdi) 938*fad5204eSbostrovs movnti %rdx,0x50(%rdi) 939*fad5204eSbostrovs movnti %rdx,0x58(%rdi) 940*fad5204eSbostrovs movnti %rdx,0x60(%rdi) 941*fad5204eSbostrovs movnti %rdx,0x68(%rdi) 942*fad5204eSbostrovs movnti %rdx,0x70(%rdi) 943*fad5204eSbostrovs movnti %rdx,0x78(%rdi) 944*fad5204eSbostrovs lea 0x80(%rdi),%rdi 945d0b3732eSbholler jge L(Loop8byte_nt_move) 946d0b3732eSbholler /* sfence after the non-temporal stores, then the remainder in %r8 is dispatched through the L(setPxQx) offset table (defined outside this chunk). */ 947d0b3732eSbholler sfence 948d0b3732eSbholler lea L(setPxQx)(%rip),%r11 949d0b3732eSbholler lea (%rdi,%r8,1),%rdi 950d0b3732eSbholler 951d0b3732eSbholler movslq (%r11,%r8,4),%rcx 952d0b3732eSbholler lea (%rcx,%r11,1),%r11 953d0b3732eSbholler jmpq *%r11 954d0b3732eSbholler 955d0b3732eSbholler .balign 16 /* L(SSExDx): table of 32-bit offsets relative to the table itself. The SSE2 dispatch code reads entry %r8 with movslq and adds the table address back before jumping. Entries are ordered so that index 16*q + j is L(SSEjQq): q 16-byte stores followed by a j-byte tail. Q0 row is indexes 0..15, Q1 row is 16..31. */ 956d0b3732eSbhollerL(SSExDx): .int L(SSE0Q0) -L(SSExDx) 957d0b3732eSbholler .int L(SSE1Q0) -L(SSExDx) 958d0b3732eSbholler .int L(SSE2Q0) -L(SSExDx) 959d0b3732eSbholler .int L(SSE3Q0) -L(SSExDx) 960d0b3732eSbholler .int L(SSE4Q0) -L(SSExDx) 961d0b3732eSbholler .int L(SSE5Q0) -L(SSExDx) 962d0b3732eSbholler .int L(SSE6Q0) -L(SSExDx) 963d0b3732eSbholler .int L(SSE7Q0) -L(SSExDx) 964d0b3732eSbholler 965d0b3732eSbholler .int L(SSE8Q0) -L(SSExDx) 966d0b3732eSbholler .int L(SSE9Q0) -L(SSExDx) 967d0b3732eSbholler .int L(SSE10Q0)-L(SSExDx) 968d0b3732eSbholler .int L(SSE11Q0)-L(SSExDx) 969d0b3732eSbholler .int L(SSE12Q0)-L(SSExDx) 970d0b3732eSbholler .int L(SSE13Q0)-L(SSExDx) 971d0b3732eSbholler .int L(SSE14Q0)-L(SSExDx) 972d0b3732eSbholler .int L(SSE15Q0)-L(SSExDx) 973d0b3732eSbholler 974d0b3732eSbholler .int L(SSE0Q1) -L(SSExDx) 975d0b3732eSbholler .int L(SSE1Q1) -L(SSExDx) 976d0b3732eSbholler .int L(SSE2Q1) -L(SSExDx) 977d0b3732eSbholler .int L(SSE3Q1) -L(SSExDx) 978d0b3732eSbholler .int L(SSE4Q1) -L(SSExDx) 
979d0b3732eSbholler .int L(SSE5Q1) -L(SSExDx) 980d0b3732eSbholler .int L(SSE6Q1) -L(SSExDx) 981d0b3732eSbholler .int L(SSE7Q1) -L(SSExDx) 982d0b3732eSbholler 983d0b3732eSbholler .int L(SSE8Q1) -L(SSExDx) 984d0b3732eSbholler .int L(SSE9Q1) -L(SSExDx) 985d0b3732eSbholler .int L(SSE10Q1)-L(SSExDx) 986d0b3732eSbholler .int L(SSE11Q1)-L(SSExDx) 987d0b3732eSbholler .int L(SSE12Q1)-L(SSExDx) 988d0b3732eSbholler .int L(SSE13Q1)-L(SSExDx) 989d0b3732eSbholler .int L(SSE14Q1)-L(SSExDx) 990d0b3732eSbholler .int L(SSE15Q1)-L(SSExDx) 991d0b3732eSbholler /* L(SSExDx) offset table, continued. Q2 row: indexes 32..47, i.e. two 16-byte stores plus a 0..15 byte tail. */ 992d0b3732eSbholler .int L(SSE0Q2) -L(SSExDx) 993d0b3732eSbholler .int L(SSE1Q2) -L(SSExDx) 994d0b3732eSbholler .int L(SSE2Q2) -L(SSExDx) 995d0b3732eSbholler .int L(SSE3Q2) -L(SSExDx) 996d0b3732eSbholler .int L(SSE4Q2) -L(SSExDx) 997d0b3732eSbholler .int L(SSE5Q2) -L(SSExDx) 998d0b3732eSbholler .int L(SSE6Q2) -L(SSExDx) 999d0b3732eSbholler .int L(SSE7Q2) -L(SSExDx) 1000d0b3732eSbholler 1001d0b3732eSbholler .int L(SSE8Q2) -L(SSExDx) 1002d0b3732eSbholler .int L(SSE9Q2) -L(SSExDx) 1003d0b3732eSbholler .int L(SSE10Q2)-L(SSExDx) 1004d0b3732eSbholler .int L(SSE11Q2)-L(SSExDx) 1005d0b3732eSbholler .int L(SSE12Q2)-L(SSExDx) 1006d0b3732eSbholler .int L(SSE13Q2)-L(SSExDx) 1007d0b3732eSbholler .int L(SSE14Q2)-L(SSExDx) 1008d0b3732eSbholler .int L(SSE15Q2)-L(SSExDx) 1009d0b3732eSbholler /* Q3 row: indexes 48..63. */ 1010d0b3732eSbholler .int L(SSE0Q3) -L(SSExDx) 1011d0b3732eSbholler .int L(SSE1Q3) -L(SSExDx) 1012d0b3732eSbholler .int L(SSE2Q3) -L(SSExDx) 1013d0b3732eSbholler .int L(SSE3Q3) -L(SSExDx) 1014d0b3732eSbholler .int L(SSE4Q3) -L(SSExDx) 1015d0b3732eSbholler .int L(SSE5Q3) -L(SSExDx) 1016d0b3732eSbholler .int L(SSE6Q3) -L(SSExDx) 1017d0b3732eSbholler .int L(SSE7Q3) -L(SSExDx) 1018d0b3732eSbholler 1019d0b3732eSbholler .int L(SSE8Q3) -L(SSExDx) 1020d0b3732eSbholler .int L(SSE9Q3) -L(SSExDx) 1021d0b3732eSbholler .int L(SSE10Q3)-L(SSExDx) 1022d0b3732eSbholler .int L(SSE11Q3)-L(SSExDx) 1023d0b3732eSbholler .int L(SSE12Q3)-L(SSExDx) 1024d0b3732eSbholler .int 
L(SSE13Q3)-L(SSExDx) 1025d0b3732eSbholler .int L(SSE14Q3)-L(SSExDx) 1026d0b3732eSbholler .int L(SSE15Q3)-L(SSExDx) 1027d0b3732eSbholler /* L(SSExDx) offset table, continued. Q4 row: indexes 64..79, i.e. four 16-byte stores plus a 0..15 byte tail. */ 1028d0b3732eSbholler .int L(SSE0Q4) -L(SSExDx) 1029d0b3732eSbholler .int L(SSE1Q4) -L(SSExDx) 1030d0b3732eSbholler .int L(SSE2Q4) -L(SSExDx) 1031d0b3732eSbholler .int L(SSE3Q4) -L(SSExDx) 1032d0b3732eSbholler .int L(SSE4Q4) -L(SSExDx) 1033d0b3732eSbholler .int L(SSE5Q4) -L(SSExDx) 1034d0b3732eSbholler .int L(SSE6Q4) -L(SSExDx) 1035d0b3732eSbholler .int L(SSE7Q4) -L(SSExDx) 1036d0b3732eSbholler 1037d0b3732eSbholler .int L(SSE8Q4) -L(SSExDx) 1038d0b3732eSbholler .int L(SSE9Q4) -L(SSExDx) 1039d0b3732eSbholler .int L(SSE10Q4)-L(SSExDx) 1040d0b3732eSbholler .int L(SSE11Q4)-L(SSExDx) 1041d0b3732eSbholler .int L(SSE12Q4)-L(SSExDx) 1042d0b3732eSbholler .int L(SSE13Q4)-L(SSExDx) 1043d0b3732eSbholler .int L(SSE14Q4)-L(SSExDx) 1044d0b3732eSbholler .int L(SSE15Q4)-L(SSExDx) 1045d0b3732eSbholler /* Q5 row: indexes 80..95. */ 1046d0b3732eSbholler .int L(SSE0Q5) -L(SSExDx) 1047d0b3732eSbholler .int L(SSE1Q5) -L(SSExDx) 1048d0b3732eSbholler .int L(SSE2Q5) -L(SSExDx) 1049d0b3732eSbholler .int L(SSE3Q5) -L(SSExDx) 1050d0b3732eSbholler .int L(SSE4Q5) -L(SSExDx) 1051d0b3732eSbholler .int L(SSE5Q5) -L(SSExDx) 1052d0b3732eSbholler .int L(SSE6Q5) -L(SSExDx) 1053d0b3732eSbholler .int L(SSE7Q5) -L(SSExDx) 1054d0b3732eSbholler 1055d0b3732eSbholler .int L(SSE8Q5) -L(SSExDx) 1056d0b3732eSbholler .int L(SSE9Q5) -L(SSExDx) 1057d0b3732eSbholler .int L(SSE10Q5)-L(SSExDx) 1058d0b3732eSbholler .int L(SSE11Q5)-L(SSExDx) 1059d0b3732eSbholler .int L(SSE12Q5)-L(SSExDx) 1060d0b3732eSbholler .int L(SSE13Q5)-L(SSExDx) 1061d0b3732eSbholler .int L(SSE14Q5)-L(SSExDx) 1062d0b3732eSbholler .int L(SSE15Q5)-L(SSExDx) 1063d0b3732eSbholler /* Q6 row: indexes 96..111. */ 1064d0b3732eSbholler .int L(SSE0Q6) -L(SSExDx) 1065d0b3732eSbholler .int L(SSE1Q6) -L(SSExDx) 1066d0b3732eSbholler .int L(SSE2Q6) -L(SSExDx) 1067d0b3732eSbholler .int L(SSE3Q6) -L(SSExDx) 1068d0b3732eSbholler .int L(SSE4Q6) -L(SSExDx) 1069d0b3732eSbholler .int L(SSE5Q6) 
-L(SSExDx) 1070d0b3732eSbholler .int L(SSE6Q6) -L(SSExDx) 1071d0b3732eSbholler .int L(SSE7Q6) -L(SSExDx) 1072d0b3732eSbholler 1073d0b3732eSbholler .int L(SSE8Q6) -L(SSExDx) 1074d0b3732eSbholler .int L(SSE9Q6) -L(SSExDx) 1075d0b3732eSbholler .int L(SSE10Q6)-L(SSExDx) 1076d0b3732eSbholler .int L(SSE11Q6)-L(SSExDx) 1077d0b3732eSbholler .int L(SSE12Q6)-L(SSExDx) 1078d0b3732eSbholler .int L(SSE13Q6)-L(SSExDx) 1079d0b3732eSbholler .int L(SSE14Q6)-L(SSExDx) 1080d0b3732eSbholler .int L(SSE15Q6)-L(SSExDx) 1081d0b3732eSbholler /* L(SSExDx) offset table, continued. Q7 row: indexes 112..127, i.e. seven 16-byte stores plus a 0..15 byte tail. */ 1082d0b3732eSbholler .int L(SSE0Q7) -L(SSExDx) 1083d0b3732eSbholler .int L(SSE1Q7) -L(SSExDx) 1084d0b3732eSbholler .int L(SSE2Q7) -L(SSExDx) 1085d0b3732eSbholler .int L(SSE3Q7) -L(SSExDx) 1086d0b3732eSbholler .int L(SSE4Q7) -L(SSExDx) 1087d0b3732eSbholler .int L(SSE5Q7) -L(SSExDx) 1088d0b3732eSbholler .int L(SSE6Q7) -L(SSExDx) 1089d0b3732eSbholler .int L(SSE7Q7) -L(SSExDx) 1090d0b3732eSbholler 1091d0b3732eSbholler .int L(SSE8Q7) -L(SSExDx) 1092d0b3732eSbholler .int L(SSE9Q7) -L(SSExDx) 1093d0b3732eSbholler .int L(SSE10Q7)-L(SSExDx) 1094d0b3732eSbholler .int L(SSE11Q7)-L(SSExDx) 1095d0b3732eSbholler .int L(SSE12Q7)-L(SSExDx) 1096d0b3732eSbholler .int L(SSE13Q7)-L(SSExDx) 1097d0b3732eSbholler .int L(SSE14Q7)-L(SSExDx) 1098d0b3732eSbholler .int L(SSE15Q7)-L(SSExDx) 1099d0b3732eSbholler /* Q8 row: indexes 128..143. */ 1100d0b3732eSbholler .int L(SSE0Q8) -L(SSExDx) 1101d0b3732eSbholler .int L(SSE1Q8) -L(SSExDx) 1102d0b3732eSbholler .int L(SSE2Q8) -L(SSExDx) 1103d0b3732eSbholler .int L(SSE3Q8) -L(SSExDx) 1104d0b3732eSbholler .int L(SSE4Q8) -L(SSExDx) 1105d0b3732eSbholler .int L(SSE5Q8) -L(SSExDx) 1106d0b3732eSbholler .int L(SSE6Q8) -L(SSExDx) 1107d0b3732eSbholler .int L(SSE7Q8) -L(SSExDx) 1108d0b3732eSbholler 1109d0b3732eSbholler .int L(SSE8Q8) -L(SSExDx) 1110d0b3732eSbholler .int L(SSE9Q8) -L(SSExDx) 1111d0b3732eSbholler .int L(SSE10Q8)-L(SSExDx) 1112d0b3732eSbholler .int L(SSE11Q8)-L(SSExDx) 1113d0b3732eSbholler .int L(SSE12Q8)-L(SSExDx) 1114d0b3732eSbholler .int L(SSE13Q8)-L(SSExDx) 
1115d0b3732eSbholler .int L(SSE14Q8)-L(SSExDx) 1116d0b3732eSbholler .int L(SSE15Q8)-L(SSExDx) 1117d0b3732eSbholler /* L(SSExDx) offset table, continued. Q9 row: indexes 144..159, i.e. nine 16-byte stores plus a 0..15 byte tail. */ 1118d0b3732eSbholler .int L(SSE0Q9) -L(SSExDx) 1119d0b3732eSbholler .int L(SSE1Q9) -L(SSExDx) 1120d0b3732eSbholler .int L(SSE2Q9) -L(SSExDx) 1121d0b3732eSbholler .int L(SSE3Q9) -L(SSExDx) 1122d0b3732eSbholler .int L(SSE4Q9) -L(SSExDx) 1123d0b3732eSbholler .int L(SSE5Q9) -L(SSExDx) 1124d0b3732eSbholler .int L(SSE6Q9) -L(SSExDx) 1125d0b3732eSbholler .int L(SSE7Q9) -L(SSExDx) 1126d0b3732eSbholler 1127d0b3732eSbholler .int L(SSE8Q9) -L(SSExDx) 1128d0b3732eSbholler .int L(SSE9Q9) -L(SSExDx) 1129d0b3732eSbholler .int L(SSE10Q9)-L(SSExDx) 1130d0b3732eSbholler .int L(SSE11Q9)-L(SSExDx) 1131d0b3732eSbholler .int L(SSE12Q9)-L(SSExDx) 1132d0b3732eSbholler .int L(SSE13Q9)-L(SSExDx) 1133d0b3732eSbholler .int L(SSE14Q9)-L(SSExDx) 1134d0b3732eSbholler .int L(SSE15Q9)-L(SSExDx) 1135d0b3732eSbholler /* QA row: indexes 160..175. */ 1136d0b3732eSbholler .int L(SSE0QA) -L(SSExDx) 1137d0b3732eSbholler .int L(SSE1QA) -L(SSExDx) 1138d0b3732eSbholler .int L(SSE2QA) -L(SSExDx) 1139d0b3732eSbholler .int L(SSE3QA) -L(SSExDx) 1140d0b3732eSbholler .int L(SSE4QA) -L(SSExDx) 1141d0b3732eSbholler .int L(SSE5QA) -L(SSExDx) 1142d0b3732eSbholler .int L(SSE6QA) -L(SSExDx) 1143d0b3732eSbholler .int L(SSE7QA) -L(SSExDx) 1144d0b3732eSbholler 1145d0b3732eSbholler .int L(SSE8QA) -L(SSExDx) 1146d0b3732eSbholler .int L(SSE9QA) -L(SSExDx) 1147d0b3732eSbholler .int L(SSE10QA)-L(SSExDx) 1148d0b3732eSbholler .int L(SSE11QA)-L(SSExDx) 1149d0b3732eSbholler .int L(SSE12QA)-L(SSExDx) 1150d0b3732eSbholler .int L(SSE13QA)-L(SSExDx) 1151d0b3732eSbholler .int L(SSE14QA)-L(SSExDx) 1152d0b3732eSbholler .int L(SSE15QA)-L(SSExDx) 1153d0b3732eSbholler /* QB row: indexes 176..191 (eleven 16-byte stores plus tail); this is the last row. */ 1154d0b3732eSbholler .int L(SSE0QB) -L(SSExDx) 1155d0b3732eSbholler .int L(SSE1QB) -L(SSExDx) 1156d0b3732eSbholler .int L(SSE2QB) -L(SSExDx) 1157d0b3732eSbholler .int L(SSE3QB) -L(SSExDx) 1158d0b3732eSbholler .int L(SSE4QB) -L(SSExDx) 1159d0b3732eSbholler .int L(SSE5QB) -L(SSExDx) 
1160d0b3732eSbholler .int L(SSE6QB) -L(SSExDx) 1161d0b3732eSbholler .int L(SSE7QB) -L(SSExDx) 1162d0b3732eSbholler /* Second half of the QB row of the L(SSExDx) offset table: indexes 184..191. */ 1163d0b3732eSbholler .int L(SSE8QB) -L(SSExDx) 1164d0b3732eSbholler .int L(SSE9QB) -L(SSExDx) 1165d0b3732eSbholler .int L(SSE10QB)-L(SSExDx) 1166d0b3732eSbholler .int L(SSE11QB)-L(SSExDx) 1167d0b3732eSbholler .int L(SSE12QB)-L(SSExDx) 1168d0b3732eSbholler .int L(SSE13QB)-L(SSExDx) 1169d0b3732eSbholler .int L(SSE14QB)-L(SSExDx) 1170d0b3732eSbholler .int L(SSE15QB)-L(SSExDx) 1171d0b3732eSbholler /* End of the table: 12 rows of 16 entries, indexes 0..191. NOTE(review): SET_SIZE presumably comes from sys/asm_linkage.h (included at the top of the file) and closes the memset symbol - confirm. */ 11727c478bd9Sstevel@tonic-gate SET_SIZE(memset) 1173