/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
247c478bd9Sstevel@tonic-gate */ 257c478bd9Sstevel@tonic-gate 267c478bd9Sstevel@tonic-gate#pragma ident "%Z%%M% %I% %E% SMI" 277c478bd9Sstevel@tonic-gate 287c478bd9Sstevel@tonic-gate#include <sys/asm_linkage.h> 297c478bd9Sstevel@tonic-gate#include <sys/regset.h> 307c478bd9Sstevel@tonic-gate#include <sys/privregs.h> 317c478bd9Sstevel@tonic-gate 327c478bd9Sstevel@tonic-gate#if defined(__lint) 337c478bd9Sstevel@tonic-gate#include <sys/types.h> 347c478bd9Sstevel@tonic-gate#include <sys/archsystm.h> 357c478bd9Sstevel@tonic-gate#else 367c478bd9Sstevel@tonic-gate#include "assym.h" 377c478bd9Sstevel@tonic-gate#endif 387c478bd9Sstevel@tonic-gate 397c478bd9Sstevel@tonic-gate/* 407c478bd9Sstevel@tonic-gate * Do block operations using Streaming SIMD extensions 417c478bd9Sstevel@tonic-gate */ 427c478bd9Sstevel@tonic-gate 437c478bd9Sstevel@tonic-gate#if defined(DEBUG) 447c478bd9Sstevel@tonic-gate#if defined(__amd64) 457c478bd9Sstevel@tonic-gate#define ASSERT_KPREEMPT_DISABLED(t, r32, msg) \ 467c478bd9Sstevel@tonic-gate movq %gs:CPU_THREAD, t; \ 477c478bd9Sstevel@tonic-gate movsbl T_PREEMPT(t), r32; \ 487c478bd9Sstevel@tonic-gate testl r32, r32; \ 497c478bd9Sstevel@tonic-gate jne 5f; \ 507c478bd9Sstevel@tonic-gate pushq %rbp; \ 517c478bd9Sstevel@tonic-gate movq %rsp, %rbp; \ 527c478bd9Sstevel@tonic-gate leaq msg(%rip), %rdi; \ 537c478bd9Sstevel@tonic-gate xorl %eax, %eax; \ 547c478bd9Sstevel@tonic-gate call panic; \ 557c478bd9Sstevel@tonic-gate5: 567c478bd9Sstevel@tonic-gate#elif defined(__i386) 577c478bd9Sstevel@tonic-gate#define ASSERT_KPREEMPT_DISABLED(t, r32, msg) \ 587c478bd9Sstevel@tonic-gate movl %gs:CPU_THREAD, t; \ 597c478bd9Sstevel@tonic-gate movsbl T_PREEMPT(t), r32; \ 607c478bd9Sstevel@tonic-gate testl r32, r32; \ 617c478bd9Sstevel@tonic-gate jne 5f; \ 627c478bd9Sstevel@tonic-gate pushl %ebp; \ 637c478bd9Sstevel@tonic-gate movl %esp, %ebp; \ 647c478bd9Sstevel@tonic-gate pushl $msg; \ 657c478bd9Sstevel@tonic-gate call panic; \ 667c478bd9Sstevel@tonic-gate5: 
677c478bd9Sstevel@tonic-gate#endif /* __i386 */ 687c478bd9Sstevel@tonic-gate#else /* DEBUG */ 697c478bd9Sstevel@tonic-gate#define ASSERT_KPREEMPT_DISABLED(t, r32, msg) 707c478bd9Sstevel@tonic-gate#endif /* DEBUG */ 717c478bd9Sstevel@tonic-gate 727c478bd9Sstevel@tonic-gate#define BLOCKSHIFT 6 737c478bd9Sstevel@tonic-gate#define BLOCKSIZE 64 /* (1 << BLOCKSHIFT) */ 747c478bd9Sstevel@tonic-gate#define BLOCKMASK 63 /* (BLOCKSIZE - 1) */ 757c478bd9Sstevel@tonic-gate 767c478bd9Sstevel@tonic-gate#if (1 << BLOCKSHIFT) != BLOCKSIZE || BLOCKMASK != (BLOCKSIZE - 1) 777c478bd9Sstevel@tonic-gate#error "mucked up constants" 787c478bd9Sstevel@tonic-gate#endif 797c478bd9Sstevel@tonic-gate 807c478bd9Sstevel@tonic-gate#if defined(__lint) 817c478bd9Sstevel@tonic-gate 827c478bd9Sstevel@tonic-gate/*ARGSUSED*/ 837c478bd9Sstevel@tonic-gatevoid 847c478bd9Sstevel@tonic-gatehwblkclr(void *addr, size_t size) 857c478bd9Sstevel@tonic-gate{} 867c478bd9Sstevel@tonic-gate 877c478bd9Sstevel@tonic-gate#else /* __lint */ 887c478bd9Sstevel@tonic-gate 897c478bd9Sstevel@tonic-gate#if defined(__amd64) 907c478bd9Sstevel@tonic-gate#define ADD addq 917c478bd9Sstevel@tonic-gate#define SUB subq 927c478bd9Sstevel@tonic-gate#else 937c478bd9Sstevel@tonic-gate#define ADD addl 947c478bd9Sstevel@tonic-gate#define SUB subl 957c478bd9Sstevel@tonic-gate#endif 967c478bd9Sstevel@tonic-gate 977c478bd9Sstevel@tonic-gate#define SAVE_XMM0(r) \ 987c478bd9Sstevel@tonic-gate SAVE_XMM_PROLOG(r, 1); \ 997c478bd9Sstevel@tonic-gate movdqa %xmm0, (r) 1007c478bd9Sstevel@tonic-gate 1017c478bd9Sstevel@tonic-gate#define ZERO_LOOP_INIT_XMM(dst) \ 1027c478bd9Sstevel@tonic-gate pxor %xmm0, %xmm0 1037c478bd9Sstevel@tonic-gate 1047c478bd9Sstevel@tonic-gate#define ZERO_LOOP_BODY_XMM(dst, cnt) \ 1057c478bd9Sstevel@tonic-gate movntdq %xmm0, (dst); \ 1067c478bd9Sstevel@tonic-gate movntdq %xmm0, 0x10(dst); \ 1077c478bd9Sstevel@tonic-gate movntdq %xmm0, 0x20(dst); \ 1087c478bd9Sstevel@tonic-gate movntdq %xmm0, 0x30(dst); \ 
1097c478bd9Sstevel@tonic-gate ADD $BLOCKSIZE, dst; \ 1107c478bd9Sstevel@tonic-gate SUB $1, cnt 1117c478bd9Sstevel@tonic-gate 1127c478bd9Sstevel@tonic-gate#define ZERO_LOOP_FINI_XMM(dst) \ 1137c478bd9Sstevel@tonic-gate mfence 1147c478bd9Sstevel@tonic-gate 1157c478bd9Sstevel@tonic-gate#define RSTOR_XMM0(r) \ 1167c478bd9Sstevel@tonic-gate movdqa 0x0(r), %xmm0; \ 1177c478bd9Sstevel@tonic-gate RSTOR_XMM_EPILOG(r, 1) 1187c478bd9Sstevel@tonic-gate 1197c478bd9Sstevel@tonic-gate#if defined(__amd64) 1207c478bd9Sstevel@tonic-gate 1217c478bd9Sstevel@tonic-gate /* 1227c478bd9Sstevel@tonic-gate * %rdi dst 1237c478bd9Sstevel@tonic-gate * %rsi size 1247c478bd9Sstevel@tonic-gate * %rax saved %cr0 (#if DEBUG then %eax is t->t_preempt) 1257c478bd9Sstevel@tonic-gate * %r8 pointer to %xmm register save area 1267c478bd9Sstevel@tonic-gate */ 1277c478bd9Sstevel@tonic-gate ENTRY(hwblkclr) 1287c478bd9Sstevel@tonic-gate pushq %rbp 1297c478bd9Sstevel@tonic-gate movq %rsp, %rbp 1307c478bd9Sstevel@tonic-gate testl $BLOCKMASK, %edi /* address must be BLOCKSIZE aligned */ 1317c478bd9Sstevel@tonic-gate jne .dobzero 1327c478bd9Sstevel@tonic-gate cmpq $BLOCKSIZE, %rsi /* size must be at least BLOCKSIZE */ 1337c478bd9Sstevel@tonic-gate jl .dobzero 1347c478bd9Sstevel@tonic-gate testq $BLOCKMASK, %rsi /* .. 
and be a multiple of BLOCKSIZE */ 1357c478bd9Sstevel@tonic-gate jne .dobzero 1367c478bd9Sstevel@tonic-gate shrq $BLOCKSHIFT, %rsi 1377c478bd9Sstevel@tonic-gate 1387c478bd9Sstevel@tonic-gate ASSERT_KPREEMPT_DISABLED(%r11, %eax, .not_disabled) 1397c478bd9Sstevel@tonic-gate movq %cr0, %rax 1407c478bd9Sstevel@tonic-gate clts 1417c478bd9Sstevel@tonic-gate testl $CR0_TS, %eax 1427c478bd9Sstevel@tonic-gate jnz 1f 1437c478bd9Sstevel@tonic-gate 1447c478bd9Sstevel@tonic-gate SAVE_XMM0(%r8) 1457c478bd9Sstevel@tonic-gate1: ZERO_LOOP_INIT_XMM(%rdi) 1467c478bd9Sstevel@tonic-gate9: ZERO_LOOP_BODY_XMM(%rdi, %rsi) 1477c478bd9Sstevel@tonic-gate jnz 9b 1487c478bd9Sstevel@tonic-gate ZERO_LOOP_FINI_XMM(%rdi) 1497c478bd9Sstevel@tonic-gate 1507c478bd9Sstevel@tonic-gate testl $CR0_TS, %eax 1517c478bd9Sstevel@tonic-gate jnz 2f 1527c478bd9Sstevel@tonic-gate RSTOR_XMM0(%r8) 1537c478bd9Sstevel@tonic-gate2: movq %rax, %cr0 1547c478bd9Sstevel@tonic-gate leave 1557c478bd9Sstevel@tonic-gate ret 1567c478bd9Sstevel@tonic-gate.dobzero: 1577c478bd9Sstevel@tonic-gate leave 1587c478bd9Sstevel@tonic-gate jmp bzero 1597c478bd9Sstevel@tonic-gate SET_SIZE(hwblkclr) 1607c478bd9Sstevel@tonic-gate 1617c478bd9Sstevel@tonic-gate#elif defined(__i386) 1627c478bd9Sstevel@tonic-gate 1637c478bd9Sstevel@tonic-gate /* 1647c478bd9Sstevel@tonic-gate * %eax dst 1657c478bd9Sstevel@tonic-gate * %ecx size in bytes, loop count 1667c478bd9Sstevel@tonic-gate * %ebx saved %cr0 (#if DEBUG then t->t_preempt) 1677c478bd9Sstevel@tonic-gate * %edi pointer to %xmm register save area 1687c478bd9Sstevel@tonic-gate */ 1697c478bd9Sstevel@tonic-gate ENTRY(hwblkclr) 1707c478bd9Sstevel@tonic-gate movl 4(%esp), %eax 1717c478bd9Sstevel@tonic-gate movl 8(%esp), %ecx 1727c478bd9Sstevel@tonic-gate testl $BLOCKMASK, %eax /* address must be BLOCKSIZE aligned */ 1737c478bd9Sstevel@tonic-gate jne .dobzero 1747c478bd9Sstevel@tonic-gate cmpl $BLOCKSIZE, %ecx /* size must be at least BLOCKSIZE */ 1757c478bd9Sstevel@tonic-gate jl .dobzero 
1767c478bd9Sstevel@tonic-gate testl $BLOCKMASK, %ecx /* .. and be a multiple of BLOCKSIZE */ 1777c478bd9Sstevel@tonic-gate jne .dobzero 1787c478bd9Sstevel@tonic-gate shrl $BLOCKSHIFT, %ecx 1797c478bd9Sstevel@tonic-gate movl 0xc(%esp), %edx 1807c478bd9Sstevel@tonic-gate pushl %ebx 1817c478bd9Sstevel@tonic-gate 1827c478bd9Sstevel@tonic-gate pushl %esi 1837c478bd9Sstevel@tonic-gate ASSERT_KPREEMPT_DISABLED(%esi, %ebx, .not_disabled) 1847c478bd9Sstevel@tonic-gate popl %esi 1857c478bd9Sstevel@tonic-gate movl %cr0, %ebx 1867c478bd9Sstevel@tonic-gate clts 1877c478bd9Sstevel@tonic-gate testl $CR0_TS, %ebx 1887c478bd9Sstevel@tonic-gate jnz 1f 1897c478bd9Sstevel@tonic-gate 1907c478bd9Sstevel@tonic-gate pushl %edi 1917c478bd9Sstevel@tonic-gate SAVE_XMM0(%edi) 1927c478bd9Sstevel@tonic-gate1: ZERO_LOOP_INIT_XMM(%eax) 1937c478bd9Sstevel@tonic-gate9: ZERO_LOOP_BODY_XMM(%eax, %ecx) 1947c478bd9Sstevel@tonic-gate jnz 9b 1957c478bd9Sstevel@tonic-gate ZERO_LOOP_FINI_XMM(%eax) 1967c478bd9Sstevel@tonic-gate 1977c478bd9Sstevel@tonic-gate testl $CR0_TS, %ebx 1987c478bd9Sstevel@tonic-gate jnz 2f 1997c478bd9Sstevel@tonic-gate RSTOR_XMM0(%edi) 2007c478bd9Sstevel@tonic-gate popl %edi 2017c478bd9Sstevel@tonic-gate2: movl %ebx, %cr0 2027c478bd9Sstevel@tonic-gate popl %ebx 2037c478bd9Sstevel@tonic-gate ret 2047c478bd9Sstevel@tonic-gate.dobzero: 2057c478bd9Sstevel@tonic-gate jmp bzero 2067c478bd9Sstevel@tonic-gate SET_SIZE(hwblkclr) 2077c478bd9Sstevel@tonic-gate 2087c478bd9Sstevel@tonic-gate#endif /* __i386 */ 2097c478bd9Sstevel@tonic-gate#endif /* __lint */ 2107c478bd9Sstevel@tonic-gate 2117c478bd9Sstevel@tonic-gate 2127c478bd9Sstevel@tonic-gate#if defined(__lint) 2137c478bd9Sstevel@tonic-gate 2147c478bd9Sstevel@tonic-gate/*ARGSUSED*/ 2157c478bd9Sstevel@tonic-gatevoid 2167c478bd9Sstevel@tonic-gatehwblkpagecopy(const void *src, void *dst) 2177c478bd9Sstevel@tonic-gate{} 2187c478bd9Sstevel@tonic-gate 2197c478bd9Sstevel@tonic-gate#else /* __lint */ 2207c478bd9Sstevel@tonic-gate 
/* Prime the cache-bypass pipeline with the first two source lines. */
#define	PREFETCH_START(src)			\
	prefetchnta	0x0(src);		\
	prefetchnta	0x40(src)

/*
 * Save/restore all eight %xmm registers around the copy loop so the
 * interrupted thread's FPU state survives.  Only performed when
 * CR0.TS was clear on entry (i.e. live FPU state is present).
 */
#define	SAVE_XMMS(r)				\
	SAVE_XMM_PROLOG(r, 8);			\
	movdqa	%xmm0, (r);			\
	movdqa	%xmm1, 0x10(r);			\
	movdqa	%xmm2, 0x20(r);			\
	movdqa	%xmm3, 0x30(r);			\
	movdqa	%xmm4, 0x40(r);			\
	movdqa	%xmm5, 0x50(r);			\
	movdqa	%xmm6, 0x60(r);			\
	movdqa	%xmm7, 0x70(r)

/* Preload the first 128-byte chunk into %xmm0-%xmm7, advancing src. */
#define	COPY_LOOP_INIT_XMM(src)			\
	prefetchnta	0x80(src);		\
	prefetchnta	0xc0(src);		\
	movdqa	0x0(src), %xmm0;		\
	movdqa	0x10(src), %xmm1;		\
	movdqa	0x20(src), %xmm2;		\
	movdqa	0x30(src), %xmm3;		\
	movdqa	0x40(src), %xmm4;		\
	movdqa	0x50(src), %xmm5;		\
	movdqa	0x60(src), %xmm6;		\
	movdqa	0x70(src), %xmm7;		\
	ADD	$0x80, src

/*
 * Software-pipelined body: store the previously loaded 128 bytes with
 * non-temporal writes while interleaving the loads for the next chunk,
 * then advance both pointers and decrement cnt; the caller loops on
 * the resulting flags (jnz).
 */
#define	COPY_LOOP_BODY_XMM(src, dst, cnt)	\
	prefetchnta	0x80(src);		\
	prefetchnta	0xc0(src);		\
	prefetchnta	0x100(src);		\
	prefetchnta	0x140(src);		\
	movntdq	%xmm0, (dst);			\
	movntdq	%xmm1, 0x10(dst);		\
	movntdq	%xmm2, 0x20(dst);		\
	movntdq	%xmm3, 0x30(dst);		\
	movdqa	0x0(src), %xmm0;		\
	movdqa	0x10(src), %xmm1;		\
	movntdq	%xmm4, 0x40(dst);		\
	movntdq	%xmm5, 0x50(dst);		\
	movdqa	0x20(src), %xmm2;		\
	movdqa	0x30(src), %xmm3;		\
	movntdq	%xmm6, 0x60(dst);		\
	movntdq	%xmm7, 0x70(dst);		\
	movdqa	0x40(src), %xmm4;		\
	movdqa	0x50(src), %xmm5;		\
	ADD	$0x80, dst;			\
	movdqa	0x60(src), %xmm6;		\
	movdqa	0x70(src), %xmm7;		\
	ADD	$0x80, src;			\
	subl	$1, cnt

/* Drain the pipeline: store the final 128 bytes already in registers. */
#define	COPY_LOOP_FINI_XMM(dst)			\
	movntdq	%xmm0, 0x0(dst);		\
	movntdq	%xmm1, 0x10(dst);		\
	movntdq	%xmm2, 0x20(dst);		\
	movntdq	%xmm3, 0x30(dst);		\
	movntdq	%xmm4, 0x40(dst);		\
	movntdq	%xmm5, 0x50(dst);		\
	movntdq	%xmm6, 0x60(dst);		\
	movntdq	%xmm7, 0x70(dst)

#define	RSTOR_XMMS(r)				\
	movdqa	0x0(r), %xmm0;			\
	movdqa	0x10(r), %xmm1;			\
	movdqa	0x20(r), %xmm2;			\
	movdqa	0x30(r), %xmm3;			\
	movdqa	0x40(r), %xmm4;			\
	movdqa	0x50(r), %xmm5;			\
	movdqa	0x60(r), %xmm6;			\
	movdqa	0x70(r), %xmm7;			\
	RSTOR_XMM_EPILOG(r, 8)

#if defined(__amd64)

	/*
	 * void hwblkpagecopy(const void *src, void *dst)
	 *
	 * Copy one page with non-temporal SSE stores.  Caller must have
	 * kernel preemption disabled (DEBUG asserts it).
	 *
	 * %rdi		src
	 * %rsi		dst
	 * %rdx		#if DEBUG then curthread
	 * %ecx		loop count
	 * %rax		saved %cr0 (#if DEBUG then %eax is t->t_prempt)
	 * %r8		pointer to %xmm register save area
	 */
	ENTRY(hwblkpagecopy)
	pushq	%rbp
	movq	%rsp, %rbp
	PREFETCH_START(%rdi)
	/*
	 * PAGESIZE is 4096, each loop moves 128 bytes, but the initial
	 * load and final store save us on loop count
	 */
	movl	$_CONST(32 - 1), %ecx
	ASSERT_KPREEMPT_DISABLED(%rdx, %eax, .not_disabled)
	movq	%cr0, %rax
	clts				/* allow FPU use without a #NM trap */
	testl	$CR0_TS, %eax
	jnz	3f			/* TS was set: no live state to save */
	SAVE_XMMS(%r8)
3:	COPY_LOOP_INIT_XMM(%rdi)
4:	COPY_LOOP_BODY_XMM(%rdi, %rsi, %ecx)
	jnz	4b
	COPY_LOOP_FINI_XMM(%rsi)
	testl	$CR0_TS, %eax
	jnz	5f
	RSTOR_XMMS(%r8)
5:	movq	%rax, %cr0		/* restore original TS state */
	mfence				/* order the non-temporal stores */
	leave
	ret
	SET_SIZE(hwblkpagecopy)

#elif defined(__i386)

	/*
	 * void hwblkpagecopy(const void *src, void *dst)
	 *
	 * i386 flavor of the above; arguments arrive on the stack.
	 *
	 * %eax		src
	 * %edx		dst
	 * %ecx		loop count
	 * %ebx		saved %cr0 (#if DEBUG then t->t_prempt)
	 * %edi		pointer to %xmm register save area
	 * %esi		#if DEBUG temporary thread pointer
	 */
	ENTRY(hwblkpagecopy)
	movl	4(%esp), %eax
	movl	8(%esp), %edx
	PREFETCH_START(%eax)
	pushl	%ebx			/* callee-saved, used for %cr0 copy */
	/*
	 * PAGESIZE is 4096, each loop moves 128 bytes, but the initial
	 * load and final store save us one loop count
	 */
	movl	$_CONST(32 - 1), %ecx
	pushl	%esi			/* scratch thread ptr for the assert */
	ASSERT_KPREEMPT_DISABLED(%esi, %ebx, .not_disabled)
	popl	%esi
	movl	%cr0, %ebx
	clts				/* allow FPU use without a #NM trap */
	testl	$CR0_TS, %ebx
	jnz	3f			/* TS was set: no live state to save */
	pushl	%edi
	SAVE_XMMS(%edi)
3:	COPY_LOOP_INIT_XMM(%eax)
4:	COPY_LOOP_BODY_XMM(%eax, %edx, %ecx)
	jnz	4b
	COPY_LOOP_FINI_XMM(%edx)
	testl	$CR0_TS, %ebx
	jnz	5f
	RSTOR_XMMS(%edi)
	popl	%edi
5:	movl	%ebx, %cr0		/* restore original TS state */
	popl	%ebx
	mfence				/* order the non-temporal stores */
	ret
	SET_SIZE(hwblkpagecopy)

#endif	/* __i386 */
#endif	/* __lint */

#if defined(__lint)

/*
 * Version of hwblkclr which doesn't use XMM registers.
 * Note that it requires aligned dst and len.
 *
 * XXPV This needs to be performance tuned at some point.
 *	Is 4 the best number of iterations to unroll?
 */
/*ARGSUSED*/
void
block_zero_no_xmm(void *dst, int len)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * %rdi = dst, %rsi = len.  Walks dst+len with a negative index
	 * so the loop ends when the index reaches zero; the unrolled
	 * step is 32 bytes, so len must be a non-zero multiple of 32
	 * or the jnz never falls through.
	 */
	ENTRY(block_zero_no_xmm)
	pushq	%rbp
	movq	%rsp, %rbp
	xorl	%eax, %eax		/* zero source for the stores */
	addq	%rsi, %rdi		/* %rdi = one past end of region */
	negq	%rsi			/* %rsi = -len, counts up to 0 */
1:
	movnti	%rax, (%rdi, %rsi)
	movnti	%rax, 8(%rdi, %rsi)
	movnti	%rax, 16(%rdi, %rsi)
	movnti	%rax, 24(%rdi, %rsi)
	addq	$32, %rsi
	jnz	1b
	mfence				/* order the non-temporal stores */
	leave
	ret
	SET_SIZE(block_zero_no_xmm)

#elif defined(__i386)

	/*
	 * Stack args: dst, len.  Same negative-index scheme as amd64,
	 * but with 4-byte movnti the unrolled step is 16 bytes, so len
	 * must be a non-zero multiple of 16.
	 */
	ENTRY(block_zero_no_xmm)
	pushl	%ebp
	movl	%esp, %ebp
	xorl	%eax, %eax		/* zero source for the stores */
	movl	8(%ebp), %edx		/* dst */
	movl	12(%ebp), %ecx		/* len */
	addl	%ecx, %edx		/* %edx = one past end of region */
	negl	%ecx			/* %ecx = -len, counts up to 0 */
1:
	movnti	%eax, (%edx, %ecx)
	movnti	%eax, 4(%edx, %ecx)
	movnti	%eax, 8(%edx, %ecx)
	movnti	%eax, 12(%edx, %ecx)
	addl	$16, %ecx
	jnz	1b
	mfence				/* order the non-temporal stores */
	leave
	ret
	SET_SIZE(block_zero_no_xmm)

#endif	/* __i386 */
#endif	/* __lint */


#if defined(__lint)

/*
 * Version of page copy which doesn't use XMM registers.
 *
 * XXPV	This needs to be performance tuned at some point.
 *	Is 4 the right number of iterations to unroll?
 *	Is the load/store order optimal? Should it use prefetch?
 */
/*ARGSUSED*/
void
page_copy_no_xmm(void *dst, void *src)
{}

#else	/* __lint */

#if defined(__amd64)

	/*
	 * %rdi = dst, %rsi = src.  Copies MMU_STD_PAGESIZE bytes via
	 * %rax using non-temporal stores, 32 bytes per iteration, with
	 * the same negative-index loop as block_zero_no_xmm.
	 */
	ENTRY(page_copy_no_xmm)
	movq	$MMU_STD_PAGESIZE, %rcx
	addq	%rcx, %rdi		/* point both at one past the end */
	addq	%rcx, %rsi
	negq	%rcx			/* %rcx = -PAGESIZE, counts up to 0 */
1:
	movq	(%rsi, %rcx), %rax
	movnti	%rax, (%rdi, %rcx)
	movq	8(%rsi, %rcx), %rax
	movnti	%rax, 8(%rdi, %rcx)
	movq	16(%rsi, %rcx), %rax
	movnti	%rax, 16(%rdi, %rcx)
	movq	24(%rsi, %rcx), %rax
	movnti	%rax, 24(%rdi, %rcx)
	addq	$32, %rcx
	jnz	1b
	mfence				/* order the non-temporal stores */
	ret
	SET_SIZE(page_copy_no_xmm)

#elif defined(__i386)

	/*
	 * Stack args: dst, src.  4-byte transfers, 16 bytes per
	 * iteration.  %esi is callee-saved, hence the push/pop;
	 * the stack arg offsets (8/12) account for that push.
	 */
	ENTRY(page_copy_no_xmm)
	pushl	%esi
	movl	$MMU_STD_PAGESIZE, %ecx
	movl	8(%esp), %edx		/* dst */
	movl	12(%esp), %esi		/* src */
	addl	%ecx, %edx		/* point both at one past the end */
	addl	%ecx, %esi
	negl	%ecx			/* %ecx = -PAGESIZE, counts up to 0 */
1:
	movl	(%esi, %ecx), %eax
	movnti	%eax, (%edx, %ecx)
	movl	4(%esi, %ecx), %eax
	movnti	%eax, 4(%edx, %ecx)
	movl	8(%esi, %ecx), %eax
	movnti	%eax, 8(%edx, %ecx)
	movl	12(%esi, %ecx), %eax
	movnti	%eax, 12(%edx, %ecx)
	addl	$16, %ecx
	jnz	1b
	mfence				/* order the non-temporal stores */
	popl	%esi
	ret
	SET_SIZE(page_copy_no_xmm)

#endif	/* __i386 */
#endif	/* __lint */

#if defined(DEBUG) && !defined(__lint)
	.text
.not_disabled:
	.string	"sseblk: preemption not disabled!"
#endif