/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
 */

	.file	"memset.s"

/*
 * void *memset(void *sp1, int c, size_t n)
 *
 * Set an array of n chars starting at sp1 to the character c.
 * Return sp1.
 *
 * Fast assembler language version of the following C-program for memset
 * which represents the `standard' for the C-library.
 *
 *	void *
 *	memset(void *sp1, int c, size_t n)
 *	{
 *		if (n != 0) {
 *			char *sp = sp1;
 *			do {
 *				*sp++ = (char)c;
 *			} while (--n != 0);
 *		}
 *		return (sp1);
 *	}
 *
 * Register usage:
 *	%o0	sp1; never written, so it is still the return value at retl
 *	%o1	c on entry; later c replicated into all 8 bytes
 *	%o2	number of bytes still to be set
 *	%o3	scratch counter (alignment bytes, doubles left, ...)
 *	%o4	scratch counter (bytes for the 32-byte / 64-byte loops)
 *	%o5	running store pointer (a copy of sp1 that is advanced)
 *	%g1	FEF bit of the caller's %fprs (large-count path only)
 *	%d0-%d14 64 bytes of the fill pattern (large-count path only)
 *
 * Strategy:
 *	n < 12		plain byte loop (.wrchar).
 *	otherwise	byte stores until %o5 is 8-byte aligned, then:
 *	  count <= 4095	8-byte integer stores: 0-3 single doubles, then a
 *			32-bytes-per-iteration loop, then 0-7 trailing bytes.
 *	  count > 4095	8-byte stores until %o5 is 64-byte aligned, then
 *			64 bytes per iteration from the FP registers, then
 *			0-7 doubles, then 0-7 trailing bytes (.blkwr).
 *
 * Many instructions below sit in branch delay slots; do not reorder
 * them without re-checking every branch.
 */

#include <sys/asm_linkage.h>
#include <sys/sun4asi.h>

	ANSI_PRAGMA_WEAK(memset,function)

#define	ALIGN8(X)	(((X) + 7) & ~7)
#define	BLOCK_SIZE	64

	.section ".text"
	.align 32

	ENTRY(memset)
	cmp	%o2, 12			! if small counts, just write bytes
	bgeu,pn	%ncc, .wrbig
	mov	%o0, %o5		! copy sp1 before using it

	/*
	 * Small count (n < 12).  The count is decremented and the pointer
	 * advanced before the store; the store lives in an annulled delay
	 * slot, so it is only executed while the decrement did not borrow.
	 * n == 0 therefore stores nothing.
	 */
.wrchar:
	deccc	%o2			! byte clearing loop
	inc	%o5
	bgeu,a,pt %ncc, .wrchar
	stb	%o1, [%o5 + -1]		! we've already incremented the address

	/*
	 * The delay slot of this retl is the andcc at .wrbig below.  That
	 * instruction only writes %o3 and the condition codes, so %o0 is
	 * returned intact.
	 */
	retl
	.empty	! next instruction is safe, %o0 still good

.wrbig:
	andcc	%o5, 7, %o3		! is sp1 aligned on a 8 byte bound
	bz,pt	%ncc, .blkchk		! already double aligned
	and	%o1, 0xff, %o1		! o1 is (char)c
	sub	%o3, 8, %o3		! -(bytes till double aligned)
	add	%o2, %o3, %o2		! update o2 with new count

	! Set -(%o3) bytes till sp1 double aligned
1:	stb	%o1, [%o5]		! there is at least 1 byte to set
	inccc	%o3			! byte clearing loop
	bl,pt	%ncc, 1b
	inc	%o5


	! Now sp1 is double aligned (sp1 is found in %o5)
.blkchk:
	sll	%o1, 8, %o3
	or	%o1, %o3, %o1		! now o1 has 2 bytes of c

	sll	%o1, 16, %o3
	or	%o1, %o3, %o1		! now o1 has 4 bytes of c

	/*
	 * The compare is issued early; sllx and or do not touch the
	 * condition codes, so the bgu below still sees its result.
	 */
	cmp	%o2, 4095		! if large count use Block ld/st

	sllx	%o1, 32, %o3
	or	%o1, %o3, %o1		! now o1 has 8 bytes of c

	bgu,a,pn %ncc, .blkwr		! Do block write for large count
	andcc	%o5, 63, %o3		! is sp1 block aligned?

	/*
	 * Medium count (12 <= n, remaining count <= 4095).
	 *
	 * First store (count & 24) bytes as single doubles.  As in .wrchar
	 * the pointer is advanced before the annulled store, so when this
	 * loop exits %o5 is 8 bytes PAST the next byte to be written.  The
	 * bias is never removed: the 32-byte loop starts at [%o5 - 8] and
	 * the trailing byte loop stores at [%o5 - 9] after its own inc.
	 */
	and	%o2, 24, %o3		! o3 is {0, 8, 16, 24}

1:	subcc	%o3, 8, %o3		! double-word loop
	add	%o5, 8, %o5
	bgeu,a,pt %ncc, 1b
	stx	%o1, [%o5 - 8]		! already incremented the address

	/*
	 * The delay slot of the bz is the subcc at label 2.  When the
	 * branch is taken that subcc only modifies %o4 and the condition
	 * codes, neither of which is used from label 3 onwards.
	 */
	andncc	%o2, 31, %o4		! o4 has 32 byte aligned count
	bz,pn	%ncc, 3f		! First instruction of icache line
2:
	subcc	%o4, 32, %o4		! main loop, 32 bytes per iteration
	stx	%o1, [%o5 - 8]
	stx	%o1, [%o5]
	stx	%o1, [%o5 + 8]
	stx	%o1, [%o5 + 16]
	bnz,pt	%ncc, 2b
	add	%o5, 32, %o5

3:
	and	%o2, 7, %o2		! o2 has the remaining bytes (<8)

4:
	deccc	%o2			! byte clearing loop
	inc	%o5
	bgeu,a,pt %ncc, 4b
	stb	%o1, [%o5 - 9]		! already incremented the address

	retl
	nop				! %o0 still preserved

	/*
	 * Large count (remaining count > 4095).  The condition codes on
	 * entry come from the andcc in the delay slot of the branch that
	 * brought us here (%o3 = %o5 & 63).  The sub in the delay slot of
	 * the bz always executes; on the taken path its result is simply
	 * overwritten at .blalign.
	 */
.blkwr:
	bz,pn	%ncc, .blalign		! now block aligned
	sub	%o3, 64, %o3		! o3 is -(bytes till block aligned)
	add	%o2, %o3, %o2		! o2 is the remainder

	! Store -(%o3) bytes till dst is block (64 byte) aligned.
	! Use double word stores.
	! Recall that dst is already double word aligned
1:
	stx	%o1, [%o5]
	addcc	%o3, 8, %o3
	bl,pt	%ncc, 1b
	add	%o5, 8, %o5

	! sp1 is block aligned
.blalign:
	rd	%fprs, %g1		! g1 = fprs

	and	%o2, 63, %o3		! calc bytes left after blk store.

	/*
	 * After the andcc %g1 holds only bit 0x4 (FEF) of the original
	 * %fprs; that is the value written back at .exit.  The wr sits in
	 * an annulled delay slot and so runs only when the branch is
	 * taken, i.e. when FEF was clear.
	 */
	andcc	%g1, 0x4, %g1		! fprs.du = fprs.dl = 0
	bz,a	%ncc, 2f		! Is fprs.fef == 0
	wr	%g0, 0x4, %fprs		! fprs.fef = 1
2:
	brnz,pn	%o1, 3f			! %o1 is safe to check all 64-bits
	andn	%o2, 63, %o4		! calc size of blocks in bytes

	/*
	 * c == 0: zero %d0-%d12 directly.  The two fmuld instructions
	 * compute 0.0 * 0.0 and so also leave zero in %d8 and %d12.
	 * NOTE(review): presumably fmuld is used instead of fzero for
	 * instruction pairing reasons; that cannot be confirmed from this
	 * file.  %d14 is filled at label 4.
	 */
	fzero	%d0
	fzero	%d2
	fzero	%d4
	fzero	%d6
	fmuld	%d0, %d0, %d8
	fzero	%d10
	ba	4f
	fmuld	%d0, %d0, %d12

3:
	/*
	 * c != 0: move the 8-byte pattern from %o1 to %d0 through memory.
	 * SA(16) bytes of stack are reserved; a single 8-byte slot at
	 * ALIGN8(MINFRAME) above the biased stack pointer is used.
	 */
	add	%sp, -SA(16), %sp
	stx	%o1, [%sp + STACK_BIAS + ALIGN8(MINFRAME)]  ! move %o1 to %d0
	ldd	[%sp + STACK_BIAS + ALIGN8(MINFRAME)], %d0

	fmovd	%d0, %d2
	add	%sp, SA(16), %sp	! deallocate the scratch space
	fmovd	%d0, %d4
	fmovd	%d0, %d6
	fmovd	%d0, %d8
	fmovd	%d0, %d10
	fmovd	%d0, %d12

	/*
	 * Label 4 is both the join point of the two set-up paths and the
	 * head of the 64-byte store loop, so this fmovd is re-executed on
	 * every iteration.
	 */
4:
	fmovd	%d0, %d14

	! 1st quadrant has 64 bytes of c
	! instructions 32-byte aligned here
#ifdef PANTHER_ONLY
	! Panther only code
	prefetch	[%o5 + (3 * BLOCK_SIZE)], 22
	prefetch	[%o5 + (6 * BLOCK_SIZE)], 22
	std	%d0, [%o5]
	std	%d0, [%o5 + 8]
	std	%d0, [%o5 + 16]
	std	%d0, [%o5 + 24]
	std	%d0, [%o5 + 32]
	std	%d0, [%o5 + 40]
	std	%d0, [%o5 + 48]
	std	%d0, [%o5 + 56]
#else
	! Cheetah/Jaguar code
	stda	%d0, [%o5]ASI_BLK_P
#endif
	subcc	%o4, 64, %o4
	bgu,pt	%ncc, 4b
	add	%o5, 64, %o5

	! Set the remaining doubles
	subcc	%o3, 8, %o3		! Can we store any doubles?
	blu,pn	%ncc, 6f
	and	%o2, 7, %o2		! calc bytes left after doubles

5:
	std	%d0, [%o5]		! store the doubles
	subcc	%o3, 8, %o3
	bgeu,pt	%ncc, 5b
	add	%o5, 8, %o5
6:
	/*
	 * Set the remaining bytes.
	 *
	 * The brz has no delay instruction of its own: its delay slot is
	 * the deccc at label 7.  When the branch is taken that decrement
	 * is harmless (%o2 is not used at .exit).  When it falls through,
	 * the decrement acts as the first iteration's count update and
	 * execution continues at the stb.
	 */
	brz	%o2, .exit		! safe to check all 64-bits

#if 0
	! Terminate the copy with a partial store. (bug 1200071 does not apply)
	! The data should be at d0
	dec	%o2			! needed to get the mask right
	edge8n	%g0, %o2, %o4
	stda	%d0, [%o5]%o4, ASI_PST8_P
#else
7:
	deccc	%o2
	stb	%o1, [%o5]
	bgu,pt	%ncc, 7b
	inc	%o5
#endif

.exit:
	membar	#StoreLoad|#StoreStore
	retl				! %o0 was preserved
	wr	%g1, %g0, %fprs		! fprs = g1  restore fprs

	SET_SIZE(memset)