/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
 */

	.file	"memset.s"

/*
 * char *memset(sp, c, n)
 *
 * Set an array of n chars starting at sp to the character c.
 * Return sp.
 *
 * Fast assembler language version of the following C-program for memset
 * which represents the `standard' for the C-library.
 *
 *	void *
 *	memset(void *sp1, int c, size_t n)
 *	{
 *	    if (n != 0) {
 *		char *sp = sp1;
 *		do {
 *		    *sp++ = (char)c;
 *		} while (--n != 0);
 *	    }
 *	    return (sp1);
 *	}
 *
 * Flow :
 *
 *	For stores of 6 or fewer bytes, individual bytes are stored.
 *
 *	For stores of fewer than 32 bytes, align the address on a 4 byte
 *	boundary.  Then store as many 4-byte chunks as possible, followed
 *	by the trailing bytes.
 *
 *	For stores of 32 bytes or more, align the address on an 8 byte
 *	boundary.
 *	if (count >= 64) {
 *		store as many 8-byte chunks as needed to block (64 byte)
 *		align the address
 *		store using ASI_BLK_INIT_ST_QUAD_LDD_P
 *	}
 *	Store as many 8-byte chunks as possible, followed by the
 *	trailing bytes.
 *
 * Register usage :
 *
 *	%o0	sp1; never written, so it is still the return value at exit
 *	%o1	c, masked to one byte and then replicated to 4 or 8 bytes
 *	%o2	n; the number of bytes still to be accounted for
 *	%o3	scratch: negative "bytes until aligned" counters and the
 *		byte counts driving the word and doubleword loops
 *	%o4	number of bytes to be written by the 64-byte block loops
 *	%o5	current store address (starts as a copy of sp1)
 *
 * Every branch below is followed by a delay slot instruction.  None of the
 * branches use the annul bit, so the delay slot instruction is executed
 * whether or not the branch is taken.
 */

#include <sys/asm_linkage.h>
#include <sys/niagaraasi.h>
#include <sys/asi.h>

	ANSI_PRAGMA_WEAK(memset,function)

	.section	".text"
	.align 32

	ENTRY(memset)

	mov	%o0, %o5		! copy sp1 before using it
	cmp	%o2, 7			! if small counts, just write bytes
	blu,pn	%ncc, .wrchar
	and	%o1, 0xff, %o1		! o1 is (char)c (delay slot, so it
					! is done on the .wrchar path too)

	sll	%o1, 8, %o3
	or	%o1, %o3, %o1		! now o1 has 2 bytes of c
	sll	%o1, 16, %o3

	cmp	%o2, 0x20		! fewer than 32 bytes: use word stores
	blu,pn	%ncc, .wdalign
	or	%o1, %o3, %o1		! now o1 has 4 bytes of c

	sllx	%o1, 32, %o3
	or	%o1, %o3, %o1		! now o1 has 8 bytes of c

.dbalign:
	andcc	%o5, 7, %o3		! is sp1 aligned on a 8 byte bound
	bz,pt	%ncc, .blkalign		! already double aligned
	sub	%o3, 8, %o3		! -(bytes till double aligned)
					! (harmless when the branch is taken:
					! .blkalign rewrites o3 before use)
	add	%o2, %o3, %o2		! update o2 with new count

	! Set -(%o3) bytes till sp1 double aligned
1:	stb	%o1, [%o5]		! there is at least 1 byte to set
	inccc	%o3			! byte clearing loop
	bl,pt	%ncc, 1b		! loop until o3 has counted up to 0
	inc	%o5

	! Now sp1 is double aligned (sp1 is found in %o5)
.blkalign:
	! NOTE(review): the ASI name comes from <sys/niagaraasi.h>; it is
	! only used by the stxa stores in loops 2 and 4 below.
	mov	ASI_BLK_INIT_ST_QUAD_LDD_P, %asi

	cmp	%o2, 0x40		! check if there are 64 bytes to set
	blu,pn	%ncc, 5f
	mov	%o2, %o3		! o3 is the doubleword loop byte count
					! for the path that skips block stores

	andcc	%o5, 63, %o3		! is sp1 block aligned?
	bz,pt	%ncc, .blkwr		! now block aligned
	sub	%o3, 64, %o3		! o3 is -(bytes till block aligned)
					! (.blkwr recomputes o3, so this is
					! harmless when the branch is taken)
	add	%o2, %o3, %o2		! o2 is the remainder

	! Store -(%o3) bytes till dst is block (64 byte) aligned.
	! Use double word stores.
	! Recall that dst is already double word aligned
1:
	stx	%o1, [%o5]
	addcc	%o3, 8, %o3
	bl,pt	%ncc, 1b		! loop until o3 has counted up to 0
	add	%o5, 8, %o5

	! Now sp1 is block aligned
.blkwr:
	and	%o2, 63, %o3		! calc bytes left after blk store.
	andn	%o2, 63, %o4		! calc size of blocks in bytes

	cmp	%o4, 0x100		! check if there are 256 bytes to set
	blu,pn	%ncc, 3f
	nop

	! 256 bytes (four 64-byte blocks) per iteration.  The first
	! doubleword of each of the four blocks is stored before the rest
	! of the blocks are filled in.
	! NOTE(review): presumably this ordering starts all four block
	! initializations early; confirm against the processor manual.
2:
	stxa	%o1, [%o5+0x0]%asi
	stxa	%o1, [%o5+0x40]%asi
	stxa	%o1, [%o5+0x80]%asi
	stxa	%o1, [%o5+0xc0]%asi

	stxa	%o1, [%o5+0x8]%asi
	stxa	%o1, [%o5+0x10]%asi
	stxa	%o1, [%o5+0x18]%asi
	stxa	%o1, [%o5+0x20]%asi
	stxa	%o1, [%o5+0x28]%asi
	stxa	%o1, [%o5+0x30]%asi
	stxa	%o1, [%o5+0x38]%asi

	stxa	%o1, [%o5+0x48]%asi
	stxa	%o1, [%o5+0x50]%asi
	stxa	%o1, [%o5+0x58]%asi
	stxa	%o1, [%o5+0x60]%asi
	stxa	%o1, [%o5+0x68]%asi
	stxa	%o1, [%o5+0x70]%asi
	stxa	%o1, [%o5+0x78]%asi

	stxa	%o1, [%o5+0x88]%asi
	stxa	%o1, [%o5+0x90]%asi
	stxa	%o1, [%o5+0x98]%asi
	stxa	%o1, [%o5+0xa0]%asi
	stxa	%o1, [%o5+0xa8]%asi
	stxa	%o1, [%o5+0xb0]%asi
	stxa	%o1, [%o5+0xb8]%asi

	stxa	%o1, [%o5+0xc8]%asi
	stxa	%o1, [%o5+0xd0]%asi
	stxa	%o1, [%o5+0xd8]%asi
	stxa	%o1, [%o5+0xe0]%asi
	stxa	%o1, [%o5+0xe8]%asi
	stxa	%o1, [%o5+0xf0]%asi
	stxa	%o1, [%o5+0xf8]%asi

	sub	%o4, 0x100, %o4
	cmp	%o4, 0x100
	bgu,pt	%ncc, 2b		! repeat only while more than 256
					! bytes remain; a remainder of exactly
					! 256 is finished by loop 4 below
	add	%o5, 0x100, %o5

3:
	cmp	%o4, 0x40		! check if 64 bytes to set
	blu	%ncc, 5f
	nop

	! One 64-byte block per iteration
4:
	stxa	%o1, [%o5+0x0]%asi
	stxa	%o1, [%o5+0x8]%asi
	stxa	%o1, [%o5+0x10]%asi
	stxa	%o1, [%o5+0x18]%asi
	stxa	%o1, [%o5+0x20]%asi
	stxa	%o1, [%o5+0x28]%asi
	stxa	%o1, [%o5+0x30]%asi
	stxa	%o1, [%o5+0x38]%asi

	subcc	%o4, 0x40, %o4
	bgu,pt	%ncc, 4b		! loop until no block bytes remain
	add	%o5, 0x40, %o5

5:
	! Set the remaining doubles
	! NOTE(review): the membar presumably orders the stxa stores above
	! ahead of the ordinary stores that follow; confirm against the
	! processor manual.  It is also executed on the path that skipped
	! the block stores.
	membar	#Sync
	mov	ASI_PNF, %asi		! restore %asi to default
					! ASI_PRIMARY_NOFAULT value
	subcc	%o3, 8, %o3		! Can we store any doubles?
	blu,pn	%ncc, .wrchar
	and	%o2, 7, %o2		! calc bytes left after doubles

6:
	stx	%o1, [%o5]		! store the doubles
	subcc	%o3, 8, %o3
	bgeu,pt	%ncc, 6b		! loop while a full double remained
	add	%o5, 8, %o5

	ba	.wrchar
	nop

	! Fewer than 32 bytes: store single bytes until the address is
	! word aligned.  o3 is recomputed from o2 on every pass.
.wdalign:
	andcc	%o5, 3, %o3		! is sp1 aligned on a word boundary
	bz,pn	%ncc, .wrword
	andn	%o2, 3, %o3		! create word sized count in %o3

	dec	%o2			! decrement count
	stb	%o1, [%o5]		! clear a byte
	b	.wdalign
	inc	%o5			! next byte

.wrword:
	st	%o1, [%o5]		! 4-byte writing loop
	subcc	%o3, 4, %o3
	bnz,pt	%ncc, .wrword		! loop until the word count is used up
	inc	4, %o5

	and	%o2, 3, %o2		! leftover count, if any

.wrchar:
	! Set the remaining bytes, if any
	cmp	%o2, 0
	be	%ncc, .exit
	nop

7:
	deccc	%o2
	stb	%o1, [%o5]		! stb leaves the condition codes set
					! by deccc intact for the branch
	bgu,pt	%ncc, 7b		! loop until the count reaches 0
	inc	%o5

.exit:
	retl				! %o0 was preserved
	nop

	SET_SIZE(memset)