/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2006, 2010, Oracle and/or its affiliates. All rights reserved.
 */

	.file	"memset.s"

/*
 * void *memset(void *sp1, int c, size_t n)
 *
 * Set an array of n chars starting at sp1 to the character c.
 * Return sp1.
 *
 * Fast assembler language version of the following C-program for memset
 * which represents the `standard' for the C-library.
 *
 *	void *
 *	memset(void *sp1, int c, size_t n)
 *	{
 *	    if (n != 0) {
 *		char *sp = sp1;
 *		do {
 *		    *sp++ = (char)c;
 *		} while (--n != 0);
 *	    }
 *	    return (sp1);
 *	}
 *
 * Register usage:
 *	%o0	sp1; never written, so it is still the return value at retl
 *	%o1	c; on the n >= 12 paths it is masked to 8 bits and then
 *		replicated into all 8 bytes of the register
 *	%o2	number of bytes still to be set
 *	%o3	scratch counter (alignment bytes / doubleword residue)
 *	%o4	scratch counter (32-byte or 64-byte chunk bytes)
 *	%o5	running store pointer (starts as a copy of %o0)
 *	%g1	block path only: the 0x4 bit of %fprs as read on entry
 *	%d0-%d14 block path only: 64 bytes of replicated c
 *
 * Strategy:
 *	1. n < 12: plain byte loop (.wrchar).
 *	2. Otherwise store bytes until %o5 is 8-byte aligned (.wrbig) and
 *	   build the 8-byte pattern (.blkchk).  If at most 4095 bytes
 *	   remain, finish with stx doubleword loops and a byte loop.
 *	3. If more than 4095 bytes remain (.blkwr): stx up to a 64-byte
 *	   boundary, then 64 bytes per iteration from %d0-%d14, then std
 *	   doublewords, then bytes.  This path enables the FPU if needed,
 *	   issues a membar and rewrites %fprs before returning.
 *
 * Every branch and retl below is followed by a delay-slot instruction.
 * With the ",a" (annul) suffix the delay slot of a conditional branch is
 * executed only when the branch is taken; several loops rely on this to
 * place their store in the delay slot.
 */

#include <sys/asm_linkage.h>
#include <sys/sun4asi.h>

	ANSI_PRAGMA_WEAK(memset,function)

/* Round X up to the next multiple of 8. */
#define	ALIGN8(X)	(((X) + 7) & ~7)
/* Bytes written per block-store iteration; used for the prefetch distances. */
#define	BLOCK_SIZE	64

	.section	".text"
	.align	32

	ENTRY(memset)
	cmp	%o2, 12			! if small counts, just write bytes
	bgeu,pn	%ncc, .wrbig
	mov	%o0, %o5		! copy sp1 before using it

	! Byte loop for n < 12.  The count is decremented and the pointer
	! advanced first; the store sits in the annulled delay slot, so it
	! runs only while the decrement did not borrow.  n == 0 therefore
	! stores nothing.
.wrchar:
	deccc	%o2			! byte clearing loop
	inc	%o5
	bgeu,a,pt %ncc, .wrchar
	stb	%o1, [%o5 + -1]		! we've already incremented the address

	! The delay slot of this retl is the andcc at .wrbig, which only
	! writes %o3 and the condition codes.
	retl
	.empty	! next instruction is safe, %o0 still good

.wrbig:
	andcc	%o5, 7, %o3		! is sp1 aligned on a 8 byte bound
	bz,pt	%ncc, .blkchk		! already double aligned
	and	%o1, 0xff, %o1		! o1 is (char)c (delay slot, always run)
	sub	%o3, 8, %o3		! -(bytes till double aligned)
	add	%o2, %o3, %o2		! update o2 with new count

	! Set -(%o3) bytes till sp1 double aligned
1:	stb	%o1, [%o5]		! there is at least 1 byte to set
	inccc	%o3			! byte clearing loop
	bl,pt	%ncc, 1b		! loop while %o3 is still negative
	inc	%o5


	! Now sp1 is double aligned (sp1 is found in %o5)
.blkchk:
	sll	%o1, 8, %o3
	or	%o1, %o3, %o1		! now o1 has 2 bytes of c

	sll	%o1, 16, %o3
	or	%o1, %o3, %o1		! now o1 has 4 bytes of c

	! The condition codes set here are consumed by the bgu below; the
	! sllx/or in between do not modify them.
	cmp	%o2, 4095		! if large count use Block ld/st

	sllx	%o1, 32, %o3
	or	%o1, %o3, %o1		! now o1 has 8 bytes of c

	! Annulled delay slot: the andcc runs only when the branch is taken,
	! and its condition codes are tested by the bz at .blkwr.
	bgu,a,pn %ncc, .blkwr		! Do block write for large count
	andcc	%o5, 63, %o3		! is sp1 block aligned?

	and	%o2, 24, %o3		! o3 is {0, 8, 16, 24}

	! Store %o3 / 8 doublewords.  On exit %o5 is 8 bytes past the next
	! byte to be written; the code that follows compensates for this in
	! its store offsets.
1:	subcc	%o3, 8, %o3		! double-word loop
	add	%o5, 8, %o5
	bgeu,a,pt %ncc, 1b
	stx	%o1, [%o5 - 8]		! already incremented the address

	andncc	%o2, 31, %o4		! o4 has 32 byte aligned count
	! The delay slot of this bz is the subcc at 2:.  It executes whether
	! or not the branch is taken; %o4 is not used again after 3:.
	bz,pn	%ncc, 3f		! First instruction of icache line
2:
	subcc	%o4, 32, %o4		! main loop, 32 bytes per iteration
	stx	%o1, [%o5 - 8]		! offsets are biased by the extra 8 in %o5
	stx	%o1, [%o5]
	stx	%o1, [%o5 + 8]
	stx	%o1, [%o5 + 16]
	bnz,pt	%ncc, 2b		! tests the subcc result above
	add	%o5, 32, %o5

3:
	and	%o2, 7, %o2		! o2 has the remaining bytes (<8)

	! Trailing byte loop.  The offset is -9: 8 for the bias left by the
	! doubleword loop plus 1 for the inc that precedes the store.
4:
	deccc	%o2			! byte clearing loop
	inc	%o5
	bgeu,a,pt %ncc, 4b
	stb	%o1, [%o5 - 9]		! already incremented the address

	retl
	nop				! %o0 still preserved

	! Large-count path.  On entry the condition codes and %o3 come from
	! "andcc %o5, 63, %o3" in the delay slot of the branch that got here.
.blkwr:
	bz,pn	%ncc, .blalign		! now block aligned
	sub	%o3, 64, %o3		! o3 is -(bytes till block aligned)
					! (delay slot, always run; %o3 is
					! recomputed at .blalign)
	add	%o2, %o3, %o2		! o2 is the remainder

	! Store -(%o3) bytes till dst is block (64 byte) aligned.
	! Use double word stores.
	! Recall that dst is already double word aligned
1:
	stx	%o1, [%o5]
	addcc	%o3, 8, %o3
	bl,pt	%ncc, 1b		! loop while %o3 is still negative
	add	%o5, 8, %o5

	! sp1 is block aligned
.blalign:
	rd	%fprs, %g1		! g1 = fprs

	and	%o2, 63, %o3		! calc bytes left after blk store.

	! Only the 0x4 bit is kept in %g1, so the value written back at
	! .exit has every other %fprs bit clear.
	andcc	%g1, 0x4, %g1		! fprs.du = fprs.dl = 0
	bz,a	%ncc, 2f		! Is fprs.fef == 0
	wr	%g0, 0x4, %fprs		! fprs.fef = 1 (annulled unless fef was 0)
2:
	brnz,pn	%o1, 3f			! %o1 is safe to check all 64-bits
	andn	%o2, 63, %o4		! calc size of blocks in bytes
	! c == 0: zero %d0-%d12 without going through memory.  The fmuld
	! instructions compute 0 * 0 from the already cleared %d0.
	! NOTE(review): presumably fmuld is mixed in for instruction
	! scheduling reasons - confirm against the CPU pipeline docs.
	fzero	%d0
	fzero	%d2
	fzero	%d4
	fzero	%d6
	fmuld	%d0, %d0, %d8
	fzero	%d10
	ba	4f
	fmuld	%d0, %d0, %d12

3:
	! c != 0: move the pattern from %o1 to %d0 through a stack slot.
	! %sp is lowered by SA(16) for the duration; one 8-byte slot is used.
	add	%sp, -SA(16), %sp
	stx	%o1, [%sp + STACK_BIAS + ALIGN8(MINFRAME)]  ! move %o1 to %d0
	ldd	[%sp + STACK_BIAS + ALIGN8(MINFRAME)], %d0

	fmovd	%d0, %d2
	add	%sp, SA(16), %sp	! deallocate the scratch space
	fmovd	%d0, %d4
	fmovd	%d0, %d6
	fmovd	%d0, %d8
	fmovd	%d0, %d10
	fmovd	%d0, %d12
	! Label 4 is both the join point of the two fill paths and the top
	! of the block-store loop, so this fmovd is repeated each iteration.
4:
	fmovd	%d0, %d14

	! 1st quadrant has 64 bytes of c
	! instructions 32-byte aligned here
#ifdef PANTHER_ONLY
	! Panther only code
	prefetch	[%o5 + (3 * BLOCK_SIZE)], 22
	prefetch	[%o5 + (6 * BLOCK_SIZE)], 22
	std	%d0, [%o5]
	std	%d0, [%o5 + 8]
	std	%d0, [%o5 + 16]
	std	%d0, [%o5 + 24]
	std	%d0, [%o5 + 32]
	std	%d0, [%o5 + 40]
	std	%d0, [%o5 + 48]
	std	%d0, [%o5 + 56]
#else
	! Cheetah/Jaguar code
	stda	%d0, [%o5]ASI_BLK_P
#endif
	subcc	%o4, 64, %o4		! one 64-byte block done
	bgu,pt	%ncc, 4b		! loop while block bytes remain
	add	%o5, 64, %o5

	! Set the remaining doubles
	subcc	%o3, 8, %o3		! Can we store any doubles?
	blu,pn	%ncc, 6f
	and	%o2, 7, %o2		! calc bytes left after doubles

5:
	std	%d0, [%o5]		! store the doubles
	subcc	%o3, 8, %o3
	bgeu,pt	%ncc, 5b
	add	%o5, 8, %o5
6:
	! Set the remaining bytes
	! The delay slot of this brz is the first instruction assembled
	! after it (the deccc at 7: in the #else branch).  It runs once in
	! either case; when the branch is taken %o2 is not used again.
	brz	%o2, .exit		! safe to check all 64-bits

#if 0
	! Terminate the copy with a partial store. (bug 1200071 does not apply)
	! The data should be at d0
	dec	%o2			! needed to get the mask right
	edge8n	%g0, %o2, %o4
	stda	%d0, [%o5]%o4, ASI_PST8_P
#else
7:
	deccc	%o2
	stb	%o1, [%o5]
	bgu,pt	%ncc, 7b		! loop while bytes remain
	inc	%o5
#endif

	! Common exit of the block path: order the stores, then write back
	! the 0x4 bit of %fprs that was saved in %g1 at .blalign.
.exit:
	membar	#StoreLoad|#StoreStore
	retl				! %o0 was preserved
	wr	%g1, %g0, %fprs		! fprs = g1  restore fprs

	SET_SIZE(memset)