/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 1995, 2010, Oracle and/or its affiliates. All rights reserved.
 */

	.file	"memset.s"

/*
 * void *memset(void *sp1, int c, size_t n)
 *
 * Store (char)c into each of the n bytes beginning at sp1 and return sp1.
 *
 * This is a hand-scheduled assembler version of the following C routine,
 * which represents the `standard' for the C-library:
 *
 *	void *
 *	memset(void *sp1, int c, size_t n)
 *	{
 *		if (n != 0) {
 *			char *sp = sp1;
 *			do {
 *				*sp++ = (char)c;
 *			} while (--n != 0);
 *		}
 *		return (sp1);
 *	}
 *
 * Outline of the paths taken below:
 *
 *   n < 12	Byte-at-a-time loop (.wrchar).
 *
 *   n >= 12	Byte stores until the pointer is 8-byte aligned, then the
 *		fill byte is replicated into all 8 bytes of %o1 (.blkchk).
 *
 *		Remaining count <= 4095: 0-3 leading stx doublewords, a
 *		32-bytes-per-iteration stx loop, then a byte loop for the
 *		last < 8 bytes.
 *
 *		Remaining count > 4095 (.blkwr): stx doublewords until the
 *		pointer is 64-byte aligned, 64-byte block stores from
 *		%d0-%d14 through ASI_BLK_P, then std doublewords, then a
 *		byte loop.  This path enables the FPU if needed and writes
 *		%fprs back on exit.
 *
 * Register use:
 *
 *	%o0	sp1; never written, so it is the return value as-is
 *	%o1	c on entry; later the replicated fill pattern
 *	%o2	n on entry; count still to be stored
 *	%o3	scratch: alignment distance / doubleword counters
 *	%o4	scratch: byte count for the 32- and 64-byte loops
 *	%o5	running store pointer
 *	%g1	block path only: entry %fprs masked down to the fef bit
 *	%d0-%d14 block path only: 64 bytes of fill pattern
 *
 * The instruction sequence is layout sensitive (two places are noted as
 * falling on 32-byte boundaries relative to the aligned entry point), so
 * instructions must not be added, removed or reordered casually.
 */

#include <sys/asm_linkage.h>
#include <sys/sun4asi.h>

	ANSI_PRAGMA_WEAK(memset,function)

#define	ALIGN8(X)	(((X) + 7) & ~7)

#define	MEMSET_SMALL_LIMIT	12	/* n below this: byte loop only */
#define	MEMSET_BLKST_MIN	4095	/* count above this: block stores */
#define	MEMSET_FPRS_FEF		0x4	/* fef bit within %fprs */

	.section	".text"
	.align	32

	ENTRY(memset)
	cmp	%o2, MEMSET_SMALL_LIMIT	! short request?
	bgeu,pn	%ncc, .wrbig		! no, go use the wider stores
	mov	%o0, %o5		! (delay) advance a copy, %o0 stays put

	!
	! Short request: one byte per iteration.  The deccc borrows when
	! the count is exhausted (including n == 0), which ends the loop
	! with the store annulled.
	!
.wrchar:
	deccc	%o2
	inc	%o5
	bgeu,a,pt %ncc, .wrchar
	stb	%o1, [%o5 - 1]		! (annulled delay) pointer already bumped

	retl
	.empty				! delay slot is the andcc below: only
					! %o3 and the condition codes change

.wrbig:
	andcc	%o5, 7, %o3		! %o3 = pointer mod 8
	bz,pt	%ncc, .blkchk		! zero: already doubleword aligned
	and	%o1, 0xff, %o1		! (delay) reduce c to (char)c
	sub	%o3, 8, %o3		! %o3 = -(bytes up to the boundary)
	add	%o2, %o3, %o2		! charge those bytes to the count

	!
	! Byte stores until %o5 reaches an 8-byte boundary; %o3 counts up
	! from a negative value to zero.
	!
.bytealign:
	stb	%o1, [%o5]		! always at least one byte to store
	inccc	%o3
	bl,pt	%ncc, .bytealign
	inc	%o5			! (delay)

	!
	! %o5 is now doubleword aligned.  Widen the fill byte to 8 bytes
	! and choose between the stx path and the block-store path.  The
	! condition codes set by the cmp are still intact at the bgu,
	! because sllx and or do not modify them.
	!
.blkchk:
	sll	%o1, 8, %o3
	or	%o1, %o3, %o1		! 2 copies of the byte

	sll	%o1, 16, %o3
	or	%o1, %o3, %o1		! 4 copies

	cmp	%o2, MEMSET_BLKST_MIN	! large enough for block stores?

	sllx	%o1, 32, %o3
	or	%o1, %o3, %o1		! 8 copies

	bgu,a,pn %ncc, .blkwr		! yes: block-store path
	andcc	%o5, 63, %o3		! (annulled delay) %o3 = pointer mod 64

	and	%o2, 24, %o3		! %o3 = 0, 8, 16 or 24 leading bytes

	!
	! Store the 0-3 leading doublewords.  The add runs once more than
	! the stx, so on exit %o5 sits 8 bytes beyond the next byte to be
	! written; the offsets used from here to the retl allow for that.
	!
.dwhead:
	subcc	%o3, 8, %o3
	add	%o5, 8, %o5
	bgeu,a,pt %ncc, .dwhead
	stx	%o1, [%o5 - 8]		! (annulled delay)

	andncc	%o2, 31, %o4		! %o4 = count rounded down to 32
	bz,pn	%ncc, .dwtail		! this bz starts a 32-byte line
.dwloop32:
	subcc	%o4, 32, %o4		! also the delay slot of the bz above
	stx	%o1, [%o5 - 8]
	stx	%o1, [%o5]
	stx	%o1, [%o5 + 8]
	stx	%o1, [%o5 + 16]
	bnz,pt	%ncc, .dwloop32
	add	%o5, 32, %o5		! (delay)

.dwtail:
	and	%o2, 7, %o2		! fewer than 8 bytes remain

.bytetail:
	deccc	%o2
	inc	%o5
	bgeu,a,pt %ncc, .bytetail
	stb	%o1, [%o5 - 9]		! (annulled delay) 8 ahead plus the inc

	retl
	nop				! %o0 was never modified

	!
	! Block-store path.  On entry the condition codes and %o3 reflect
	! the pointer mod 64, computed in the delay slot of the branch that
	! came here.
	!
.blkwr:
	bz,pn	%ncc, .blalign		! already 64-byte aligned
	sub	%o3, 64, %o3		! (delay) %o3 = -(bytes up to boundary)
	add	%o2, %o3, %o2		! charge those bytes to the count

	!
	! Doubleword stores until %o5 reaches a 64-byte boundary.  The
	! pointer is already 8-byte aligned, so stx is sufficient.
	!
.blkhead:
	stx	%o1, [%o5]
	addcc	%o3, 8, %o3
	bl,pt	%ncc, .blkhead
	add	%o5, 8, %o5		! (delay)

	! %o5 is now block aligned
.blalign:
	rd	%fprs, %g1		! fetch the current fprs

	and	%o2, 63, %o3		! %o3 = bytes left after the blocks

	andcc	%g1, MEMSET_FPRS_FEF, %g1 ! keep only fef; du and dl dropped
	bz,a	%ncc, .fpready		! fef clear on entry?
	wr	%g0, MEMSET_FPRS_FEF, %fprs ! (annulled delay) then set it
.fpready:
	brnz,pn	%o1, .fpfill		! all 64 bits of %o1 are meaningful
	andn	%o2, 63, %o4		! (delay) %o4 = bytes in whole blocks

	!
	! Zero fill: clear the FP registers directly.  Two of them are
	! produced with fmuld of zero by zero rather than fzero; the
	! reason is not stated in this file (presumably instruction
	! scheduling -- confirm before changing).
	!
	fzero	%d0
	fzero	%d2
	fzero	%d4
	fzero	%d6
	fmuld	%d0, %d0, %d8
	fzero	%d10
	ba	.blkloop
	fmuld	%d0, %d0, %d12		! (delay)

.fpfill:
	!
	! Non-zero fill: move the pattern from %o1 into %d0 by way of an
	! 8-byte scratch slot; the stack is grown by SA(16) around it.
	!
	add	%sp, -SA(16), %sp
	stx	%o1, [%sp + STACK_BIAS + ALIGN8(MINFRAME)]
	ldd	[%sp + STACK_BIAS + ALIGN8(MINFRAME)], %d0

	fmovd	%d0, %d2
	add	%sp, SA(16), %sp	! give the scratch space back
	fmovd	%d0, %d4
	fmovd	%d0, %d6
	fmovd	%d0, %d8
	fmovd	%d0, %d10
	fmovd	%d0, %d12
.blkloop:
	fmovd	%d0, %d14		! executed on every pass of the loop

	!
	! %d0-%d14 now hold 64 bytes of the pattern.
	! The stda below falls on a 32-byte boundary.
	!
	stda	%d0, [%o5]ASI_BLK_P
	subcc	%o4, 64, %o4
	bgu,pt	%ncc, .blkloop
	add	%o5, 64, %o5		! (delay)

	! Whole doublewords left over after the last block
	subcc	%o3, 8, %o3		! room for at least one?
	blu,pn	%ncc, .fpbytes
	and	%o2, 7, %o2		! (delay) bytes left after doublewords

.fpdwloop:
	std	%d0, [%o5]
	subcc	%o3, 8, %o3
	bgeu,pt	%ncc, .fpdwloop
	add	%o5, 8, %o5		! (delay)
.fpbytes:
	! Final 0-7 bytes
	brz	%o2, .exit		! safe to check all 64-bits

#if 0
	! Terminate the copy with a partial store. (bug 1200071 does not apply)
	! The data should be at d0
	dec	%o2			! needed to get the mask right
	edge8n	%g0, %o2, %o4
	stda	%d0, [%o5]%o4, ASI_PST8_P
#else
.fpbyteloop:
	deccc	%o2			! also the delay slot of the brz above
	stb	%o1, [%o5]
	bgu,pt	%ncc, .fpbyteloop
	inc	%o5			! (delay)
#endif

.exit:
	membar	#StoreLoad|#StoreStore
	retl				! %o0 was never modified
	wr	%g1, %g0, %fprs		! (delay) fprs = entry fef bit only

	SET_SIZE(memset)