/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

	.file "%M%"

/*
 * strcpy(s1, s2)
 *
 * Copy string s2 to s1. s1 must be large enough. Return s1.
 *
 * Fast assembler language version of the following C-program strcpy
 * which represents the `standard' for the C-library.
 *
 *	char *
 *	strcpy(s1, s2)
 *	register char *s1;
 *	register const char *s2;
 *	{
 *		char *os1 = s1;
 *
 *		while(*s1++ = *s2++)
 *			;
 *		return(os1);
 *	}
 *
 */

#include <sys/asm_linkage.h>

	! This implementation of strcpy works by first checking the
	! source alignment and copying byte, halfword, or word
	! quantities until the source ptr is aligned at an extended
	! word boundary. Once this has occurred, the string is copied,
	! checking for zero bytes, depending upon its dst ptr alignment.
	! (methods for xword, word, half-word, and byte copies are present)
	!
	! Register usage:
	!	%o0	s1 (dst) on entry; never written below, so it still
	!		holds s1 when retl is executed
	!	%o1	s2 (src) on entry; from .srcaligned onwards it is
	!		reused, first as a temporary and then for the
	!		current source doubleword
	!	%o2	running dst pointer
	!	%o3	src - dst, so [%o2 + %o3] addresses the source byte
	!		that corresponds to [%o2]; reused as a byte
	!		temporary in .zerobyte
	!	%o4	temporary while aligning; magic1
	!		(0x0101010101010101) in the copy loops; byte mask
	!		in .zerobyte
	!	%o5	temporary while aligning; magic2
	!		(0x8080808080808080) in the copy loops
	!	%g1	-src while aligning (low bits select which pieces
	!		to copy); temporary in the copy loops
	!	%g4	temporary
	!
	! All branches are delayed: the instruction following a branch
	! (its delay slot) is executed before the branch target.  Only
	! the annulled branch in .storedword (bz,a) skips its delay slot
	! when the branch is not taken.  The instruction order below
	! relies on this throughout.
	!
	! The aligned loops load a full doubleword before testing it for
	! a zero byte, so source bytes that follow the terminator inside
	! the same aligned doubleword are read.  They are never stored:
	! .zerobyte writes bytes only up to and including the terminator.

	ENTRY(strcpy)

	.align 32

	sub	%o1, %o0, %o3		! %o3 = src - dst
	andcc	%o1, 7, %o4		! dword aligned ?
	bz,pn	%ncc, .srcaligned	! yup
	mov	%o0, %o2		! (delay slot) working dst; %o0 stays s1

.chkbyte:
	andcc	%o1, 1, %g0		! need to copy byte ?
	bz,pn	%ncc, .chkhalfword	! nope, maybe halfword
	sub	%g0, %o1, %g1		! (delay slot, runs on both paths)
					! %g1<2:0> = # of unaligned bytes
	ldub	[%o2 + %o3], %o5	! src[0]
	tst	%o5			! src[0] == 0 ?
	stb	%o5, [%o2]		! dst[0] = src[0] (terminator included)
	bz,pn	%ncc, .done		! yup, done
	inc	%o2			! src++, dst++

.chkhalfword:
	andcc	%g1, 2, %g0		! need to copy half-word ?
	bz,pn	%ncc, .chkword		! nope, maybe word
	nop				!
	lduh	[%o2 + %o3], %o5	! load src halfword
	srl	%o5, 8, %o4		! extract first byte
	tst	%o4			! first byte == 0 ?
	bz,pn	%ncc, .done		! yup, done
	stb	%o4, [%o2]		! (delay slot) store first byte, zero or not
	andcc	%o5, 0xff, %g0		! second byte zero ?
	stb	%o5, [%o2 + 1]		! store second byte
	bz,pn	%ncc, .done		! yup, 2nd byte zero, done
	add	%o2, 2, %o2		! src += 2, dst += 2

.chkword:
	andcc	%g1, 4, %g0		! need to copy word ?
	bz,pn	%ncc, .srcaligned	! nope
	nop				!
	lduw	[%o2 + %o3], %o5	! load src word
	srl	%o5, 24, %o4		! extract first byte
	tst	%o4			! is first byte zero ?
	bz,pn	%ncc, .done		! yup, done
	stb	%o4, [%o2]		! (delay slot) store first byte
	srl	%o5, 16, %o4		! extract second byte
	andcc	%o4, 0xff, %g0		! is second byte zero ?
	bz,pn	%ncc, .done		! yup, done
	stb	%o4, [%o2 + 1]		! (delay slot) store second byte
	srl	%o5, 8, %o4		! extract third byte
	andcc	%o4, 0xff, %g0		! third byte zero ?
	bz,pn	%ncc, .done		! yup, done
	stb	%o4, [%o2 + 2]		! (delay slot) store third byte
	andcc	%o5, 0xff, %g0		! fourth byte zero ?
	stb	%o5, [%o2 + 3]		! store fourth byte
	bz,pn	%ncc, .done		! yup, fourth byte zero, done
	add	%o2, 4, %o2		! src += 4, dst += 4

	! The source is dword aligned from here on.  Build the two
	! zero-byte detection constants and pick a store loop from the
	! dst alignment.  Each cmp/andcc below sits in the delay slot of
	! the branch before it and sets the condition codes for the
	! branch after it.
.srcaligned:
	sethi	%hi(0x01010101), %o4	! Alan Mycroft's magic1
	or	%o4, %lo(0x01010101),%o4	! finish loading magic1
	sllx	%o4, 32, %o1		! spread magic1
	and	%o2, 3, %g4		! dst<1:0> to examine offset
	or	%o4, %o1, %o4		!   to all 64 bits
	cmp	%g4, 1			! dst offset of 1 or 5
	sllx	%o4, 7, %o5		!  Alan Mycroft's magic2
	be,pn	%ncc, .storebyte1241	! store 1, 2, 4, 1 bytes
	cmp	%g4, 3			! dst offset of 3 or 7
	be,pn	%ncc, .storebyte1421	! store 1, 4, 2, 1 bytes
	cmp	%g4, 2			! dst halfword aligned ?
	be,pn	%ncc, .storehalfword	! yup, store half-word wise
	andcc	%o2, 7, %g0		! dst<1:0> is 0 here; dword aligned ?
	bnz,pn	%ncc, .storeword2	! nope, only word aligned: store word wise
	.empty				! delay slot is intentionally the ldx at
					! .storedword; .storeword2 therefore
					! skips the ldx at .storeword

	! dst dword aligned: copy 8 bytes per iteration with one stx.
.storedword:
	ldx	[%o2 + %o3], %o1	! src dword
	add	%o2, 8, %o2		! src += 8, dst += 8
	andn	%o5, %o1, %g1		! ~dword & 0x8080808080808080
	sub	%o1, %o4, %g4		! dword - 0x0101010101010101
	andcc	%g4, %g1, %g0		! ((dword - 0x0101010101010101) & ~dword & 0x8080808080808080)
	bz,a,pt	%ncc, .storedword	! no zero byte if magic expression == 0
	stx	%o1, [%o2 - 8]		! (annulled delay slot: only when looping)
					! store dword to dst (address pre-incremented)

	! Common tail for all loops: %o1 holds a dword that contains a
	! zero byte and %o2 has already been advanced by 8.  Copy byte by
	! byte, most significant first, stopping after the first zero
	! byte.  Each stb is in the delay slot of the preceding branch,
	! so the zero byte itself is stored before leaving.
.zerobyte:
	orn	%o4, %g0, %o4		! 0xffffffffffffffff
	sllx	%o4, 56, %o4		! 0xff00000000000000
	srlx	%o1, 56, %o3		! %o3<7:0> = first byte
	andcc	%o1, %o4, %g0		! first byte zero?
	bz,pn	%ncc, .done		! yup, done
	stb	%o3, [%o2 - 8]		! store first byte
	srlx	%o4, 8, %o4		! 0x00ff000000000000
	srlx	%o1, 48, %o3		! %o3<7:0> = second byte
	andcc	%o1, %o4, %g0		! second byte zero?
	bz,pn	%ncc, .done		! yup, done
	stb	%o3, [%o2 - 7]		! store second byte
	srlx	%o4, 8, %o4		! 0x0000ff0000000000
	srlx	%o1, 40, %o3		! %o3<7:0> = third byte
	andcc	%o1, %o4, %g0		! third byte zero?
	bz,pn	%ncc, .done		! yup, done
	stb	%o3, [%o2 - 6]		! store third byte
	srlx	%o4, 8, %o4		! 0x000000ff00000000
	srlx	%o1, 32, %o3		! %o3<7:0> = fourth byte
	andcc	%o1, %o4, %g0		! fourth byte zero?
	bz,pn	%ncc, .done		! yup, done
	stb	%o3, [%o2 - 5]		! store fourth byte
	srlx	%o4, 8, %o4		! 0x00000000ff000000
	srlx	%o1, 24, %o3		! %o3<7:0> = fifth byte
	andcc	%o1, %o4, %g0		! fifth byte zero?
	bz,pn	%ncc, .done		! yup, done
	stb	%o3, [%o2 - 4]		! store fifth byte
	srlx	%o4, 8, %o4		! 0x0000000000ff0000
	srlx	%o1, 16, %o3		! %o3<7:0> = sixth byte
	andcc	%o1, %o4, %g0		! sixth byte zero?
	bz,pn	%ncc, .done		! yup, done
	stb	%o3, [%o2 - 3]		! store sixth byte
	srlx	%o4, 8, %o4		! 0x000000000000ff00
	andcc	%o1, %o4, %g0		! seventh byte zero?
	srlx	%o1, 8, %o3		! %o3<7:0> = seventh byte (srlx leaves cc alone)
	bz,pn	%ncc, .done		! yup, done
	stb	%o3, [%o2 - 2]		! store seventh byte
	stb	%o1, [%o2 - 1]		! store eighth byte; bytes 1-7 were non-zero,
					! so this one is the terminator
.done:
	retl				! done with leaf function; %o0 is still s1

	nop				! retl delay slot; also
					! ensure following loop 16-byte aligned

	! dst<1:0> == 3: a byte store makes dst word aligned, then a
	! word, a halfword and a final byte complete the 8 bytes.
.storebyte1421:
	ldx	[%o2 + %o3], %o1	! x = src[]
	add	%o2, 8, %o2		! src += 8, dst += 8
	andn	%o5, %o1, %g1		! ~x & 0x8080808080808080
	sub	%o1, %o4, %g4		! x - 0x0101010101010101
	andcc	%g4, %g1, %g0		! ((x - 0x0101010101010101) & ~x & 0x8080808080808080)
	bnz,pn	%ncc, .zerobyte		! x has zero byte, handle end cases
	srlx	%o1, 56, %g1		! %g1<7:0> = first byte; word aligned now
	stb	%g1, [%o2 - 8]		! store first byte
	srlx	%o1, 24, %g1		! %g1<31:0> = bytes 2, 3, 4, 5
	stw	%g1, [%o2 - 7]		! store bytes 2, 3, 4, 5
	srlx	%o1, 8, %g1		! %g1<15:0> = bytes 6, 7
	sth	%g1, [%o2 - 3]		! store bytes 6, 7
	ba	.storebyte1421		! next dword
	stb	%o1, [%o2 - 1]		! (delay slot) store eighth byte

	nop				! ensure following loop 16-byte aligned
	nop				! ensure following loop 16-byte aligned

	! dst<1:0> == 1: a byte store makes dst halfword aligned, then a
	! halfword, a word and a final byte complete the 8 bytes.
.storebyte1241:
	ldx	[%o2 + %o3], %o1	! x = src[]
	add	%o2, 8, %o2		! src += 8, dst += 8
	andn	%o5, %o1, %g1		! ~x & 0x8080808080808080
	sub	%o1, %o4, %g4		! x - 0x0101010101010101
	andcc	%g4, %g1, %g0		! ((x - 0x0101010101010101) & ~x & 0x8080808080808080)
	bnz,pn	%ncc, .zerobyte		! x has zero byte, handle end cases
	srlx	%o1, 56, %g1		! %g1<7:0> = first byte; halfword aligned now
	stb	%g1, [%o2 - 8]		! store first byte
	srlx	%o1, 40, %g1		! %g1<15:0> = bytes 2, 3
	sth	%g1, [%o2 - 7]		! store bytes 2, 3
	srlx	%o1, 8, %g1		! %g1<31:0> = bytes 4, 5, 6, 7
	stw	%g1, [%o2 - 5]		! store bytes 4, 5, 6, 7
	ba	.storebyte1241		! next dword
	stb	%o1, [%o2 - 1]		! (delay slot) store eighth byte

	nop				! ensure following loop 16-byte aligned
	nop				! ensure following loop 16-byte aligned

	! dst<1:0> == 2: halfword, word, halfword per 8 source bytes.
.storehalfword:
	ldx	[%o2 + %o3], %o1	! x = src[]
	add	%o2, 8, %o2		! src += 8, dst += 8
	andn	%o5, %o1, %g1		! ~x & 0x8080808080808080
	sub	%o1, %o4, %g4		! x - 0x0101010101010101
	andcc	%g4, %g1, %g0		! ((x - 0x0101010101010101) & ~x & 0x8080808080808080)
	bnz,pn	%ncc, .zerobyte		! x has zero byte, handle end cases
	srlx	%o1, 48, %g1		! get first and second byte
	sth	%g1, [%o2 - 8]		! store first and second byte; word aligned now
	srlx	%o1, 16, %g1		! %g1<31:0> = bytes 3, 4, 5, 6
	stw	%g1, [%o2 - 6]		! store bytes 3, 4, 5, 6
	ba	.storehalfword		! next dword
	sth	%o1, [%o2 - 2]		! (delay slot) store seventh and eighth byte

	! dst word aligned but not dword aligned: two word stores per
	! 8 source bytes.  First entry is at .storeword2, with %o1
	! already loaded by the ldx in the delay slot of the dispatching
	! branch in .srcaligned.
.storeword:
	ldx	[%o2 + %o3], %o1	! x = src[]
.storeword2:
	add	%o2, 8, %o2		! src += 8, dst += 8
	andn	%o5, %o1, %g1		! ~x & 0x8080808080808080
	sub	%o1, %o4, %g4		! x - 0x0101010101010101
	andcc	%g4, %g1, %g0		! ((x - 0x0101010101010101) & ~x & 0x8080808080808080)
	bnz,pn	%ncc, .zerobyte		! x has zero byte, handle end cases
	srlx	%o1, 32, %g1		! get bytes 1,2,3,4
	stw	%g1, [%o2 - 8]		! store bytes 1,2,3,4 (address is pre-incremented)
	ba	.storeword		! next dword
	stw	%o1, [%o2 - 4]		! (delay slot) store bytes 5,6,7,8

	nop				! padding, do not remove!!!
	nop				! padding, do not remove!!!
	SET_SIZE(strcpy)
