/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

	.file	"strcpy.s"

/*
 * strcpy(s1, s2)
 *
 * Copy string s2 to s1.  s1 must be large enough. Return s1.
 *
 * Fast assembler language version of the following C-program strcpy
 * which represents the `standard' for the C-library.
 *
 *	char *
 *	strcpy(s1, s2)
 *	register char *s1;
 *	register const char *s2;
 *	{
 *		char *os1 = s1;
 *
 *		while(*s1++ = *s2++)
 *			;
 *		return(os1);
 *	}
 *
 */

#include <sys/asm_linkage.h>

	! This is a 32-bit implementation of strcpy.  It first checks the
	! alignment of the source pointer and, if the source is not word
	! aligned, copies one to three bytes until it is.  From then on the
	! source is read one aligned word at a time; each word is tested
	! for a zero byte and then stored using the widest stores the
	! destination alignment permits.  Separate loops handle word,
	! half-word and byte aligned destinations.
	!
	! Register usage:
	!	%o0	s1 (dst) on entry; never written, so it is still
	!		s1 when the function returns
	!	%o1	s2 (src) on entry; afterwards the data just loaded
	!	%o2	running dst pointer
	!	%o3	src - dst, so [%o2 + %o3] is the source byte that
	!		corresponds to dst position %o2 (reused as scratch
	!		in .zerobyte, after the last load)
	!	%o4	src & 3 during alignment, then 0x01010101, then a
	!		byte mask in .zerobyte
	!	%o5	0x80808080, then a byte mask in .zerobyte
	!	%g1	scratch
	!
	! Only %o2 is ever advanced; because the source is addressed as
	! %o2 + %o3, the comments "src += n, dst += n" describe a single
	! add to %o2.
	!
	! Data is handled big-endian: the most significant byte of a
	! loaded half-word or word is the lowest-addressed ("first") byte.
	!
	! The instruction after each branch sits in its delay slot and is
	! executed whether or not the branch is taken, except after the
	! annulling forms (bz,a / be,a), where it is executed only when
	! the branch is taken.  Do not reorder instructions around
	! branches without taking this into account.
	!
	! The word loops load a whole aligned source word before testing
	! it, so up to three bytes following the terminating zero byte
	! (within the same aligned word) may be read; they are never
	! stored.

	ENTRY(strcpy)

	.align 32

	sub	%o1, %o0, %o3		! src - dst
	andcc	%o1, 3, %o4		! src word aligned ?
	bz	.srcaligned		! yup
	mov	%o0, %o2		! save dst (delay slot, always done)

	! Source is not word aligned; %o4 = src & 3 is 1, 2 or 3.
	cmp	%o4, 2			! src halfword aligned
	be	.s2aligned		! yup
	ldub	[%o2 + %o3], %o1	! src[0] (delay slot; unused if taken)
	tst	%o1			! byte zero?
	stb	%o1, [%o2]		! store first byte (flags unchanged)
	bz	.done			! yup, done
	cmp	%o4, 3			! only one byte needed to align?
	bz	.srcaligned		! yup
	inc	%o2			! src++, dst++ (delay slot)

	! Source is half-word aligned but not word aligned: copy two bytes.
.s2aligned:
	lduh	[%o2 + %o3], %o1	! src[]
	srl	%o1, 8, %o4		! %o4<7:0> = first byte
	tst	%o4			! first byte zero ?
	bz	.done			! yup, done
	stb	%o4, [%o2]		! store first byte (even if it is zero)
	andcc	%o1, 0xff, %g0		! second byte zero ?
	bz	.done			! yup, done
	stb	%o1, [%o2 + 1]		! store second byte (even if zero)
	add	%o2, 2, %o2		! src += 2, dst += 2

	! Source is now word aligned.  Build the two constants used by the
	! zero-byte test and choose a copy loop from the dst alignment.
.srcaligned:
	sethi	%hi(0x01010101), %o4	! Alan Mycroft's magic1
	sethi	%hi(0x80808080), %o5	! Alan Mycroft's magic2
	or	%o4, %lo(0x01010101), %o4
	andcc	%o2, 3, %o1		! destination word aligned?
	bnz	.dstnotaligned		! nope; %o1 = dst & 3
	or	%o5, %lo(0x80808080), %o5 ! (delay slot) finish magic2

	! Source and destination both word aligned: copy a word per pass.
.copyword:
	lduw	[%o2 + %o3], %o1	! src word
	add	%o2, 4, %o2		! src += 4, dst += 4
	andn	%o5, %o1, %g1		! ~word & 0x80808080
	sub	%o1, %o4, %o1		! word - 0x01010101
	andcc	%o1, %g1, %g0		! ((word - 0x01010101) & ~word & 0x80808080)
	add	%o1, %o4, %o1		! restore word
	bz,a	.copyword		! no zero byte if magic expression == 0
	st	%o1, [%o2 - 4]		! store word to dst (address pre-incremented)
					! annulled (no store) on fall-through

	! The word in %o1 contains a zero byte and %o2 already points four
	! bytes past where it belongs.  Copy its bytes one at a time, up to
	! and including the first zero byte.  %o3, %o4 and %o5 are reused
	! here, so no further source loads are possible.
.zerobyte:
	set	0xff000000, %o4		! mask for 1st byte
	srl	%o1, 24, %o3		! %o3<7:0> = first byte
	andcc	%o1, %o4, %g0		! first byte zero?
	bz	.done			! yup, done
	stb	%o3, [%o2 - 4]		! store first byte
	set	0x00ff0000, %o5		! mask for 2nd byte
	srl	%o1, 16, %o3		! %o3<7:0> = second byte
	andcc	%o1, %o5, %g0		! second byte zero?
	bz	.done			! yup, done
	stb	%o3, [%o2 - 3]		! store second byte
	srl	%o4, 16, %o4		! 0x0000ff00 = mask for 3rd byte
	andcc	%o1, %o4, %g0		! third byte zero?
	srl	%o1, 8, %o3		! %o3<7:0> = third byte
	bz	.done			! yup, done
	stb	%o3, [%o2 - 2]		! store third byte
	stb	%o1, [%o2 - 1]		! store fourth byte (must be the zero)

	! Return with %o0 still holding the original dst.  No instruction
	! is placed after retl on purpose: its delay slot is the next
	! instruction in memory, the cmp at .dstnotaligned, which only sets
	! condition codes.  .empty marks that arrangement as intentional
	! for the assembler.
.done:
	retl				! done with leaf function
	.empty

	! Destination is not word aligned; %o1 = dst & 3 is 1, 2 or 3.
.dstnotaligned:
	cmp	%o1, 2			! dst half word aligned?
	be,a	.storehalfword2		! yup, store half word at a time
	lduw	[%o2 + %o3], %o1	! src word (only executed if taken)

	! Destination is at an odd address: each source word is written as
	! byte, half-word, byte so that the half-word store is aligned.
.storebyte:
	lduw	[%o2 + %o3], %o1	! src word
	add	%o2, 4, %o2		! src += 4, dst += 4
	sub	%o1, %o4, %g1		! x - 0x01010101
	andn	%g1, %o1, %g1		! (x - 0x01010101) & ~x
	andcc	%g1, %o5, %g0		! ((x - 0x01010101) & ~x & 0x80808080)
	bnz	.zerobyte		! word has zero byte, handle end cases
	srl	%o1, 24, %g1		! %g1<7:0> = first byte (delay slot)
	stb	%g1, [%o2 - 4]		! store first byte; half-word aligned now
	srl	%o1, 8, %g1		! %g1<15:0> = byte 2, 3
	sth	%g1, [%o2 - 3]		! store bytes 2, 3
	ba	.storebyte		! next word
	stb	%o1, [%o2 - 1]		! store fourth byte (delay slot)

	! Destination is half-word aligned: each source word is written as
	! two half-word stores.  The first pass enters at .storehalfword2
	! with the source word already loaded by the be,a delay slot above.
.storehalfword:
	lduw	[%o2 + %o3], %o1	! src word
.storehalfword2:
	add	%o2, 4, %o2		! src += 4, dst += 4
	sub	%o1, %o4, %g1		! x - 0x01010101
	andn	%g1, %o1, %g1		! (x - 0x01010101) & ~x
	andcc	%g1, %o5, %g0		! ((x - 0x01010101) & ~x & 0x80808080)
	bnz	.zerobyte		! word has zero byte, handle end cases
	srl	%o1, 16, %g1		! get first and second byte (delay slot)
	sth	%g1, [%o2 - 4]		! store first and second byte
	ba	.storehalfword		! next word
	sth	%o1, [%o2 - 2]		! store third and fourth byte (delay slot)

	! DO NOT remove these NOPs. It will slow down the halfword loop by 15%

	nop				! padding
	nop				! padding

	SET_SIZE(strcpy)