/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

	.file	"strncpy.s"

/*
 * strncpy(s1, s2, n)
 *
 * Copy string s2 to s1, truncating or null-padding to always copy n bytes
 * return s1.
 *
 * Fast assembler language version of the following C-program for strncpy
 * which represents the `standard' for the C-library.
 *
 *	char *
 *	strncpy(char *s1, const char *s2, size_t n)
 *	{
 *		char *os1 = s1;
 *
 *		n++;
 *		while ((--n != 0) && ((*s1++ = *s2++) != '\0'))
 *			;
 *		if (n != 0)
 *			while (--n != 0)
 *				*s1++ = '\0';
 *		return (os1);
 *	}
 */

#include <sys/asm_linkage.h>

	! strncpy works similarly to strcpy, except that n bytes of s2
	! are copied to s1. If a null character is reached in s2 yet more
	! bytes remain to be copied, strncpy will copy null bytes into
	! the destination string.
	!
	! This implementation works by first aligning the src ptr and
	! performing small copies until it is aligned.  Then, the string
	! is copied based upon destination alignment.  (byte, half-word,
	! word, etc.)
	!
	! Register usage (as established by the code below):
	!
	!   %o0  s1 (dst) on entry; never written, so it is also the
	!        return value
	!   %o1  s2 (src) on entry; afterwards scratch for the src byte or
	!        dword most recently loaded
	!   %o2  n on entry; afterwards dst + n (biased by -8 while in the
	!        dword copy loops, un-biased again in .lastword); scratch
	!        in the pad code
	!   %o3  src + n while copying; reused as a plain dst pointer once
	!        padding starts (.zerobyte / .bytepad)
	!   %g4  -(bytes remaining); counted up towards zero, and used as
	!        the (negative) index off %o2 / %o3
	!   %o4  alignment counter in .alignsrc / .makealigned, magic1
	!        (0x0101010101010101) in the copy loops, dword counter in
	!        .fillaligned / .filldword
	!   %o5  magic2 (0x8080808080808080)
	!   %g1, %g5  scratch
	!
	! Most branches below have a useful instruction in their delay
	! slot; the order of instructions and the nop padding are
	! significant and must not be changed casually.

	ENTRY(strncpy)

	.align 32
	nop				! pad to align loop on 16-byte boundary
	subcc	%g0, %o2, %g4		! n = -n, n == 0 ?
	bz,pn	%ncc, .done		! n == 0, done
	add	%o1, %o2, %o3		! (delay slot) src = src + n
	andcc	%o1, 7, %o4		! src dword aligned ?
	bz,pn	%ncc, .dwordaligned	! yup
	add	%o0, %o2, %o2		! (delay slot) dst = dst + n
	sub	%o4, 8, %o4		! -(bytes until src aligned)

	! copy single bytes until src is dword aligned, n expires, or the
	! terminating null byte has been copied

.alignsrc:
	ldub	[%o3 + %g4], %o1	! src[]
	stb	%o1, [%o2 + %g4]	! dst[] = src[]
	addcc	%g4, 1, %g4		! src++, dst++, n--
	bz,pn	%ncc, .done		! n == 0, done
	tst	%o1			! (delay slot) end of src reached (null byte) ?
	bz,a	%ncc, .bytepad		! yes, at least one byte to pad here
	add	%o2, %g4, %o3		! (annulled unless taken) need single dest pointer for fill
	addcc	%o4, 1, %o4		! src aligned now?
	bnz,a	%ncc, .alignsrc		! no, copy another byte
	nop				! pad
	nop				! pad

	! src is dword aligned from here on.  Build the two magic
	! constants used to detect a zero byte in a dword, and pick the
	! copy loop that matches the alignment of dst.

.dwordaligned:
	sethi	%hi(0x01010101), %o4	! Alan Mycroft's magic1
	add	%o2, %g4, %g5		! dst
	or	%o4, %lo(0x01010101),%o4	! finish loading magic1
	and	%g5, 3, %g1		! dst<1:0> to examine offset
	sllx	%o4, 32, %o1		! spread magic1
	cmp	%g1, 1			! dst offset of 1 or 5
	or	%o4, %o1, %o4		! to all 64 bits
	sub	%o2, 8, %o2		! adjust for dest pre-incr in cpy loops
	be,pn	%ncc, .storebyte1241	! store 1, 2, 4, 1 bytes
	sllx	%o4, 7, %o5		! (delay slot) Alan Mycroft's magic2
	cmp	%g1, 3			! dst offset of 3 or 7
	be,pn	%ncc, .storebyte1421	! store 1, 4, 2, 1 bytes
	cmp	%g1, 2			! (delay slot) dst halfword aligned ?
	be,pn	%ncc, .storehalfword	! yup, store half-word wise
	andcc	%g5, 7, %g0		! (delay slot) dst dword aligned ?
	bnz,pn	%ncc, .storeword2	! no, only word aligned: store word wise
	nop				! ensure loop is 16-byte aligned

	! dst is dword aligned: copy whole dwords

.storedword:
	ldx	[%o3 + %g4], %o1	! src dword
	addcc	%g4, 8, %g4		! n += 8, src += 8, dst += 8
	bcs,pn	%ncc,.lastword		! if counter wraps, last word
	andn	%o5, %o1, %g1		! ~dword & 0x8080808080808080
	sub	%o1, %o4, %g5		! dword - 0x0101010101010101
	andcc	%g5, %g1, %g0		! ((dword - 0x0101010101010101) & ~dword & 0x8080808080808080)
	bz,a,pt	%ncc, .storedword	! no zero byte if magic expression == 0
	stx	%o1, [%o2 + %g4]	! (annulled unless taken) store word to dst (address pre-incremented)

	! n has not expired, but src is at the end. we need to push out the
	! remaining src bytes and then start padding with null bytes
	!
	! The bytes of the dword in %o1 are stored one at a time, most
	! significant byte first; as soon as a zero byte has been stored,
	! %o1 is cleared so that all following bytes are stored as zero.

.zerobyte:
	add	%o2, %g4, %o3		! pointer to dest string
	srlx	%o1, 56, %g1		! first byte
	stb	%g1, [%o3]		! store it
	andcc	%g1, 0xff, %g0		! end of string ?
	movz	%ncc, %g0, %o1		! if so, start padding with null bytes
	srlx	%o1, 48, %g1		! second byte
	stb	%g1, [%o3 + 1]		! store it
	andcc	%g1, 0xff, %g0		! end of string ?
	movz	%ncc, %g0, %o1		! if so, start padding with null bytes
	srlx	%o1, 40, %g1		! third byte
	stb	%g1, [%o3 + 2]		! store it
	andcc	%g1, 0xff, %g0		! end of string ?
	movz	%ncc, %g0, %o1		! if so, start padding with null bytes
	srlx	%o1, 32, %g1		! fourth byte
	stb	%g1, [%o3 + 3]		! store it
	andcc	%g1, 0xff, %g0		! end of string ?
	movz	%ncc, %g0, %o1		! if so, start padding with null bytes
	srlx	%o1, 24, %g1		! fifth byte
	stb	%g1, [%o3 + 4]		! store it
	andcc	%g1, 0xff, %g0		! end of string ?
	movz	%ncc, %g0, %o1		! if so, start padding with null bytes
	srlx	%o1, 16, %g1		! sixth byte
	stb	%g1, [%o3 + 5]		! store it
	andcc	%g1, 0xff, %g0		! end of string ?
	movz	%ncc, %g0, %o1		! if so, start padding with null bytes
	srlx	%o1, 8, %g1		! seventh byte
	stb	%g1, [%o3 + 6]		! store it
	andcc	%g1, 0xff, %g0		! end of string ?
	movz	%ncc, %g0, %o1		! if so, start padding with null bytes
	stb	%o1, [%o3 + 7]		! store eighth byte
	addcc	%g4, 16, %g0		! number of pad bytes <= 16 ?
	bcs,pn	%ncc, .bytepad		! yes, do simple byte wise fill
	add	%o3, 8, %o3		! (delay slot) dst += 8
	andcc	%o3, 7, %o4		! dst offset relative to dword boundary
	bz,pn	%ncc, .fillaligned	! dst already dword aligned

	! here there is at least one more byte to zero out: otherwise we would
	! have exited through label .lastword

	sub	%o4, 8, %o4		! (delay slot) -(bytes to align dst to dword boundary)
.makealigned:
	stb	%g0, [%o3]		! dst[] = 0
	addcc	%g4, 1, %g4		! n--
	bz,pt	%ncc, .done		! n == 0, we are done
	addcc	%o4, 1, %o4		! (delay slot) any more byte needed to align
	bnz,pt	%ncc, .makealigned	! yup, pad another byte
	add	%o3, 1, %o3		! (delay slot) dst++
	nop				! pad to align copy loop below
	nop				! pad to align copy loop below

	! here we know that there are at least another 8 bytes to pad, since
	! we don't get here unless there were more than 16 bytes to pad to
	! begin with, and we have padded at most 7 bytes during dst aligning
	!
	! The fill loop below clears two dwords per iteration.  One dword
	! is always stored up front; it only counts (n and dst are only
	! advanced past it) when the number of whole dwords is odd.

.fillaligned:
	add	%g4, 7, %o2		! round up to next dword boundary
	and	%o2, -8, %o4		! pointer to next dword boundary
	and	%o2, 8, %o2		! dword count odd ? 8 : 0
	stx	%g0, [%o3]		! store first dword
	addcc	%o4, %o2, %o4		! dword count == 1 ?
	add	%g4, %o2, %g4		! if dword count odd, n -= 8
	bz,pt	%ncc, .bytepad		! if dword count == 1, pad leftover bytes
	add	%o3, %o2, %o3		! (delay slot) bump dst if dword count odd

.filldword:
	addcc	%o4, 16, %o4		! count -= 16
	stx	%g0, [%o3]		! dst[n] = 0
	stx	%g0, [%o3 + 8]		! dst[n+8] = 0
	add	%o3, 16, %o3		! dst += 16
	bcc,pt	%ncc, .filldword	! fill dwords until count == 0
	addcc	%g4, 16, %g4		! (delay slot) n -= 16
	bz,pn	%ncc, .done		! if n == 0, we are done

	! The first instruction of .bytepad also sits in the delay slot of
	! the branch above; it only writes %o2, which is harmless when the
	! branch to .done is taken.
	!
	! .bytepad must be entered with at least one byte left to pad: it
	! stores one byte unconditionally.  That byte only counts when the
	! number of bytes left is odd; the loop then clears two bytes per
	! iteration.

.bytepad:
	and	%g4, 1, %o2		! byte count odd ? 1 : 0
	stb	%g0, [%o3]		! store first byte
	addcc	%g4, %o2, %g4		! byte count == 1 ?
	bz,pt	%ncc, .done		! yup, we are done
	add	%o3, %o2, %o3		! (delay slot) bump pointer if odd

.fillbyte:
	addcc	%g4, 2, %g4		! n -= 2
	stb	%g0, [%o3]		! dst[n] = 0
	stb	%g0, [%o3 + 1]		! dst[n+1] = 0
	bnz,pt	%ncc, .fillbyte		! fill until n == 0
	add	%o3, 2, %o3		! (delay slot) dst += 2

.done:
	retl				! done; %o0 still holds s1
	nop				! pad to align loops below
	nop				! pad to align loops below

	! this is the last word. It may contain null bytes. store bytes
	! until n == 0. if null byte encountered, continue
	! (the bytes following a null byte are stored as zero, because
	! %o1 is cleared once a zero byte has been stored)
	!
	! NOTE(review): the branches to .done in this section are plain
	! "bz" without the %ncc operand used everywhere else in this file.

.lastword:
	sub	%g4, 8, %g4		! undo counter pre-increment
	add	%o2, 8, %o2		! adjust dst for counter un-bumping

	srlx	%o1, 56, %g1		! first byte
	stb	%g1, [%o2 + %g4]	! store it
	inccc	%g4			! n--
	bz	.done			! if n == 0, we're done
	andcc	%g1, 0xff, %g0		! (delay slot) end of src reached ?
	movz	%ncc, %g0, %o1		! if so, start padding with null bytes
	srlx	%o1, 48, %g1		! second byte
	stb	%g1, [%o2 + %g4]	! store it
	inccc	%g4			! n--
	bz	.done			! if n == 0, we're done
	andcc	%g1, 0xff, %g0		! (delay slot) end of src reached ?
	movz	%ncc, %g0, %o1		! if so, start padding with null bytes
	srlx	%o1, 40, %g1		! third byte
	stb	%g1, [%o2 + %g4]	! store it
	inccc	%g4			! n--
	bz	.done			! if n == 0, we're done
	andcc	%g1, 0xff, %g0		! (delay slot) end of src reached ?
	movz	%ncc, %g0, %o1		! if so, start padding with null bytes
	srlx	%o1, 32, %g1		! fourth byte
	stb	%g1, [%o2 + %g4]	! store it
	inccc	%g4			! n--
	bz	.done			! if n == 0, we're done
	andcc	%g1, 0xff, %g0		! (delay slot) end of src reached ?
	movz	%ncc, %g0, %o1		! if so, start padding with null bytes
	srlx	%o1, 24, %g1		! fifth byte
	stb	%g1, [%o2 + %g4]	! store it
	inccc	%g4			! n--
	bz	.done			! if n == 0, we're done
	andcc	%g1, 0xff, %g0		! (delay slot) end of src reached ?
	movz	%ncc, %g0, %o1		! if so, start padding with null bytes
	srlx	%o1, 16, %g1		! sixth byte
	stb	%g1, [%o2 + %g4]	! store it
	inccc	%g4			! n--
	bz	.done			! if n == 0, we're done
	andcc	%g1, 0xff, %g0		! (delay slot) end of src reached ?
	movz	%ncc, %g0, %o1		! if so, start padding with null bytes
	srlx	%o1, 8, %g1		! seventh byte
	stb	%g1, [%o2 + %g4]	! store it
	inccc	%g4			! n--
	bz	.done			! if n == 0, we're done
	andcc	%g1, 0xff, %g0		! (delay slot) end of src reached ?
	movz	%ncc, %g0, %o1		! if so, start padding with null bytes
	ba	.done			! here n must be zero, we are done
	stb	%o1, [%o2 + %g4]	! (delay slot) store eighth byte
	nop				! pad to align loops below
	nop				! pad to align loops below

	! The four loops below are variants of .storedword for a dst that
	! is not dword aligned: each dword loaded from src is written with
	! a fixed sequence of byte / half-word / word stores, chosen so
	! that every store is naturally aligned.

	! dst offset 3 or 7 within a dword

.storebyte1421:
	ldx	[%o3 + %g4], %o1	! x = src[]
	addcc	%g4, 8, %g4		! src += 8, dst += 8
	bcs,pn	%ncc,.lastword		! if counter wraps, last word
	andn	%o5, %o1, %g1		! ~x & 0x8080808080808080
	sub	%o1, %o4, %g5		! x - 0x0101010101010101
	andcc	%g5, %g1, %g0		! ((x - 0x0101010101010101) & ~x & 0x8080808080808080)
	bnz,pn	%ncc, .zerobyte		! end of src found, may need to pad
	add	%o2, %g4, %g5		! (delay slot) dst (in pointer form)
	srlx	%o1, 56, %g1		! %g1<7:0> = first byte; word aligned now
	stb	%g1, [%g5]		! store first byte
	srlx	%o1, 24, %g1		! %g1<31:0> = bytes 2, 3, 4, 5
	stw	%g1, [%g5 + 1]		! store bytes 2, 3, 4, 5
	srlx	%o1, 8, %g1		! %g1<15:0> = bytes 6, 7
	sth	%g1, [%g5 + 5]		! store bytes 6, 7
	ba	.storebyte1421		! next dword
	stb	%o1, [%g5 + 7]		! (delay slot) store eighth byte

	! dst offset 1 or 5 within a dword

.storebyte1241:
	ldx	[%o3 + %g4], %o1	! x = src[]
	addcc	%g4, 8, %g4		! src += 8, dst += 8
	bcs,pn	%ncc,.lastword		! if counter wraps, last word
	andn	%o5, %o1, %g1		! ~x & 0x8080808080808080
	sub	%o1, %o4, %g5		! x - 0x0101010101010101
	andcc	%g5, %g1, %g0		! ((x - 0x0101010101010101) & ~x & 0x8080808080808080)
	bnz,pn	%ncc, .zerobyte		! x has zero byte, handle end cases
	add	%o2, %g4, %g5		! (delay slot) dst (in pointer form)
	srlx	%o1, 56, %g1		! %g1<7:0> = first byte; half-word aligned now
	stb	%g1, [%g5]		! store first byte
	srlx	%o1, 40, %g1		! %g1<15:0> = bytes 2, 3
	sth	%g1, [%g5 + 1]		! store bytes 2, 3
	srlx	%o1, 8, %g1		! %g1<31:0> = bytes 4, 5, 6, 7
	stw	%g1, [%g5 + 3]		! store bytes 4, 5, 6, 7
	ba	.storebyte1241		! next dword
	stb	%o1, [%g5 + 7]		! (delay slot) store eighth byte

	! dst half-word aligned (offset 2 or 6 within a dword)

.storehalfword:
	ldx	[%o3 + %g4], %o1	! x = src[]
	addcc	%g4, 8, %g4		! src += 8, dst += 8
	bcs,pn	%ncc,.lastword		! if counter wraps, last word
	andn	%o5, %o1, %g1		! ~x & 0x8080808080808080
	sub	%o1, %o4, %g5		! x - 0x0101010101010101
	andcc	%g5, %g1, %g0		! ((x - 0x0101010101010101) & ~x & 0x8080808080808080)
	bnz,pn	%ncc, .zerobyte		! x has zero byte, handle end cases
	add	%o2, %g4, %g5		! (delay slot) dst (in pointer form)
	srlx	%o1, 48, %g1		! %g1<15:0> = bytes 1, 2; word aligned now
	sth	%g1, [%g5]		! store bytes 1, 2
	srlx	%o1, 16, %g1		! %g1<31:0> = bytes 3, 4, 5, 6
	stw	%g1, [%g5 + 2]		! store bytes 3, 4, 5, 6
	ba	.storehalfword		! next dword
	sth	%o1, [%g5 + 6]		! (delay slot) store bytes 7, 8
	nop				! align next loop to 16-byte boundary
	nop				! align next loop to 16-byte boundary

	! dst word aligned but not dword aligned (offset 4 within a dword)

.storeword2:
	ldx	[%o3 + %g4], %o1	! x = src[]
	addcc	%g4, 8, %g4		! src += 8, dst += 8
	bcs,pn	%ncc,.lastword		! if counter wraps, last word
	andn	%o5, %o1, %g1		! ~x & 0x8080808080808080
	sub	%o1, %o4, %g5		! x - 0x0101010101010101
	andcc	%g5, %g1, %g0		! ((x - 0x0101010101010101) & ~x & 0x8080808080808080)
	bnz,pn	%ncc, .zerobyte		! x has zero byte, handle end cases
	add	%o2, %g4, %g5		! (delay slot) dst (in pointer form)
	srlx	%o1, 32, %g1		! %g1<31:0> = bytes 1, 2, 3, 4
	stw	%g1, [%g5]		! store bytes 1, 2, 3, 4
	ba	.storeword2		! next dword
	stw	%o1, [%g5 + 4]		! (delay slot) store bytes 5, 6, 7, 8

	! do not remove these pads, loop above may slow down otherwise

	nop				! pad
	nop				! pad

	SET_SIZE(strncpy)