/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

	.file	"memcpy.s"

/*
 * memcpy(s1, s2, len)
 *
 * Copy s2 to s1, always copy len bytes.
 * Note: this does not work for overlapped copies, bcopy() does
 *
 * Added entry __align_cpy_1 is generally for use of the compilers.
 *
 *
 * Fast assembler language version of the following C-program for memcpy
 * which represents the `standard' for the C-library.
 *
 *	void *
 *	memcpy(void *s, const void *s0, size_t n)
 *	{
 *		if (n != 0) {
 *			char *s1 = s;
 *			const char *s2 = s0;
 *			do {
 *				*s1++ = *s2++;
 *			} while (--n != 0);
 *		}
 *		return (s);
 *	}
 */

#include <sys/asm_linkage.h>

	ANSI_PRAGMA_WEAK(memcpy,function)

/*
 * Strategy:
 *   - len <= 17: copy byte by byte (.dbytecp).
 *   - Otherwise copy 1-3 bytes until the source is word aligned, then pick
 *     a word loop from the destination alignment (dst & 3):
 *	0	.w4cp	straight word copy
 *	1	.w1cp	store 1 byte + 1 halfword, then shift/merge words
 *	2	.w2cp	store 1 halfword, then shift/merge words
 *	3	.w3cp	store 1 byte, then shift/merge words
 *   - Whatever is left is finished by the byte loop at label 7.
 *
 * Register usage:
 *	%o0	dst as passed in; never written here, so it is still the
 *		return value at retl
 *	%o1	src pointer; before each copy loop it is replaced by
 *		(src - dst) so that [%o1 + %g5] addresses the source
 *	%o2	bytes remaining
 *	%g5	working destination pointer
 *	%o3	byte/halfword in flight, later byte count for the word loops
 *	%o4	most recently loaded source data
 *	%o5	alignment (address & 3), later scratch
 *	%g1	merged word about to be stored
 *
 * Every branch has a delay slot: the instruction after a branch executes
 * before control reaches the target.  The one annulled branch (",a", in
 * the final byte loop) runs its delay slot only when it is taken.
 */
	ENTRY(memcpy)
	ENTRY(__align_cpy_1)
	mov	%o0, %g5		! g5 = working dst pointer; o0 is left
					! untouched as the return value
	cmp	%o2, 17			! for small counts copy bytes
	bleu,pn	%xcc, .dbytecp
	andcc	%o1, 3, %o5		! is src word aligned
	bz,pn	%icc, .aldst
	cmp	%o5, 2			! is src half-word aligned
	be,pt	%xcc, .s2algn
	cmp	%o5, 3			! src is byte aligned
.s1algn:ldub	[%o1], %o3		! move 1 or 3 bytes to align it
	inc	1, %o1
	stb	%o3, [%g5]		! move a byte to align src
	inc	1, %g5
	! flags are still those of "cmp %o5, 3": src & 3 was 1, so two
	! more bytes are needed; otherwise src is now word aligned
	bne,pt	%icc, .s2algn
	dec	%o2			! delay slot: runs on both paths
	b	.ald			! now go align dest
	andcc	%g5, 3, %o5

.s2algn:lduh	[%o1], %o3		! know src is 2 byte aligned
	inc	2, %o1
	srl	%o3, 8, %o4		! o4 = high-order byte of the halfword
	stb	%o4, [%g5]		! have to do bytes,
	stb	%o3, [%g5 + 1]		! don't know dst alignment
	inc	2, %g5
	dec	2, %o2

.aldst:	andcc	%g5, 3, %o5		! align the destination address
.ald:	bz,pn	%icc, .w4cp		! dst & 3 == 0: plain word copy
	cmp	%o5, 2
	bz,pn	%icc, .w2cp		! dst & 3 == 2
	cmp	%o5, 3			! consumed by the bne in .w3cp
.w3cp:	lduw	[%o1], %o4		! dst & 3 is 1 or 3: get first src word
	inc	4, %o1
	srl	%o4, 24, %o5
	stb	%o5, [%g5]		! store its high-order byte
	bne,pt	%icc, .w1cp		! dst & 3 was 1: halfword still needed
	inc	%g5			! delay slot: runs on both paths
	dec	1, %o2			! dst & 3 was 3: dst is now word aligned
	andn	%o2, 3, %o3		! o3 is aligned word count
	dec	4, %o3			! avoid reading beyond tail of src
	sub	%o1, %g5, %o1		! o1 gets the difference

	! o4 holds 3 bytes not yet stored; each pass stores them together
	! with the first byte of the next source word
1:	sll	%o4, 8, %g1		! save residual bytes
	lduw	[%o1+%g5], %o4
	deccc	4, %o3
	srl	%o4, 24, %o5		! merge with residual
	or	%o5, %g1, %g1
	st	%g1, [%g5]
	bnz,pt	%xcc, 1b
	inc	4, %g5
	sub	%o1, 3, %o1		! used one byte of last word read
	and	%o2, 3, %o2
	b	7f
	inc	4, %o2			! tail = leftover bytes + the word held back

.w1cp:	srl	%o4, 8, %o5
	sth	%o5, [%g5]		! store bytes 1-2 of the word; with the
					! byte stored in .w3cp dst is now aligned
	inc	2, %g5
	dec	3, %o2
	andn	%o2, 3, %o3		! o3 is aligned word count
	dec	4, %o3			! avoid reading beyond tail of src
	sub	%o1, %g5, %o1		! o1 gets the difference

	! o4 holds 1 byte not yet stored; each pass stores it together
	! with the first 3 bytes of the next source word
2:	sll	%o4, 24, %g1		! save residual bytes
	lduw	[%o1+%g5], %o4
	deccc	4, %o3
	srl	%o4, 8, %o5		! merge with residual
	or	%o5, %g1, %g1
	st	%g1, [%g5]
	bnz,pt	%xcc, 2b
	inc	4, %g5
	sub	%o1, 1, %o1		! used three bytes of last word read
	and	%o2, 3, %o2
	b	7f
	inc	4, %o2			! tail = leftover bytes + the word held back

.w2cp:	lduw	[%o1], %o4
	inc	4, %o1
	srl	%o4, 16, %o5
	sth	%o5, [%g5]		! store upper halfword; dst now aligned
	inc	2, %g5
	dec	2, %o2
	andn	%o2, 3, %o3		! o3 is aligned word count
	dec	4, %o3			! avoid reading beyond tail of src
	sub	%o1, %g5, %o1		! o1 gets the difference

	! o4 holds 2 bytes not yet stored; each pass stores them together
	! with the first 2 bytes of the next source word
3:	sll	%o4, 16, %g1		! save residual bytes
	lduw	[%o1+%g5], %o4
	deccc	4, %o3
	srl	%o4, 16, %o5		! merge with residual
	or	%o5, %g1, %g1
	st	%g1, [%g5]
	bnz,pt	%xcc, 3b
	inc	4, %g5
	sub	%o1, 2, %o1		! used two bytes of last word read
	and	%o2, 3, %o2
	b	7f
	inc	4, %o2			! tail = leftover bytes + the word held back

	! src and dst are both word aligned: copy whole words
.w4cp:	andn	%o2, 3, %o3		! o3 is aligned word count
	sub	%o1, %g5, %o1		! o1 gets the difference

1:	lduw	[%o1+%g5], %o4		! read from address
	deccc	4, %o3			! decrement count
	st	%o4, [%g5]		! write at destination address
	bgu,pt	%xcc, 1b
	inc	4, %g5			! increment to address
	b	7f
	and	%o2, 3, %o2		! number of leftover bytes, if any

	!
	! differenced byte copy, works with any alignment
	!
.dbytecp:
	b	7f
	sub	%o1, %g5, %o1		! o1 gets the difference

	! Byte loop, entered at 7.  deccc borrows (sets carry) once o2 has
	! reached 0; bgeu is then not taken and the annulled ldub is skipped.
4:	stb	%o4, [%g5]		! write to address
	inc	%g5			! inc to address
7:	deccc	%o2			! decrement count
	bgeu,a,pt %xcc,4b		! loop till done
	ldub	[%o1+%g5], %o4		! read from address
	retl				! o0 still holds the original dst
	nop

	SET_SIZE(memcpy)
	SET_SIZE(__align_cpy_1)