/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
 */

	.file	"memcmp.s"

/*
 * memcmp(s1, s2, n)
 *
 * Compare n bytes:  s1>s2: >0  s1==s2: 0  s1<s2: <0
 *
 * Fast assembler language version of the following C-program for memcmp
 * which represents the `standard' for the C-library.
 *
 *	int
 *	memcmp(const void *s1, const void *s2, size_t n)
 *	{
 *		if (s1 != s2 && n != 0) {
 *			const char *ps1 = s1;
 *			const char *ps2 = s2;
 *			do {
 *				if (*ps1++ != *ps2++)
 *					return(ps1[-1] - ps2[-1]);
 *			} while (--n != 0);
 *		}
 *		return (0);
 *	}
 *
 * Note: the assembly loads every byte with ldub (zero-extending), so on a
 * mismatch the value returned is the difference of the two bytes taken as
 * unsigned values.
 *
 * Strategy:
 *	1. n <= 48: compare everything one byte at a time.
 *	2. n >  48: byte compare 16 - (s1 & 7) bytes, which leaves s1 8-byte
 *	   aligned, then compare 8 bytes per iteration with VIS instructions
 *	   (s2 is realigned with alignaddr/faligndata), and finish the
 *	   residual, or pin down a mismatching doubleword, one byte at a time.
 *
 * Register usage:
 *	%o0	s1 cursor (return value on exit)
 *	%o1	s2 cursor; (s2 - s1) during the residual byte loop
 *	%o2	bytes remaining
 *	%o3	iteration count of the first byte loop; saved %fprs afterwards
 *	%o4	current s1 byte; doubleword-loop byte budget in .dwcmp
 *	%o5	current s2 byte; fcmpne32 result in .dwcmp
 *	%g1	s2 cursor rounded down to an 8-byte boundary
 *	%d0-%d8	floating point scratch for the doubleword compare
 */

#include <sys/asm_linkage.h>
#include <sys/machasi.h>

#define	BLOCK_SIZE	64

	ANSI_PRAGMA_WEAK(memcmp,function)

	ENTRY(memcmp)
	cmp	%o0, %o1		! s1 == s2?
	be	%ncc, .cmpeq		! same buffer: equal by definition
	prefetch [%o0], #one_read	! (delay slot, runs on both paths)
	prefetch [%o1], #one_read

	! for small counts byte compare immediately
	cmp	%o2, 48
	bleu,a	%ncc, .bytcmp
	mov	%o2, %o3		! o3 <= 48 (annulled if not taken)

	! Count > 48. We will byte compare (8 + num of bytes to dbl align)
	! bytes. We assume that most miscompares will occur in the 1st 8 bytes

	prefetch [%o0 + (1 * BLOCK_SIZE)], #one_read
	prefetch [%o1 + (1 * BLOCK_SIZE)], #one_read

.chkdbl:
	and	%o0, 7, %o4		! is s1 aligned on a 8 byte bound
	mov	8, %o3			! o2 > 48;  o3 = 8
	sub	%o4, 8, %o4		! o4 = -(num of bytes to dbl align)
	ba	%ncc, .bytcmp
	sub	%o3, %o4, %o3		! o3 = 8 + (num of bytes to dbl align)
					! i.e. 16 - (s1 & 7), range 9..16

	! First byte loop. Entered at .bytcmp with o3 = number of bytes to
	! compare. deccc borrows (carry set) only when o3 was already 0, so
	! bgeu loops exactly o3 times. The annulled delay slot loads the s1
	! byte only when the loop is actually taken.
1:	ldub	[%o1], %o5		! byte compare loop
	inc	%o1
	inc	%o0
	dec	%o2
	cmp	%o4, %o5
	bne	%ncc, .noteq
.bytcmp:
	deccc	%o3			! also the delay slot of the bne above
	bgeu,a	%ncc, 1b
	ldub	[%o0], %o4

	! Check to see if there are more bytes to compare
	cmp	%o2, 0			! is o2 > 0
	bgu	%ncc, .dwcmp		! we should already be dbl aligned
	nop
.cmpeq:
	retl				! strings compare equal
	sub	%g0, %g0, %o0

.noteq:
	retl				! strings aren't equal
	sub	%o4, %o5, %o0		! return(*s1 - *s2)


	! double word compare - using ldd and faligndata. Compares up to
	! 8 byte multiple count and does byte compare for the residual.
	!
	! Only reached on the count > 48 path: the small-count path enters
	! the byte loop with o3 = o2 and therefore leaves it with o2 = 0.

.dwcmp:
	prefetch [%o0 + (2 * BLOCK_SIZE)], #one_read
	prefetch [%o1 + (2 * BLOCK_SIZE)], #one_read

	! if fprs.fef == 0, set it. Checking it, requires 2 instructions.
	! So set it anyway, without checking.
	rd	%fprs, %o3		! o3 = fprs
	wr	%g0, 0x4, %fprs		! fprs.fef = 1

	andn	%o2, 7, %o4		! o4 has 8 byte aligned cnt
	sub	%o4, 8, %o4		! hold back one doubleword for the
					! byte loop; NOTE(review): presumably
					! because s2 is loaded one doubleword
					! ahead below - confirm
	alignaddr %o1, %g0, %g1		! g1 = o1 rounded down to 8 bytes,
					! byte offset kept for faligndata
	ldd	[%g1], %d0		! prime d0 with first s2 doubleword
4:
	add	%g1, 8, %g1
	ldd	[%g1], %d2		! next aligned s2 doubleword
	ldd	[%o0], %d6		! s1 doubleword (s1 is 8-byte aligned)
	prefetch [%g1 + (3 * BLOCK_SIZE)], #one_read
	prefetch [%o0 + (3 * BLOCK_SIZE)], #one_read
	faligndata %d0, %d2, %d8	! d8 = 8 bytes of s2 at its true offset
	fcmpne32 %d6, %d8, %o5		! o5 != 0 if either 32-bit half differs
	fsrc1	%d6, %d6		! 2 fsrc1's added since o5 cannot
	fsrc1	%d8, %d8		! be used for 3 cycles else we
	fmovd	%d2, %d0		! create 9 bubbles in the pipeline
	brnz,a,pn %o5, 6f		! mismatch: locate the byte at 6f
	sub	%o1, %o0, %o1		! o1 gets the difference
	subcc	%o4, 8, %o4
	add	%o0, 8, %o0
	add	%o1, 8, %o1
	bgu,pt	%ncc, 4b
	sub	%o2, 8, %o2		! (delay slot, runs on loop exit too)

.residcmp:
	ba	6f
	sub	%o1, %o0, %o1		! o1 gets the difference

	! Residual byte loop. o1 now holds (s2 - s1), so [%o0 + %o1] is the
	! s2 byte matching the s1 byte at [%o0]; only o0 has to advance.
	! Loop control is the same deccc/bgeu,a idiom as above, keyed on o2.
5:	ldub	[%o0 + %o1], %o5	! byte compare loop
	inc	%o0
	cmp	%o4, %o5
	bne	%ncc, .dnoteq
6:
	deccc	%o2			! also the delay slot of the bne above
	bgeu,a	%ncc, 5b
	ldub	[%o0], %o4

	and	%o3, 0x4, %o3		! fprs.du = fprs.dl = 0
	wr	%o3, %g0, %fprs		! fprs = o3 - restore fprs
	retl
	sub	%g0, %g0, %o0		! strings compare equal

.dnoteq:
	and	%o3, 0x4, %o3		! fprs.du = fprs.dl = 0
	wr	%o3, %g0, %fprs		! fprs = o3 - restore fprs
	retl
	sub	%o4, %o5, %o0		! return(*s1 - *s2)

	SET_SIZE(memcmp)