/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright (c) 2000, 2010, Oracle and/or its affiliates. All rights reserved.
 */

	.file	"memcmp.s"

/*
 * memcmp(s1, s2, len)
 *
 * Compare n bytes:  s1>s2: >0  s1==s2: 0  s1<s2: <0
 *
 * Fast assembler language version of the following C-program for memcmp
 * which represents the `standard' for the C-library.
 *
 *	int
 *	memcmp(const void *s1, const void *s2, size_t n)
 *	{
 *		if (s1 != s2 && n != 0) {
 *			const char *ps1 = s1;
 *			const char *ps2 = s2;
 *			do {
 *				if (*ps1++ != *ps2++)
 *					return(ps1[-1] - ps2[-1]);
 *			} while (--n != 0);
 *		}
 *		return (0);
 *	}
 *
 * Register usage in the code below:
 *	%o0	in: s1; advanced as bytes are consumed; out: result
 *	%o1	in: s2; advanced likewise (later turned into s2 - s1)
 *	%o2	in: byte count; counts down the bytes still to compare
 *
 * Note: the code below loads every byte with ldub, so the value returned
 * on a mismatch is the difference of the two bytes taken as zero-extended
 * (unsigned) values, whereas the C sketch above is written with plain char.
 *
 * Outline:
 *	1. s1 == s2 returns 0 immediately.
 *	2. Counts <= 48 are handled entirely by the first byte loop.
 *	3. Larger counts byte-compare 8 + (bytes needed to 8-byte align s1)
 *	   bytes, then compare doublewords with VIS instructions, then
 *	   finish (or locate the differing byte) with a second byte loop.
 */

#include <sys/asm_linkage.h>
#include <sys/machasi.h>

#define	BLOCK_SIZE	64

	ANSI_PRAGMA_WEAK(memcmp,function)

	ENTRY(memcmp)
	cmp	%o0, %o1		! s1 == s2?
	be	%ncc, .cmpeq
	! The branch is not annulled, so the first prefetch below sits in its
	! delay slot and is issued on both paths.
	prefetch [%o0], #one_read
	prefetch [%o1], #one_read

	! for small counts byte compare immediately
	cmp	%o2, 48
	! Annulled branch: the mov in the delay slot runs only when the
	! branch is taken, making the loop count o3 equal to the full count.
	bleu,a	%ncc, .bytcmp
	mov	%o2, %o3		! o3 <= 48

	! Count > 48. We will byte compare (8 + num of bytes to dbl align)
	! bytes.  We assume that most miscompares will occur in the 1st 8 bytes

	prefetch [%o0 + (1 * BLOCK_SIZE)], #one_read
	prefetch [%o1 + (1 * BLOCK_SIZE)], #one_read

	! o4 = (s1 & 7) - 8 is negative, so o3 = 8 - o4 = 16 - (s1 & 7).
	! An already aligned s1 therefore byte-compares 16 bytes.
.chkdbl:
	and	%o0, 7, %o4		! is s1 aligned on a 8 byte bound
	mov	8, %o3			! o2 > 48;  o3 = 8
	sub	%o4, 8, %o4		! o4 = -(num of bytes to dbl align)
	ba	%ncc, .bytcmp
	sub	%o3, %o4, %o3		! o3 = 8 + (num of bytes to dbl align)

	! First byte loop.  It is entered at .bytcmp with o3 = number of bytes
	! to compare here.  Each pass: deccc o3; while no borrow occurs, the
	! annulled delay slot loads the s1 byte into o4 and control goes to 1:,
	! which loads the s2 byte into o5, advances both pointers, decrements
	! the overall count o2 and compares.  The bne at the bottom is not
	! annulled, so the deccc at .bytcmp doubles as its delay slot; on a
	! mismatch o3 is decremented once more, which is harmless because
	! .noteq does not use it.
1:	ldub	[%o1], %o5		! byte compare loop
	inc	%o1
	inc	%o0
	dec	%o2
	cmp	%o4, %o5
	bne	%ncc, .noteq
.bytcmp:
	deccc	%o3
	bgeu,a	%ncc, 1b
	ldub	[%o0], %o4

	! Check to see if there are more bytes to compare
	! (On the small-count path o3 started equal to o2, so both reach zero
	! together and only the count > 48 path continues to .dwcmp.)
	cmp	%o2, 0			! is o2 > 0
	bgu	%ncc, .dwcmp		! we should already be dbl aligned
	nop
.cmpeq:
	retl				! strings compare equal
	sub	%g0, %g0, %o0

.noteq:
	retl				! strings aren't equal
	sub	%o4, %o5, %o0		! return(*s1 - *s2)


	! double word compare - using ldd and faligndata. Compares up to
	! 8 byte multiple count and does byte compare for the residual.

.dwcmp:
	prefetch [%o0 + (2 * BLOCK_SIZE)], #one_read
	prefetch [%o1 + (2 * BLOCK_SIZE)], #one_read

	! if fprs.fef == 0, set it. Checking it, requires 2 instructions.
	! So set it anyway, without checking.
	! The previous fprs value is kept in o3 until the exit paths below.
	rd	%fprs, %o3		! o3 = fprs
	wr	%g0, 0x4, %fprs		! fprs.fef = 1

	! o4 = (o2 rounded down to a multiple of 8) - 8: one doubleword is
	! held back from the doubleword loop and left to the residual byte
	! loop.  NOTE(review): presumably this is because the loop always
	! reads one s2 doubleword ahead -- confirm.
	andn	%o2, 7, %o4		! o4 has 8 byte aligned cnt
	sub	%o4, 8, %o4
	! s1 (o0) is 8-byte aligned here but s2 (o1) need not be: alignaddr
	! puts o1 rounded down to an 8-byte boundary in g1 and records the
	! byte offset used by faligndata in the loop.
	alignaddr %o1, %g0, %g1
	ldd	[%g1], %d0
	! Doubleword loop.  d0 = previous aligned s2 doubleword, d2 = next
	! aligned s2 doubleword, d6 = s1 doubleword.  faligndata extracts from
	! d0:d2 the 8 bytes of s2 that line up with d6 into d8; fcmpne32 sets
	! o5 non-zero if either 32-bit half differs.
4:
	add	%g1, 8, %g1
	ldd	[%g1], %d2
	ldd	[%o0], %d6
	prefetch [%g1 + (3 * BLOCK_SIZE)], #one_read
	prefetch [%o0 + (3 * BLOCK_SIZE)], #one_read
	faligndata %d0, %d2, %d8
	fcmpne32 %d6, %d8, %o5
	fsrc1	%d6, %d6		! 2 fsrc1's added since o5 cannot
	fsrc1	%d8, %d8		! be used for 3 cycles else we
	fmovd	%d2, %d0		! create 9 bubbles in the pipeline
	! On a mismatch, go to the residual byte loop to find the differing
	! byte.  o0, o1 and o2 have not yet been advanced past this
	! doubleword.  The delay slot is annulled unless the branch is taken.
	brnz,a,pn %o5, 6f
	sub	%o1, %o0, %o1		! o1 gets the difference
	subcc	%o4, 8, %o4
	add	%o0, 8, %o0
	add	%o1, 8, %o1
	! Not annulled: the sub in the delay slot runs on every pass, so o2
	! stays in step with the bytes already compared.
	bgu,pt	%ncc, 4b
	sub	%o2, 8, %o2

	! Residual byte loop.  o1 is turned into the offset s2 - s1, so that
	! [%o0 + %o1] addresses s2 while only o0 has to be advanced.  The loop
	! has the same shape as the first byte loop: entry is at 6:, and the
	! deccc there is also the delay slot of the bne above it.
.residcmp:
	ba	6f
	sub	%o1, %o0, %o1		! o1 gets the difference

5:	ldub	[%o0 + %o1], %o5	! byte compare loop
	inc	%o0
	cmp	%o4, %o5
	bne	%ncc, .dnoteq
6:
	deccc	%o2
	bgeu,a	%ncc, 5b
	ldub	[%o0], %o4

	! Both exits below write back the saved fprs value masked with 0x4,
	! i.e. only the bit that was set as fprs.fef above is preserved.
	and	%o3, 0x4, %o3		! fprs.du = fprs.dl = 0
	wr	%o3, %g0, %fprs		! fprs = o3 - restore fprs
	retl
	sub	%g0, %g0, %o0		! strings compare equal

.dnoteq:
	and	%o3, 0x4, %o3		! fprs.du = fprs.dl = 0
	wr	%o3, %g0, %fprs		! fprs = o3 - restore fprs
	retl
	sub	%o4, %o5, %o0		! return(*s1 - *s2)

	SET_SIZE(memcmp)