1*5d9d9091SRichard Lowe/* 2*5d9d9091SRichard Lowe * CDDL HEADER START 3*5d9d9091SRichard Lowe * 4*5d9d9091SRichard Lowe * The contents of this file are subject to the terms of the 5*5d9d9091SRichard Lowe * Common Development and Distribution License (the "License"). 6*5d9d9091SRichard Lowe * You may not use this file except in compliance with the License. 7*5d9d9091SRichard Lowe * 8*5d9d9091SRichard Lowe * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9*5d9d9091SRichard Lowe * or http://www.opensolaris.org/os/licensing. 10*5d9d9091SRichard Lowe * See the License for the specific language governing permissions 11*5d9d9091SRichard Lowe * and limitations under the License. 12*5d9d9091SRichard Lowe * 13*5d9d9091SRichard Lowe * When distributing Covered Code, include this CDDL HEADER in each 14*5d9d9091SRichard Lowe * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15*5d9d9091SRichard Lowe * If applicable, add the following below this CDDL HEADER, with the 16*5d9d9091SRichard Lowe * fields enclosed by brackets "[]" replaced with your own identifying 17*5d9d9091SRichard Lowe * information: Portions Copyright [yyyy] [name of copyright owner] 18*5d9d9091SRichard Lowe * 19*5d9d9091SRichard Lowe * CDDL HEADER END 20*5d9d9091SRichard Lowe */ 21*5d9d9091SRichard Lowe 22*5d9d9091SRichard Lowe/* 23*5d9d9091SRichard Lowe * Copyright 2008 Sun Microsystems, Inc. All rights reserved. 24*5d9d9091SRichard Lowe * Use is subject to license terms. 25*5d9d9091SRichard Lowe */ 26*5d9d9091SRichard Lowe 27*5d9d9091SRichard Lowe .file "memchr.s" 28*5d9d9091SRichard Lowe 29*5d9d9091SRichard Lowe/* 30*5d9d9091SRichard Lowe * Return the ptr in sptr at which the character c1 appears; 31*5d9d9091SRichard Lowe * or NULL if not found in n chars; don't stop at \0. 32*5d9d9091SRichard Lowe * void * 33*5d9d9091SRichard Lowe * memchr(const void *sptr, int c1, size_t n) 34*5d9d9091SRichard Lowe * { 35*5d9d9091SRichard Lowe * if (n != 0) { 36*5d9d9091SRichard Lowe * unsigned char c = (unsigned char)c1; 37*5d9d9091SRichard Lowe * const unsigned char *sp = sptr; 38*5d9d9091SRichard Lowe * 39*5d9d9091SRichard Lowe * do { 40*5d9d9091SRichard Lowe * if (*sp++ == c) 41*5d9d9091SRichard Lowe * return ((void *)--sp); 42*5d9d9091SRichard Lowe * } while (--n != 0); 43*5d9d9091SRichard Lowe * } 44*5d9d9091SRichard Lowe * return (NULL); 45*5d9d9091SRichard Lowe * } 46*5d9d9091SRichard Lowe */ 47*5d9d9091SRichard Lowe 48*5d9d9091SRichard Lowe#include <sys/asm_linkage.h> 49*5d9d9091SRichard Lowe 50*5d9d9091SRichard Lowe ! The first part of this algorithm focuses on determining 51*5d9d9091SRichard Lowe ! whether or not the desired character is in the first few bytes 52*5d9d9091SRichard Lowe ! of memory, aligning the memory for word-wise copies, and 53*5d9d9091SRichard Lowe ! initializing registers to detect zero bytes 54*5d9d9091SRichard Lowe 55*5d9d9091SRichard Lowe ENTRY(memchr) 56*5d9d9091SRichard Lowe 57*5d9d9091SRichard Lowe .align 32 58*5d9d9091SRichard Lowe 59*5d9d9091SRichard Lowe tst %o2 ! n == 0 ? 60*5d9d9091SRichard Lowe bz .notfound ! yup, c not found, return null ptr 61*5d9d9091SRichard Lowe andcc %o0, 3, %o4 ! s word aligned ? 62*5d9d9091SRichard Lowe add %o0, %o2, %o0 ! s + n 63*5d9d9091SRichard Lowe sub %g0, %o2, %o2 ! n = -n 64*5d9d9091SRichard Lowe bz .prepword ! yup, prepare for word-wise search 65*5d9d9091SRichard Lowe and %o1, 0xff, %o1 ! search only for this one byte 66*5d9d9091SRichard Lowe 67*5d9d9091SRichard Lowe ldub [%o0 + %o2], %o3 ! s[0] 68*5d9d9091SRichard Lowe cmp %o3, %o1 ! s[0] == c ? 69*5d9d9091SRichard Lowe be .done ! yup, done 70*5d9d9091SRichard Lowe nop ! 71*5d9d9091SRichard Lowe addcc %o2, 1, %o2 ! n++, s++ 72*5d9d9091SRichard Lowe bz .notfound ! c not found in first n bytes 73*5d9d9091SRichard Lowe cmp %o4, 3 ! only one byte needed to align? 74*5d9d9091SRichard Lowe bz .prepword2 ! yup, prepare for word-wise search 75*5d9d9091SRichard Lowe sll %o1, 8, %g1 ! start spreading c across word 76*5d9d9091SRichard Lowe ldub [%o0 + %o2], %o3 ! s[1] 77*5d9d9091SRichard Lowe cmp %o3, %o1 ! s[1] == c ? 78*5d9d9091SRichard Lowe be .done ! yup, done 79*5d9d9091SRichard Lowe nop ! 80*5d9d9091SRichard Lowe addcc %o2, 1, %o2 ! n++, s++ 81*5d9d9091SRichard Lowe bz .notfound ! c not found in first n bytes 82*5d9d9091SRichard Lowe cmp %o4, 2 ! only two bytes needed to align? 83*5d9d9091SRichard Lowe bz .prepword3 ! yup, prepare for word-wise search 84*5d9d9091SRichard Lowe sethi %hi(0x01010101), %o4 ! start loading Alan Mycroft's magic1 85*5d9d9091SRichard Lowe ldub [%o0 + %o2], %o3 ! s[1] 86*5d9d9091SRichard Lowe cmp %o3, %o1 ! s[1] == c ? 87*5d9d9091SRichard Lowe be .done ! yup, done 88*5d9d9091SRichard Lowe nop 89*5d9d9091SRichard Lowe addcc %o2, 1, %o2 ! n++, s++ 90*5d9d9091SRichard Lowe bz .notfound ! c not found in first n bytes 91*5d9d9091SRichard Lowe nop 92*5d9d9091SRichard Lowe 93*5d9d9091SRichard Lowe.prepword: 94*5d9d9091SRichard Lowe sll %o1, 8, %g1 ! spread c -------------+ 95*5d9d9091SRichard Lowe.prepword2: ! ! 96*5d9d9091SRichard Lowe sethi %hi(0x01010101), %o4 ! Alan Mycroft's magic1 ! 97*5d9d9091SRichard Lowe.prepword3: ! ! 98*5d9d9091SRichard Lowe or %o1, %g1, %o1 ! across all <---------+ 99*5d9d9091SRichard Lowe or %o4, %lo(0x01010101),%o4! finish loading magic1 ! 100*5d9d9091SRichard Lowe sll %o1, 16, %g1 ! four bytes <--------+ 101*5d9d9091SRichard Lowe sll %o4, 7, %o5 ! Alan Mycroft's magic2 ! 102*5d9d9091SRichard Lowe or %o1, %g1, %o1 ! of a word <--------+ 103*5d9d9091SRichard Lowe 104*5d9d9091SRichard Lowe.searchchar: 105*5d9d9091SRichard Lowe lduw [%o0 + %o2], %o3 ! src word 106*5d9d9091SRichard Lowe.searchchar2: 107*5d9d9091SRichard Lowe addcc %o2, 4, %o2 ! s+=4, n+=4 108*5d9d9091SRichard Lowe bcs .lastword ! if counter wraps, last word 109*5d9d9091SRichard Lowe xor %o3, %o1, %g1 ! tword = word ^ c 110*5d9d9091SRichard Lowe andn %o5, %g1, %o3 ! ~tword & 0x80808080 111*5d9d9091SRichard Lowe sub %g1, %o4, %g1 ! (tword - 0x01010101) 112*5d9d9091SRichard Lowe andcc %o3, %g1, %g0 ! ((tword - 0x01010101) & ~tword & 0x80808080) 113*5d9d9091SRichard Lowe bz,a .searchchar2 ! c not found if magic expression == 0 114*5d9d9091SRichard Lowe lduw [%o0 + %o2], %o3 ! src word 115*5d9d9091SRichard Lowe 116*5d9d9091SRichard Lowe ! here we know "word" contains the searched character, and no byte in 117*5d9d9091SRichard Lowe ! "word" exceeds n. If we had exceeded n, we would have gone to label 118*5d9d9091SRichard Lowe ! .lastword. "tword" has null bytes where "word" had c. After 119*5d9d9091SRichard Lowe ! restoring "tword" from "(tword - 0x01010101)" in %g1, examine "tword" 120*5d9d9091SRichard Lowe 121*5d9d9091SRichard Lowe.foundchar: 122*5d9d9091SRichard Lowe add %g1, %o4, %g1 ! restore tword 123*5d9d9091SRichard Lowe set 0xff000000, %o4 ! mask for 1st byte 124*5d9d9091SRichard Lowe andcc %g1, %o4, %g0 ! first byte zero (= found c) ? 125*5d9d9091SRichard Lowe bz,a .done ! yup, done 126*5d9d9091SRichard Lowe sub %o2, 4, %o2 ! n -= 4 (undo counter bumping) 127*5d9d9091SRichard Lowe set 0x00ff0000, %o5 ! mask for 2nd byte 128*5d9d9091SRichard Lowe andcc %g1, %o5, %g0 ! second byte zero (= found c) ? 129*5d9d9091SRichard Lowe bz,a .done ! yup, done 130*5d9d9091SRichard Lowe sub %o2, 3, %o2 ! n -= 3 (undo counter bumping) 131*5d9d9091SRichard Lowe srl %o4, 16, %o4 ! 0x0000ff00 = mask for 3rd byte 132*5d9d9091SRichard Lowe andcc %g1, %o4, %g0 ! third byte zero (= found c) ? 133*5d9d9091SRichard Lowe bz,a .done ! nope, must be fourth byte 134*5d9d9091SRichard Lowe sub %o2, 2, %o2 ! n -= 2 (undo counter bumping) 135*5d9d9091SRichard Lowe sub %o2, 1, %o2 ! n -= 1, if fourth byte 136*5d9d9091SRichard Lowe retl ! done with leaf function 137*5d9d9091SRichard Lowe add %o0, %o2, %o0 ! return pointer to c in s 138*5d9d9091SRichard Lowe.done: 139*5d9d9091SRichard Lowe retl ! done with leaf function 140*5d9d9091SRichard Lowe add %o0, %o2, %o0 ! return pointer to c in s 141*5d9d9091SRichard Lowe nop 142*5d9d9091SRichard Lowe nop 143*5d9d9091SRichard Lowe 144*5d9d9091SRichard Lowe ! Here we know that "word" is the last word in the search, and that 145*5d9d9091SRichard Lowe ! some bytes possibly exceed n. However, "word" might also contain c. 146*5d9d9091SRichard Lowe ! "tword" (in %g1) has null bytes where "word" had c. Examine "tword" 147*5d9d9091SRichard Lowe ! while keeping track of number of remaining bytes 148*5d9d9091SRichard Lowe 149*5d9d9091SRichard Lowe.lastword: 150*5d9d9091SRichard Lowe set 0xff000000, %o4 ! mask for 1st byte 151*5d9d9091SRichard Lowe sub %o2, 4, %o2 ! n -= 4 (undo counter bumping) 152*5d9d9091SRichard Lowe andcc %g1, %o4, %g0 ! first byte zero (= found c) ? 153*5d9d9091SRichard Lowe bz .done ! yup, done 154*5d9d9091SRichard Lowe set 0x00ff0000, %o5 ! mask for 2nd byte 155*5d9d9091SRichard Lowe addcc %o2, 1, %o2 ! n += 1 156*5d9d9091SRichard Lowe bz .notfound ! c not found in first n bytes 157*5d9d9091SRichard Lowe andcc %g1, %o5, %g0 ! second byte zero (= found c) ? 158*5d9d9091SRichard Lowe bz .done ! yup, done 159*5d9d9091SRichard Lowe srl %o4, 16, %o4 ! 0x0000ff00 = mask for 3rd byte 160*5d9d9091SRichard Lowe addcc %o2, 1, %o2 ! n += 1 161*5d9d9091SRichard Lowe bz .notfound ! c not found in first n bytes 162*5d9d9091SRichard Lowe andcc %g1, %o4, %g0 ! third byte zero (= found c) ? 163*5d9d9091SRichard Lowe bz .done ! yup, done 164*5d9d9091SRichard Lowe nop ! 165*5d9d9091SRichard Lowe addcc %o2, 1, %o2 ! n += 1 166*5d9d9091SRichard Lowe bz .notfound ! c not found in first n bytes 167*5d9d9091SRichard Lowe andcc %g1, 0xff, %g0 ! fourth byte zero (= found c) ? 168*5d9d9091SRichard Lowe bz .done ! yup, done 169*5d9d9091SRichard Lowe nop 170*5d9d9091SRichard Lowe 171*5d9d9091SRichard Lowe.notfound: 172*5d9d9091SRichard Lowe retl ! done with leaf function 173*5d9d9091SRichard Lowe mov %g0, %o0 ! return null pointer 174*5d9d9091SRichard Lowe 175*5d9d9091SRichard Lowe SET_SIZE(memchr) 176