/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2008 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

	.file	"memchr.s"

/*
 * Return the ptr in sptr at which the character c1 appears;
 * or NULL if not found in n chars; don't stop at \0.
 * void *
 * memchr(const void *sptr, int c1, size_t n)
 * {
 *	if (n != 0) {
 *		unsigned char c = (unsigned char)c1;
 *		const unsigned char *sp = sptr;
 *
 *		do {
 *			if (*sp++ == c)
 *				return ((void *)--sp);
 *		} while (--n != 0);
 *	}
 *	return (NULL);
 * }
 *
 * Register usage in the assembly below:
 *	%o0	sptr on entry; rebased to s + n (one past the last byte)
 *	%o1	c1 on entry; masked to one byte, then replicated into all
 *		four bytes of a 32-bit word
 *	%o2	n on entry; negated, so [%o0 + %o2] addresses the current
 *		byte/word and the counter reaching zero (or wrapping) means
 *		the n bytes are exhausted
 *	%o3	byte or word just loaded; later a scratch for the magic test
 *	%o4	s & 3 during alignment; then 0x01010101 (magic1); then a
 *		byte mask once a candidate word is being examined
 *	%o5	0x80808080 (magic2); then a byte mask
 *	%g1	"tword" = word ^ replicated c (zero byte wherever word had c)
 *	%g4	tword - 0x01010101
 */

#include <sys/asm_linkage.h>

	! The first part of this algorithm focuses on determining
	! whether or not the desired character is in the first few bytes
	! of memory, aligning the pointer for word-wise searching, and
	! initializing registers to detect zero bytes.
	!
	! Note that the instruction following each branch sits in the
	! branch's delay slot; several of them do useful set-up work for
	! whichever path is taken next.

	ENTRY(memchr)

	.align 32

	tst	%o2			! n == 0 ?
	bz	%ncc, .notfound		! yup, c not found, return null ptr
	andcc	%o0, 3, %o4		! s word aligned ? (%o4 = s & 3)
	add	%o0, %o2, %o0		! s + n
	sub	%g0, %o2, %o2		! n = -n
	bz	%ncc, .prepword		! yup (tests the andcc above; add/sub
					! leave the condition codes alone)
	and	%o1, 0xff, %o1		! search only for this one byte

	ldub	[%o0 + %o2], %o3	! s[0]
	cmp	%o3, %o1		! s[0] == c ?
	be	%ncc, .done		! yup, done
	nop				!
	addcc	%o2, 1, %o2		! n++, s++
	bz	%ncc, .notfound		! c not found in first n bytes
	cmp	%o4, 3			! only one byte needed to align?
	bz	%ncc, .prepword2	! yup, prepare for word-wise search
	sllx	%o1, 8, %g1		! start spreading c across word
	ldub	[%o0 + %o2], %o3	! s[1]
	cmp	%o3, %o1		! s[1] == c ?
	be	%ncc, .done		! yup, done
	nop				!
	addcc	%o2, 1, %o2		! n++, s++
	bz	%ncc, .notfound		! c not found in first n bytes
	cmp	%o4, 2			! only two bytes needed to align?
	bz	%ncc, .prepword3	! yup, prepare for word-wise search
	sethi	%hi(0x80808080), %o5	! start loading Alan Mycroft's magic2
	ldub	[%o0 + %o2], %o3	! s[2]
	cmp	%o3, %o1		! s[2] == c ?
	be	%ncc, .done		! yup, done
	nop				!
	addcc	%o2, 1, %o2		! n++, s++
	bz	%ncc, .notfound		! c not found in first n bytes
	nop				!

	! Three entry points into the same set-up sequence.  .prepword2 and
	! .prepword3 skip the instructions that were already executed in the
	! delay slots of the branches that jump here.  On exit:
	!	%o1 = c replicated into all four bytes of a word
	!	%o5 = 0x80808080 (magic2)
	!	%o4 = 0x01010101 (magic1)

.prepword:
	sllx	%o1, 8, %g1		! %g1 = c << 8
.prepword2:
	sethi	%hi(0x80808080), %o5	! upper bits of Alan Mycroft's magic2
.prepword3:
	or	%o1, %g1, %o1		! c now in the low two bytes
	or	%o5, %lo(0x80808080),%o5 ! finish loading magic2
	sllx	%o1, 16, %g1		! %g1 = (two copies of c) << 16
	srlx	%o5, 7, %o4		! magic1 = magic2 >> 7 = 0x01010101
	or	%o1, %g1, %o1		! c now in all four bytes of a word

	! Main loop: test one aligned word per iteration.  The magic
	! expression is non-zero exactly when some byte of tword is zero,
	! i.e. when some byte of the source word equals c.

.searchchar:
	lduw	[%o0 + %o2], %o3	! src word
.searchchar2:
	addcc	%o2, 4, %o2		! s+=4, n+=4
	bcs	%ncc, .lastword		! if counter wraps, last word
	xor	%o3, %o1, %g1		! tword = word ^ c
	andn	%o5, %g1, %o3		! ~tword & 0x80808080
	sub	%g1, %o4, %g4		! (tword - 0x01010101)
	andcc	%o3, %g4, %g0		! ((tword - 0x01010101) & ~tword & 0x80808080)
	bz,a	%ncc, .searchchar2	! c not found if magic expression == 0
	lduw	[%o0 + %o2], %o3	! src word (annulled delay slot: only
					! executed when the branch is taken)

	! here we know "word" contains the searched character, and no byte in
	! "word" exceeds n. If we had exceeded n, we would have gone to label
	! .lastword. "tword" has null bytes where "word" had c.  "tword" is
	! still intact in %g1 (the "(tword - 0x01010101)" value was written
	! to %g4), so examine it byte by byte, most significant byte first.
	! The counter was already bumped by 4, so each exit backs it up by
	! the appropriate amount in the annulled delay slot.

.foundchar:
	set	0xff000000, %o4		! mask for 1st byte
	andcc	%g1, %o4, %g0		! first byte zero (= found c) ?
	bz,a	%ncc, .done		! yup, done
	sub	%o2, 4, %o2		! n -= 4 (undo counter bumping)
	nop
	set	0x00ff0000, %o5		! mask for 2nd byte
	andcc	%g1, %o5, %g0		! second byte zero (= found c) ?
	bz,a	%ncc, .done		! yup, done
	sub	%o2, 3, %o2		! n -= 3 (undo counter bumping)
	srlx	%o4, 16, %o4		! 0x0000ff00 = mask for 3rd byte
	andcc	%g1, %o4, %g0		! third byte zero (= found c) ?
	bz,a	%ncc, .done		! yup, done
	sub	%o2, 2, %o2		! n -= 2 (undo counter bumping)
	sub	%o2, 1, %o2		! nope, must be fourth byte: n -= 1
	retl				! done with leaf function
	add	%o0, %o2, %o0		! return pointer to c in s
.done:
	retl				! done with leaf function
	add	%o0, %o2, %o0		! return pointer to c in s
	nop
	nop

	! Here we know that "word" is the last word in the search, and that
	! some bytes possibly exceed n. However, "word" might also contain c.
	! "tword" (in %g1) has null bytes where "word" had c. Examine "tword"
	! while keeping track of number of remaining bytes: the counter is
	! first backed up by 4, then stepped by 1 per byte, and reaching zero
	! means the remaining bytes lie beyond n.
	!
	! NOTE(review): "set" is a synthetic instruction and one use below
	! sits in a branch delay slot; presumably it assembles to a single
	! sethi because the low bits of 0x00ff0000 are zero -- confirm with
	! the assembler in use before changing that constant.

.lastword:
	set	0xff000000, %o4		! mask for 1st byte
	sub	%o2, 4, %o2		! n -= 4 (undo counter bumping)
	andcc	%g1, %o4, %g0		! first byte zero (= found c) ?
	bz	%ncc, .done		! yup, done
	set	0x00ff0000, %o5		! mask for 2nd byte
	addcc	%o2, 1, %o2		! n += 1
	bz	%ncc, .notfound		! c not found in first n bytes
	andcc	%g1, %o5, %g0		! second byte zero (= found c) ?
	bz	%ncc, .done		! yup, done
	srlx	%o4, 16, %o4		! 0x0000ff00 = mask for 3rd byte
	addcc	%o2, 1, %o2		! n += 1
	bz	%ncc, .notfound		! c not found in first n bytes
	andcc	%g1, %o4, %g0		! third byte zero (= found c) ?
	bz	%ncc, .done		! yup, done
	nop				!
	addcc	%o2, 1, %o2		! n += 1
	bz	%ncc, .notfound		! c not found in first n bytes
	andcc	%g1, 0xff, %g0		! fourth byte zero (= found c) ?
	bz	%ncc, .done		! yup, done
	nop
					! otherwise fall through: not found

.notfound:
	retl				! done with leaf function
	mov	%g0, %o0		! return null pointer

	SET_SIZE(memchr)