1/* Copyright (c) 2010-2011, Linaro Limited 2 All rights reserved. 3 4 Redistribution and use in source and binary forms, with or without 5 modification, are permitted provided that the following conditions 6 are met: 7 8 * Redistributions of source code must retain the above copyright 9 notice, this list of conditions and the following disclaimer. 10 11 * Redistributions in binary form must reproduce the above copyright 12 notice, this list of conditions and the following disclaimer in the 13 documentation and/or other materials provided with the distribution. 14 15 * Neither the name of Linaro Limited nor the names of its 16 contributors may be used to endorse or promote products derived 17 from this software without specific prior written permission. 18 19 THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 20 "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 21 LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 22 A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 23 HOLDER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 24 SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 25 LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 26 DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 27 THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 28 (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 29 OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 30 */ 31 32/* 33 Written by Dave Gilbert <david.gilbert@linaro.org> 34 35 This memchr routine is optimised on a Cortex-A9 and should work on 36 all ARMv7 processors. It has a fast past for short sizes, and has 37 an optimised path for large data sets; the worst case is finding the 38 match early in a large data set. 39 40 */ 41 42@ 2011-02-07 david.gilbert@linaro.org 43@ Extracted from local git a5b438d861 44@ 2011-07-14 david.gilbert@linaro.org 45@ Import endianness fix from local git ea786f1b 46@ 2011-12-07 david.gilbert@linaro.org 47@ Removed unneeded cbz from align loop 48 49 .syntax unified 50 .arch armv7-a 51 52@ this lets us check a flag in a 00/ff byte easily in either endianness 53#ifdef __ARMEB__ 54#define CHARTSTMASK(c) 1<<(31-(c*8)) 55#else 56#define CHARTSTMASK(c) 1<<(c*8) 57#endif 58 .text 59 .thumb 60 61@ --------------------------------------------------------------------------- 62 .thumb_func 63 .align 2 64 .p2align 4,,15 65 .global memchr 66 .type memchr,%function 67memchr: 68 @ r0 = start of memory to scan 69 @ r1 = character to look for 70 @ r2 = length 71 @ returns r0 = pointer to character or NULL if not found 72 and r1,r1,#0xff @ Don't think we can trust the caller to actually pass a char 73 74 cmp r2,#16 @ If it's short don't bother with anything clever 75 blt 20f 76 77 tst r0, #7 @ If it's already aligned skip the next bit 78 beq 10f 79 80 @ Work up to an aligned point 815: 82 ldrb r3, [r0],#1 83 subs r2, r2, #1 84 cmp r3, r1 85 beq 50f @ If it matches exit found 86 tst r0, #7 87 bne 5b @ If not aligned yet then do next byte 88 8910: 90 @ At this point, we are aligned, we know we have at least 8 bytes to work with 91 push {r4,r5,r6,r7} 92 orr r1, r1, r1, lsl #8 @ expand the match word across to all bytes 93 orr r1, r1, r1, lsl #16 94 bic r4, r2, #7 @ Number of double words to work with 95 mvns r7, #0 @ all F's 96 movs r3, #0 97 9815: 99 ldmia r0!,{r5,r6} 100 subs r4, r4, #8 101 eor r5,r5, r1 @ Get it so that r5,r6 have 00's where the bytes match the target 102 eor r6,r6, r1 103 uadd8 r5, r5, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0 104 sel r5, r3, r7 @ bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION 105 uadd8 r6, r6, r7 @ Parallel add 0xff - sets the GE bits for anything that wasn't 0 106 sel r6, r5, r7 @ chained....bytes are 00 for none-00 bytes, or ff for 00 bytes - NOTE INVERSION 107 cbnz r6, 60f 108 bne 15b @ (Flags from the subs above) If not run out of bytes then go around again 109 110 pop {r4,r5,r6,r7} 111 and r1,r1,#0xff @ Get r1 back to a single character from the expansion above 112 and r2,r2,#7 @ Leave the count remaining as the number after the double words have been done 113 11420: 115 cbz r2, 40f @ 0 length or hit the end already then not found 116 11721: @ Post aligned section, or just a short call 118 ldrb r3,[r0],#1 119 subs r2,r2,#1 120 eor r3,r3,r1 @ r3 = 0 if match - doesn't break flags from sub 121 cbz r3, 50f 122 bne 21b @ on r2 flags 123 12440: 125 movs r0,#0 @ not found 126 bx lr 127 12850: 129 subs r0,r0,#1 @ found 130 bx lr 131 13260: @ We're here because the fast path found a hit - now we have to track down exactly which word it was 133 @ r0 points to the start of the double word after the one that was tested 134 @ r5 has the 00/ff pattern for the first word, r6 has the chained value 135 cmp r5, #0 136 itte eq 137 moveq r5, r6 @ the end is in the 2nd word 138 subeq r0,r0,#3 @ Points to 2nd byte of 2nd word 139 subne r0,r0,#7 @ or 2nd byte of 1st word 140 141 @ r0 currently points to the 3rd byte of the word containing the hit 142 tst r5, # CHARTSTMASK(0) @ 1st character 143 bne 61f 144 adds r0,r0,#1 145 tst r5, # CHARTSTMASK(1) @ 2nd character 146 ittt eq 147 addeq r0,r0,#1 148 tsteq r5, # (3<<15) @ 2nd & 3rd character 149 @ If not the 3rd must be the last one 150 addeq r0,r0,#1 151 15261: 153 pop {r4,r5,r6,r7} 154 subs r0,r0,#1 155 bx lr 156