/*
 * Copyright (C) 2004, 2007-2010, 2011-2012 Synopsys, Inc. (www.synopsys.com)
 *
 * This program is free software; you can redistribute it and/or modify
 * it under the terms of the GNU General Public License version 2 as
 * published by the Free Software Foundation.
 */

/* This is optimized primarily for the ARC700.
   It would be possible to speed up the loops by one cycle per word
   (respectively one cycle per byte) by forcing double alignment of
   source 1, unrolling by a factor of two, and speculatively loading
   the second word / byte of source 1; however, that would increase
   the overhead for loop setup / finish, and strcmp might often
   terminate early.  */

#include <linux/linkage.h>

ENTRY_CFI(strcmp)
	or	r2,r0,r1	; check the low two bits of both pointers ...
	bmsk_s	r2,r2,1
	brne	r2,0,.Lcharloop	; ... take the byte loop unless both are word-aligned
	mov_s	r12,0x01010101
	ror	r5,r12		; r5 := 0x80808080
.Lwordloop:
	ld.ab	r2,[r0,4]
	ld.ab	r3,[r1,4]
	nop_s
	sub	r4,r2,r12	; (r2 - 0x01010101) & ~r2 & 0x80808080 ...
	bic	r4,r4,r2
	and	r4,r4,r5	; ... is nonzero iff r2 contains a NUL byte
	brne	r4,0,.Lfound0
	breq	r2,r3,.Lwordloop
#ifdef __LITTLE_ENDIAN__
	xor	r0,r2,r3	; mask for difference
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
#endif /* LITTLE ENDIAN */
	cmp_s	r2,r3
	mov_s	r0,1
	j_s.d	[blink]
	bset.lo	r0,r0,31

	.balign	4
#ifdef __LITTLE_ENDIAN__
.Lfound0:
	xor	r0,r2,r3	; mask for difference
	or	r0,r0,r4	; or in zero indicator
	sub_s	r1,r0,1
	bic_s	r0,r0,r1	; mask for least significant difference bit
	sub	r1,r5,r0
	xor	r0,r5,r1	; mask for least significant difference byte
	and_s	r2,r2,r0
	and_s	r3,r3,r0
	sub.f	r0,r2,r3
	mov.hi	r0,1
	j_s.d	[blink]
	bset.lo	r0,r0,31
#else /* BIG ENDIAN */
	/* The zero-detection above can mis-detect 0x01 bytes as zeroes
	   because of carry propagation from a less significant zero byte.
	   We can compensate for this by checking that bit 0 is zero.
	   This compensation is not necessary in the step where we
	   get a low estimate for r2, because in any affected bytes
	   we already have 0x00 or 0x01, which will remain unchanged
	   when bit 7 is cleared.  */
	.balign	4
.Lfound0:
	lsr	r0,r4,8
	lsr_s	r1,r2
	bic_s	r2,r2,r0	; get low estimate for r2 and get ...
	bic_s	r0,r0,r1	; <this is the adjusted mask for zeros>
	or_s	r3,r3,r0	; ... high estimate r3 so that r2 > r3 will ...
	cmp_s	r3,r2		; ... be independent of trailing garbage
	or_s	r2,r2,r0	; likewise for r3 > r2
	bic_s	r3,r3,r0
	rlc	r0,0		; r0 := r2 > r3 ? 1 : 0
	cmp_s	r2,r3
	j_s.d	[blink]
	bset.lo	r0,r0,31
#endif /* ENDIAN */

	.balign	4
.Lcharloop:
	ldb.ab	r2,[r0,1]
	ldb.ab	r3,[r1,1]
	nop_s
	breq	r2,0,.Lcmpend
	breq	r2,r3,.Lcharloop
.Lcmpend:
	j_s.d	[blink]
	sub	r0,r2,r3
END_CFI(strcmp)
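
/*
 * For reference only (not assembled): a minimal C sketch of the
 * word-at-a-time NUL detection used in .Lwordloop above, assuming
 * 32-bit unsigned int and the same constants the code builds in
 * r12 (0x01010101) and r5 (0x80808080).  The helper name
 * has_zero_byte() is illustrative and does not exist in the kernel.
 *
 *	static inline int has_zero_byte(unsigned int w)
 *	{
 *		// Nonzero iff some byte of w is 0x00; mirrors the
 *		// sub/bic/and sequence in .Lwordloop.
 *		return ((w - 0x01010101u) & ~w & 0x80808080u) != 0;
 *	}
 *
 * As a whole-word test this is exact, but the per-byte mask it derives
 * may additionally flag a 0x01 byte sitting above a zero byte (carry
 * propagation), which is what the big-endian .Lfound0 path above
 * compensates for.
 */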