/*
 * strcmp - compare two strings
 *
 * Copyright (c) 2012-2020, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64
 */

#include "../asmdefs.h"

#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f
#define REP8_80 0x8080808080808080

/* Address bits 3..11.  They are all set exactly when an address lies in
   the final 8 bytes of a 4K page.  */
#define PAGE_LAST_DWORD 0xff8

/* Arguments and return value.  */
#define src1            x0
#define src2            x1
#define result          x0

/* Working registers.  */
#define chunk1          x2      /* Current data from src1.  */
#define chunk1w         w2
#define chunk2          x3      /* Current data from src2.  */
#define chunk2w         w3
#define nul_mask        x4      /* Non-zero when chunk1 holds a NUL.  */
#define delta           x5      /* chunk1 ^ chunk2.  */
#define synd            x6      /* delta | nul_mask.  */
#define scratch1        x7
#define scratch2        x8
#define scratch3        x9
#define byte_ones       x10     /* REP8_01: 0x01 in every byte.  */
#define shift_amt       x11     /* Leading-zero count of synd.  */

/*
 * __strcmp_aarch64
 *
 * In:    src1 (x0), src2 (x1) = the two strings to compare.
 * Out:   result (x0) = zero when the strings are equal, otherwise a
 *        negative or positive value according to the first byte that
 *        differs, bytes being treated as unsigned.
 * Uses:  x2-x11 and the condition flags.
 *
 * ENTRY, END, L and PTR_ARG are macros supplied by asmdefs.h.
 */

        /* Start of the performance-critical section, meant to occupy one
           64-byte cache line.  */
ENTRY (__strcmp_aarch64)
        PTR_ARG (0)
        PTR_ARG (1)
        eor     scratch1, src1, src2
        mov     byte_ones, #REP8_01
        tst     scratch1, #7            /* Same offset within a dword?  */
        b.ne    L(unaligned)
        ands    scratch1, src1, #7      /* Already on an 8-byte boundary?  */
        b.ne    L(align_both)
        /* A zero byte is found with (X - 1) & ~X & 0x80, written here as
           (X - 1) & ~(X | 0x7f).  The expression is non-zero iff some byte
           of X is zero, and it checks every byte of the dword at once.  */
L(aligned_loop):
        ldr     chunk1, [src1], #8
        ldr     chunk2, [src2], #8
L(scan_dword):
        sub     scratch1, chunk1, byte_ones
        orr     scratch2, chunk1, #REP8_7f
        eor     delta, chunk1, chunk2           /* Set bits mark a mismatch.  */
        bic     nul_mask, scratch1, scratch2    /* Set bits mark a NUL.  */
        orr     synd, delta, nul_mask
        cbz     synd, L(aligned_loop)
        /* End of the performance-critical section.  */

        /* A mismatch and/or a NUL was seen in the current dword; work out
           the return value from chunk1, chunk2 and synd.  */
L(compute_result):
#ifndef __AARCH64EB__
        rev     synd, synd
        rev     chunk1, chunk1
        /* After the byte swap the earliest string byte is the most
           significant one, so the highest set bit of synd is either the
           first differing bit or the top bit of the first zero byte.
           Shifting both operands left by the leading-zero count moves the
           deciding bits to the top of the register.  */
        clz     shift_amt, synd
        rev     chunk2, chunk2
        lsl     chunk1, chunk1, shift_amt
        lsl     chunk2, chunk2, shift_amt
        /* Keep only the top byte of each operand, zero-extended because
           characters compare as unsigned, and subtract.  */
        lsr     chunk1, chunk1, #56
        sub     result, chunk1, chunk2, lsr #56
        ret
#else
        /* On big-endian the syndrome cannot be used as it stands: carry
           propagation in the NUL test can corrupt the upper bits when the
           trailing bytes of the string contain 0x01.  */
        /* With no NUL in the dword the answer follows from an unsigned
           compare of the two dwords.  A plain subtraction of the dwords
           is not usable, because the most significant bit may matter.  */
        cbnz    nul_mask, L(be_has_nul)
        cmp     chunk1, chunk2
        cset    result, ne              /* 1 when the dwords differ...  */
        cneg    result, result, lo      /* ...negated when chunk1 is lower.  */
        ret
L(be_has_nul):
        /* Redo the NUL test on a byte-reversed copy of chunk1, then swap
           the resulting mask back into string order.  */
        rev     scratch3, chunk1
        sub     scratch1, scratch3, byte_ones
        orr     scratch2, scratch3, #REP8_7f
        bic     nul_mask, scratch1, scratch2
        rev     nul_mask, nul_mask
        orr     synd, delta, nul_mask
        clz     shift_amt, synd
        /* The highest set bit of synd is either the first differing bit
           or the top bit of the first zero byte.  Shifting both operands
           left by the leading-zero count moves the deciding bits to the
           top of the register.  */
        lsl     chunk1, chunk1, shift_amt
        lsl     chunk2, chunk2, shift_amt
        /* Keep only the top byte of each operand, zero-extended because
           characters compare as unsigned, and subtract.  */
        lsr     chunk1, chunk1, #56
        sub     result, chunk1, chunk2, lsr #56
        ret
#endif

L(align_both):
        /* Both pointers share the same offset within a dword but do not
           sit on a dword boundary.  Round both down, load the enclosing
           dwords, and force the bytes that precede the real start of the
           strings to 0xff in both operands so that they can produce
           neither a mismatch nor a NUL.  */
        bic     src1, src1, #7
        bic     src2, src2, #7
        lsl     scratch1, scratch1, #3  /* Byte offset -> bit count.  */
        ldr     chunk1, [src1], #8
        neg     scratch1, scratch1      /* Shift count is taken mod 64.  */
        ldr     chunk2, [src2], #8
        mov     scratch2, #~0
#ifdef __AARCH64EB__
        /* Big-endian: the early bytes are the most significant ones.  */
        lsl     scratch2, scratch2, scratch1    /* By (scratch1 & 63).  */
#else
        /* Little-endian: the early bytes are the least significant ones.  */
        lsr     scratch2, scratch2, scratch1    /* By (scratch1 & 63).  */
#endif
        orr     chunk1, chunk1, scratch2
        orr     chunk2, chunk2, scratch2
        b       L(scan_dword)

L(unaligned):
        /* The pointers have different offsets within a dword.  Step one
           byte at a time until SRC1 reaches an 8-byte boundary, then
           compare 8 bytes at a time, checking before every dword load
           that the SRC2 access does not run past a page boundary.  */
        tst     src1, #7
        b.eq    L(unaligned_loop)
L(byte_loop):
        ldrb    chunk1w, [src1], #1
        ldrb    chunk2w, [src2], #1
        /* Carry is set when the SRC1 byte is not NUL; only then are the
           two bytes compared.  For a NUL the flags are forced to 0b0000,
           which also reads as "not equal".  */
        cmp     chunk1w, #1
        ccmp    chunk1w, chunk2w, #0, cs
        b.ne    L(byte_result)
        tst     src1, #7
        b.ne    L(byte_loop)

L(unaligned_loop):
        /* When SRC2 points into the last dword of a 4K page, go back to
           the byte loop and compare one byte at a time instead.  */
        and     scratch1, src2, #PAGE_LAST_DWORD
        eor     scratch1, scratch1, #PAGE_LAST_DWORD
        cbz     scratch1, L(byte_loop)
        ldr     chunk1, [src1], #8
        ldr     chunk2, [src2], #8

        /* Same mismatch/NUL test as in the aligned loop.  */
        sub     scratch1, chunk1, byte_ones
        orr     scratch2, chunk1, #REP8_7f
        eor     delta, chunk1, chunk2           /* Set bits mark a mismatch.  */
        bic     nul_mask, scratch1, scratch2    /* Set bits mark a NUL.  */
        orr     synd, delta, nul_mask
        cbz     synd, L(unaligned_loop)
        b       L(compute_result)

L(byte_result):
        /* Both registers hold a single zero-extended byte loaded by the
           byte loop, so their difference is the return value.  */
        sub     result, chunk1, chunk2
        ret

END (__strcmp_aarch64)