/*
 * strcmp - compare two strings
 *
 * Copyright (c) 2012-2022, Arm Limited.
 * SPDX-License-Identifier: MIT
 */


/* Assumptions:
 *
 * ARMv8-a, AArch64.
 * MTE compatible.
 */

#include <sys/elf_common.h>

#include <machine/asm.h>

/* Function-local label: the .L prefix keeps it out of the symbol table.  */
#define L(l) .L ## l

/* Byte-replicated constants used by the parallel NUL detection below.  */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f

/* Arguments and return value.  */
#define src1		x0
#define src2		x1
#define result		x0

/* Scratch registers.  Note the deliberate sharing: diff/off1 are both x5
   and syndrome/tmp are both x6, so each pair must never be live at the
   same time.  */
#define data1		x2
#define data1w		w2
#define data2		x3
#define data2w		w3
#define has_nul		x4
#define diff		x5
#define off1		x5
#define syndrome	x6
#define tmp		x6
#define data3		x7
#define zeroones	x8
#define shift		x9
#define off2		x10

/* On big-endian early bytes are at MSB and on little-endian LSB.
   LS_FW means shifting towards early bytes.  */
#ifdef __AARCH64EB__
# define LS_FW lsl
#else
# define LS_FW lsr
#endif

/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
   can be done in parallel across the entire word.
   Since carry propagation makes 0x1 bytes before a NUL byte appear
   NUL too in big-endian, byte-reverse the data before the NUL check.  */


/*
 * int strcmp (const char *s1, const char *s2)
 *
 * In:       x0 = s1 (src1), x1 = s2 (src2).
 * Out:      x0 = zero if the strings are equal, otherwise a value whose
 *           sign follows the unsigned comparison of the first pair of
 *           bytes that differ.
 * Clobbers: x2-x10 and the condition flags.  The body visible here does
 *           not reference sp.
 *
 * Three paths are selected on entry:
 *   - both pointers 8-byte aligned: word-at-a-time loop (loop_aligned);
 *   - same misalignment: round down, mask the leading bytes, then join
 *     the aligned loop (mutual_align);
 *   - different misalignment: compare bytes until src1 is aligned, then
 *     loop with aligned loads from src1 and unaligned loads from src2
 *     (misaligned8 / loop_unaligned / tail).
 */
ENTRY (strcmp)
	sub	off2, src2, src1	/* src2 is addressed as src1 + off2.  */
	mov	zeroones, REP8_01
	and	tmp, src1, 7		/* Misalignment of src1.  */
	tst	off2, 7
	b.ne	L(misaligned8)		/* Pointers differ modulo 8.  */
	cbnz	tmp, L(mutual_align)	/* Same alignment, but not 8-byte.  */

	.p2align 4

L(loop_aligned):
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
L(start_realigned):
#ifdef __AARCH64EB__
	rev	tmp, data1
	sub	has_nul, tmp, zeroones
	orr	tmp, tmp, REP8_7f
#else
	sub	has_nul, data1, zeroones
	orr	tmp, data1, REP8_7f
#endif
	bics	has_nul, has_nul, tmp	/* Non-zero if NUL terminator.  */
	/* If a NUL was seen the flags are forced to "ne"; otherwise they
	   reflect data1 == data2.  Loop while no NUL and the words match.  */
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_aligned)
#ifdef __AARCH64EB__
	rev	has_nul, has_nul	/* Back to memory byte order.  */
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
	/* Common exit.  Expects data1/data2 in memory byte order and a
	   non-zero syndrome marking the first difference or NUL.  */
L(end):
#ifndef __AARCH64EB__
	/* Little-endian: move the earliest byte to the most significant
	   position so that clz finds the first event.  */
	rev	syndrome, syndrome
	rev	data1, data1
	rev	data2, data2
#endif
	clz	shift, syndrome
	/* The most-significant-non-zero bit of the syndrome marks either the
	   first bit that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	lsl	data1, data1, shift
	lsl	data2, data2, shift
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, 56
	sub	result, data1, data2, lsr 56
	ret

	.p2align 4

L(mutual_align):
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.  */
	bic	src1, src1, 7
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
	/* Register-specified shifts use the amount modulo 64, so this is
	   effectively 64 - 8 * (src2 & 7).  */
	neg	shift, src2, lsl 3	/* Bits to alignment -64.  */
	mov	tmp, -1
	LS_FW	tmp, tmp, shift		/* Ones only in the preceding bytes.  */
	/* Force the preceding bytes to 0xff in both words: they then compare
	   equal and cannot be mistaken for a NUL.  */
	orr	data1, data1, tmp
	orr	data2, data2, tmp
	b	L(start_realigned)

L(misaligned8):
	/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
	   checking to make sure that we don't access beyond the end of SRC2.  */
	cbz	tmp, L(src1_aligned)
L(do_misaligned):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	cmp	data1w, 0
	/* A NUL in data1 forces "ne"; otherwise compare the two bytes.  */
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.ne	L(done)
	tst	src1, 7
	b.ne	L(do_misaligned)

L(src1_aligned):
	/* src1 is now 8-byte aligned and src2 is not.  Check the aligned
	   word holding the current src2 bytes for a NUL before any load
	   that reaches into the following word.  */
	neg	shift, src2, lsl 3
	bic	src2, src2, 7
	ldr	data3, [src2], 8
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	/* Set bit 0 of every byte that precedes the src2 start so those
	   bytes cannot be detected as NUL.  */
	lsr	tmp, zeroones, shift
	orr	data3, data3, tmp
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	bics	has_nul, has_nul, tmp
	b.ne	L(tail)

	/* off1 addresses the aligned src2 word containing the end of the
	   unaligned 8-byte window read through off2.  */
	sub	off1, src2, src1

	.p2align 4

L(loop_unaligned):
	ldr	data3, [src1, off1]	/* Aligned src2 word, NUL check only.  */
	ldr	data2, [src1, off2]	/* Unaligned src2 bytes to compare.  */
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	ldr	data1, [src1], 8
	bics	has_nul, has_nul, tmp
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_unaligned)

	/* Keep only the NUL markers for the data3 bytes that are also part
	   of data2, moved to their position within data2.  */
	lsl	tmp, has_nul, shift
#ifdef __AARCH64EB__
	rev	tmp, tmp
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, tmp
	cbnz	syndrome, L(end)
	/* data1 == data2 and the NUL lies in the part of data3 not yet
	   compared.  Build the last src2 word from data3 instead of
	   loading from src2 again.  */
L(tail):
	ldr	data1, [src1]
	neg	shift, shift
	lsr	data2, data3, shift	/* Remaining src2 bytes, zero filled.  */
	lsr	has_nul, has_nul, shift
#ifdef __AARCH64EB__
	rev	data2, data2
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
	b	L(end)

L(done):
	/* Byte path exit: both values were zero-extended by ldrb.  */
	sub	result, data1, data2
	ret

END (strcmp)

GNU_PROPERTY_AARCH64_FEATURE_1_NOTE(GNU_PROPERTY_AARCH64_FEATURE_1_VAL)