/*
 * strcmp - compare two strings
 *
 * Copyright (c) 2012-2022, Arm Limited.
 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64.
 * MTE compatible.
 */

#include "asmdefs.h"

/* Byte-replicated constants for the parallel NUL-byte test below.  */
#define REP8_01 0x0101010101010101
#define REP8_7f 0x7f7f7f7f7f7f7f7f

/* Register roles (AAPCS64: args in x0/x1, result in x0).
   Note the deliberate aliases: result reuses src1 (x0), off1 reuses
   diff (x5), and tmp reuses syndrome (x6) — each pair is never live
   at the same time.  */
#define src1 x0
#define src2 x1
#define result x0

#define data1 x2
#define data1w w2
#define data2 x3
#define data2w w3
#define has_nul x4
#define diff x5
#define off1 x5
#define syndrome x6
#define tmp x6
#define data3 x7
#define zeroones x8
#define shift x9
#define off2 x10

/* On big-endian early bytes are at MSB and on little-endian LSB.
   LS_FW means shifting towards early bytes.  */
#ifdef __AARCH64EB__
# define LS_FW lsl
#else
# define LS_FW lsr
#endif

/* NUL detection works on the principle that (X - 1) & (~X) & 0x80
   (=> (X - 1) & ~(X | 0x7f)) is non-zero iff a byte is zero, and
   can be done in parallel across the entire word.
   Since carry propagation makes 0x1 bytes before a NUL byte appear
   NUL too in big-endian, byte-reverse the data before the NUL check.  */


/* int __strcmp_aarch64 (const char *src1 /* x0 *|, const char *src2 /* x1 *|)
   Returns in x0: <0, 0, or >0 per the usual strcmp contract (the sign
   comes from a subtraction of the first differing bytes, zero-extended
   then subtracted — see L(end)).  Only loads from whole aligned words
   once past the byte loop, so it never crosses an MTE tag granule
   beyond the terminating NUL's granule.  */
ENTRY (__strcmp_aarch64)
	sub	off2, src2, src1	/* src2 reachable as [src1, off2].  */
	mov	zeroones, REP8_01
	and	tmp, src1, 7		/* Low bits of src1 (alignment).  */
	tst	off2, 7			/* Same alignment for both strings?  */
	b.ne	L(misaligned8)
	cbnz	tmp, L(mutual_align)

	.p2align 4

	/* Both sources 8-byte aligned: compare a word per iteration.
	   src1 is post-incremented; src2 is addressed via off2.  */
L(loop_aligned):
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
L(start_realigned):
#ifdef __AARCH64EB__
	/* Byte-reverse before the NUL check so borrow propagation runs
	   away from the early bytes (see file-header comment).  */
	rev	tmp, data1
	sub	has_nul, tmp, zeroones
	orr	tmp, tmp, REP8_7f
#else
	sub	has_nul, data1, zeroones
	orr	tmp, data1, REP8_7f
#endif
	bics	has_nul, has_nul, tmp	/* Non-zero if NUL terminator.  */
	/* ccmp keeps looping only if no NUL (Z set by bics) AND the
	   words are equal; otherwise flags become 0 and we fall out.  */
	ccmp	data1, data2, 0, eq
	b.eq	L(loop_aligned)
#ifdef __AARCH64EB__
	rev	has_nul, has_nul	/* Back to memory byte order.  */
#endif
	eor	diff, data1, data2	/* Non-zero bytes where they differ.  */
	orr	syndrome, diff, has_nul

	/* Common tail: syndrome has a set bit at (or above) the first
	   difference or NUL; extract the deciding bytes and subtract.  */
L(end):
#ifndef __AARCH64EB__
	/* Little-endian: byte-reverse so the earliest byte in memory is
	   the most significant, then clz finds it.  */
	rev	syndrome, syndrome
	rev	data1, data1
	rev	data2, data2
#endif
	clz	shift, syndrome
	/* The most-significant-non-zero bit of the syndrome marks either the
	   first bit that is different, or the top bit of the first zero byte.
	   Shifting left now will bring the critical information into the
	   top bits.  */
	lsl	data1, data1, shift
	lsl	data2, data2, shift
	/* But we need to zero-extend (char is unsigned) the value and then
	   perform a signed 32-bit subtraction.  */
	lsr	data1, data1, 56
	sub	result, data1, data2, lsr 56
	ret

	.p2align 4

L(mutual_align):
	/* Sources are mutually aligned, but are not currently at an
	   alignment boundary.  Round down the addresses and then mask off
	   the bytes that precede the start point.  */
	bic	src1, src1, 7
	ldr	data2, [src1, off2]
	ldr	data1, [src1], 8
	neg	shift, src2, lsl 3	/* Bits to alignment -64.  */
	mov	tmp, -1
	/* Force the pre-start bytes of BOTH words to 0xff: equal in both
	   and never NUL, so they cannot trigger an early exit.  */
	LS_FW	tmp, tmp, shift
	orr	data1, data1, tmp
	orr	data2, data2, tmp
	b	L(start_realigned)

L(misaligned8):
	/* Align SRC1 to 8 bytes and then compare 8 bytes at a time, always
	   checking to make sure that we don't access beyond the end of SRC2.  */
	cbz	tmp, L(src1_aligned)
	/* Byte-at-a-time until src1 reaches an 8-byte boundary, exiting
	   early on NUL or mismatch.  */
L(do_misaligned):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	cmp	data1w, 0
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.ne	L(done)
	tst	src1, 7
	b.ne	L(do_misaligned)

L(src1_aligned):
	/* src1 is now aligned, src2 is not.  Check src2's first partial
	   word for NUL before entering the word loop.  */
	neg	shift, src2, lsl 3	/* Bit offset of src2 within its word.  */
	bic	src2, src2, 7
	ldr	data3, [src2], 8
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	/* Set a 0x01 in each byte slot that precedes src2's start point,
	   so those stray bytes can never look like NUL below.  */
	lsr	tmp, zeroones, shift
	orr	data3, data3, tmp
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	bics	has_nul, has_nul, tmp
	b.ne	L(tail)

	sub	off1, src2, src1	/* Aligned-src2 offset from src1.  */

	.p2align 4

	/* Main unaligned loop.  data3 is the NEXT aligned word of src2;
	   its NUL check covers the word loaded as data2 on the following
	   iteration, guaranteeing we never read past src2's terminator.  */
L(loop_unaligned):
	ldr	data3, [src1, off1]
	ldr	data2, [src1, off2]
#ifdef __AARCH64EB__
	rev	data3, data3
#endif
	sub	has_nul, data3, zeroones
	orr	tmp, data3, REP8_7f
	ldr	data1, [src1], 8
	bics	has_nul, has_nul, tmp
	ccmp	data1, data2, 0, eq	/* Loop while no NUL and words equal.  */
	b.eq	L(loop_unaligned)

	/* Either a difference in data1/data2, or a NUL in data3.  Shift
	   the NUL mask down to the bytes that overlap data2.  */
	lsl	tmp, has_nul, shift
#ifdef __AARCH64EB__
	rev	tmp, tmp
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, tmp
	cbnz	syndrome, L(end)
L(tail):
	/* NUL found in data3 past the part already compared: build the
	   final src2 word from data3 and compare against one more src1
	   word.  Reading [src1] here is safe since the string ends here.  */
	ldr	data1, [src1]
	neg	shift, shift
	lsr	data2, data3, shift
	lsr	has_nul, has_nul, shift
#ifdef __AARCH64EB__
	rev	data2, data2
	rev	has_nul, has_nul
#endif
	eor	diff, data1, data2
	orr	syndrome, diff, has_nul
	b	L(end)

L(done):
	/* Byte loop exit: data1w/data2w hold the deciding bytes.  */
	sub	result, data1, data2
	ret

END (__strcmp_aarch64)