/* memcmp - compare memory
 *
 * Copyright (c) 2013-2020, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 */

#include <sys/elf_common.h>

#include <machine/asm.h>

#define L(l) .L ## l

/* Parameters and result.  */
#define src1		x0
#define src2		x1
#define limit		x2
#define result		w0

/* Internal variables.  */
#define data1		x3
#define data1w		w3
#define data1h		x4
#define data2		x5
#define data2w		w5
#define data2h		x6
#define tmp1		x7

/* int memcmp (const void *src1, const void *src2, size_t limit)
 *
 * In:   x0 = src1, x1 = src2, x2 = limit (byte count, handled as an
 *       unsigned value on every path).
 * Out:  w0 = 0 when the first LIMIT bytes are equal.  Otherwise the sign
 *       of w0 follows the first differing byte, compared as unsigned:
 *       the word paths return -1 or 1, the byte loop returns the byte
 *       difference.
 * Modifies: x0-x7 and the condition flags.  The code below neither uses
 *       sp nor stores to memory.
 *
 * Buffers of 8 bytes or more are compared with 8/16-byte loads; the tail
 * is handled by re-reading the final 8 or 16 bytes of the buffers, which
 * may overlap bytes that were already compared.  No load extends outside
 * [src, src + limit).
 *
 * ENTRY, END and the GNU_PROPERTY_* macros are expected to come from the
 * headers included above.
 */
ENTRY (memcmp)
	/* limit = n - 8.  Fewer than 8 bytes go to the small-size path.  */
	subs	limit, limit, 8
	b.lo	L(less8)

	/* Bytes 0..7.  */
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	b.ne	L(return)

	/* limit = n - 16.  Unsigned condition: the count is a size_t, so a
	   signed test would send very large counts down the short path.  */
	subs	limit, limit, 8
	b.hi	L(more16)

	/* 8 <= n <= 16, limit is [-8..0]: compare the final 8 bytes.  src1
	   and src2 already point at byte 8, so the loads start at n - 8.  */
	ldr	data1, [src1, limit]
	ldr	data2, [src2, limit]
	b	L(return)

L(more16):
	/* Bytes 8..15.  */
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	b.ne	L(return)

	/* Jump directly to comparing the last 16 bytes for 32 byte (or less)
	   strings.  limit = n - 32 from here on.  */
	subs	limit, limit, 16
	b.ls	L(last_bytes)

	/* We overlap loads between 0-32 bytes at either side of SRC1 when we
	   try to align, so limit it only to strings larger than 128 bytes.  */
	cmp	limit, 96
	b.ls	L(loop16)

	/* Align src1 down to 16 bytes and move src2 back by the same amount;
	   limit grows by that amount so the re-read bytes are counted.  */
	and	tmp1, src1, 15
	add	limit, limit, tmp1
	sub	src1, src1, tmp1
	sub	src2, src2, tmp1

	/* Loop performing 16 bytes per iteration.  src1 is 16-byte aligned
	   only when the alignment step above was taken (n > 128).
	   Limit is pre-decremented by 16 and must be larger than zero.
	   Exit if <= 16 bytes left to do or if the data is not equal.  */
	.p2align 4
L(loop16):
	ldp	data1, data1h, [src1], 16
	ldp	data2, data2h, [src2], 16
	subs	limit, limit, 16
	/* Chain: count still above 16, then low words equal, then high words
	   equal.  A failed link forces NZCV = 0 (ne) and ends the loop.  */
	ccmp	data1, data2, 0, hi
	ccmp	data1h, data2h, 0, eq
	b.eq	L(loop16)

	/* The loop may have ended on the count alone, so check the 16 bytes
	   just loaded explicitly, low word first.  */
	cmp	data1, data2
	b.ne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2
	b.ne	L(return)

	/* Compare last 1-16 bytes using unaligned access.  limit is <= 0
	   here, so the pointers step back to the final 16 bytes.  */
L(last_bytes):
	add	src1, src1, limit
	add	src2, src2, limit
	ldp	data1, data1h, [src1]
	ldp	data2, data2h, [src2]
	cmp	data1, data2
	b.ne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2

	/* Compare data bytes and set return value to 0, -1 or 1.
	   On little-endian the words are byte-reversed first so that the
	   unsigned compare is decided by the lowest-addressed byte.  */
L(return):
#ifndef __AARCH64EB__
	rev	data1, data1
	rev	data2, data2
#endif
	cmp	data1, data2
L(ret_eq):
	cset	result, ne		/* 0 if equal, else 1.  */
	cneg	result, result, lo	/* Negate when data1 < data2.  */
	ret

	.p2align 4
	/* Compare up to 8 bytes.  Limit is [-8..-1].  */
L(less8):
	adds	limit, limit, 4		/* Carry set iff n >= 4.  */
	b.lo	L(less4)
	/* 32-bit loads zero-extend into data1/data2, so the 64-bit compare
	   at L(return) still orders them correctly.  */
	ldr	data1w, [src1], 4
	ldr	data2w, [src2], 4
	cmp	data1w, data2w
	b.ne	L(return)
	sub	limit, limit, 4
L(less4):
	adds	limit, limit, 4		/* limit = bytes left, 0..3.  */
	b.eq	L(ret_eq)		/* Nothing left: Z is set, result 0.  */
L(byte_loop):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	subs	limit, limit, 1
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.eq	L(byte_loop)
	/* Difference of the last byte pair loaded; zero if the count ran out
	   with all bytes equal.  */
	sub	result, data1w, data2w
	ret

END (memcmp)

GNU_PROPERTY_AARCH64_FEATURE_1_NOTE(GNU_PROPERTY_AARCH64_FEATURE_1_VAL)