/* memcmp - compare memory
 *
 * Copyright (c) 2013-2020, Arm Limited.
 * SPDX-License-Identifier: MIT
 */

/* Assumptions:
 *
 * ARMv8-a, AArch64, unaligned accesses.
 */

#include "../asmdefs.h"

/* Parameters and result.  */
#define src1		x0
#define src2		x1
#define limit		x2
#define result		w0

/* Internal variables.  */
#define data1		x3
#define data1w		w3
#define data1h		x4
#define data2		x5
#define data2w		w5
#define data2h		x6
#define tmp1		x7
#define tmp2		x8	/* Not referenced by this routine.  */

/* __memcmp_aarch64
 *
 * In:   x0 = src1, x1 = src2 (buffers to compare), x2 = limit (byte count).
 * Out:  w0 = 0 when all limit bytes match; otherwise negative or positive
 *       according to whether the first differing byte of src1 is lower or
 *       higher (unsigned) than the corresponding byte of src2.  The paths
 *       that go through L(return) produce exactly -1, 0 or 1; the byte
 *       loop returns the difference of the last bytes it loaded.
 * Clobbers: x0-x7 and the condition flags.  The body makes no stack
 *       accesses.
 *
 * Strategy: compare in 8 or 16 byte chunks and finish with an unaligned
 * load of the final 8 or 16 bytes, which may overlap bytes that were
 * already compared.  Sizes below 8 use a 4-byte compare plus a byte loop.
 */
ENTRY (__memcmp_aarch64)
	/* PTR_ARG and SIZE_ARG are macros from asmdefs.h (not visible here);
	   presumably they normalise the pointer and size arguments - confirm
	   against that header.  */
	PTR_ARG (0)
	PTR_ARG (1)
	SIZE_ARG (2)

	/* limit = n - 8.  A borrow means fewer than 8 bytes to compare.  */
	subs	limit, limit, 8
	b.lo	L(less8)

	/* Compare bytes 0-7.  */
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	b.ne	L(return)

	/* limit = n - 16.  */
	subs	limit, limit, 8
	b.gt	L(more16)

	/* 8 to 16 bytes: limit is [-8..0] and both pointers have advanced by
	   8, so [ptr + limit] addresses the final 8 bytes of each buffer.
	   L(return) performs the comparison itself.  */
	ldr	data1, [src1, limit]
	ldr	data2, [src2, limit]
	b	L(return)

L(more16):
	/* Compare bytes 8-15.  */
	ldr	data1, [src1], 8
	ldr	data2, [src2], 8
	cmp	data1, data2
	b.ne	L(return)

	/* Jump directly to comparing the last 16 bytes for 32 byte (or less)
	   strings.  limit = n - 32 from here on.  */
	subs	limit, limit, 16
	b.ls	L(last_bytes)

	/* We overlap loads between 0-32 bytes at either side of SRC1 when we
	   try to align, so limit it only to strings larger than 128 bytes.  */
	cmp	limit, 96
	b.ls	L(loop16)

	/* Align src1 and adjust src2 with bytes not yet done.  Both pointers
	   step back by the same amount and limit grows to match, so the
	   bytes in between are simply compared a second time.  */
	and	tmp1, src1, 15
	add	limit, limit, tmp1
	sub	src1, src1, tmp1
	sub	src2, src2, tmp1

	/* Loop performing 16 bytes per iteration using aligned src1.
	   Limit is pre-decremented by 16 and must be larger than zero.
	   Exit if 16 or fewer bytes are left to do or if the data is not
	   equal.  */
	.p2align 4
L(loop16):
	ldp	data1, data1h, [src1], 16
	ldp	data2, data2h, [src2], 16
	subs	limit, limit, 16
	/* If limit is still above zero compare the low halves, otherwise
	   force NZCV = 0b0000 (reads as "not equal") to leave the loop.  */
	ccmp	data1, data2, 0, hi
	/* Compare the high halves only when the low halves matched.  */
	ccmp	data1h, data2h, 0, eq
	b.eq	L(loop16)

	/* The loop ended either on a mismatch or because limit ran out;
	   re-check both halves to tell the two cases apart.  */
	cmp	data1, data2
	b.ne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2
	b.ne	L(return)

	/* Compare last 1-16 bytes using unaligned access.  limit is zero or
	   negative here, so adding it steps the pointers back to the final
	   16 bytes of each buffer.  */
L(last_bytes):
	add	src1, src1, limit
	add	src2, src2, limit
	ldp	data1, data1h, [src1]
	ldp	data2, data2h, [src2]
	cmp	data1, data2
	b.ne	L(return)
	mov	data1, data1h
	mov	data2, data2h
	cmp	data1, data2

	/* Compare data bytes and set return value to 0, -1 or 1.
	   Expects the two words to compare in data1 and data2.  */
L(return):
#ifndef __AARCH64EB__
	/* Little-endian: byte-reverse so the byte at the lowest address is
	   the most significant one in the unsigned compare below.  */
	rev	data1, data1
	rev	data2, data2
#endif
	cmp	data1, data2
	/* Entry point for callers that already hold the final flags.  */
L(ret_eq):
	cset	result, ne		/* 1 if the words differ, else 0.  */
	cneg	result, result, lo	/* Negate when data1 is lower.  */
	ret

	.p2align 4
	/* Compare up to 8 bytes.  Limit is [-8..-1].  */
L(less8):
	/* limit = n - 4.  No carry out means fewer than 4 bytes.  */
	adds	limit, limit, 4
	b.lo	L(less4)
	/* Compare bytes 0-3.  The w-register loads clear the upper 32 bits
	   of data1/data2, so the 64-bit compare at L(return) stays valid.  */
	ldr	data1w, [src1], 4
	ldr	data2w, [src2], 4
	cmp	data1w, data2w
	b.ne	L(return)
	sub	limit, limit, 4
L(less4):
	/* limit becomes the number of bytes still to compare, [0..3].  */
	adds	limit, limit, 4
	b.eq	L(ret_eq)		/* Nothing left: return 0.  */
L(byte_loop):
	ldrb	data1w, [src1], 1
	ldrb	data2w, [src2], 1
	subs	limit, limit, 1
	ccmp	data1w, data2w, 0, ne	/* NZCV = 0b0000.  */
	b.eq	L(byte_loop)
	/* Difference of the last bytes loaded; zero if they matched.  */
	sub	result, data1w, data2w
	ret

END (__memcmp_aarch64)