/freebsd/contrib/llvm-project/llvm/lib/Target/ARM/ |
H A D | ARMProcessors.td | 4 //===----------------------------------------------------------------------===// 9 "Cortex-A5 ARM processors", []>; 11 "Cortex-A7 ARM processors", []>; 13 "Cortex-A8 ARM processors", []>; 15 "Cortex-A9 ARM processors", []>; 17 "Cortex-A12 ARM processors", []>; 19 "Cortex-A15 ARM processors", []>; 21 "Cortex-A17 ARM processors", []>; 23 "Cortex-A32 ARM processors", []>; 25 "Cortex-A35 ARM processors", []>; [all …]
|
/freebsd/contrib/arm-optimized-routines/string/arm/ |
H A D | memset.S | 2 * memset - fill memory with a constant 4 * Copyright (c) 2010-2021, Arm Limited. 5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 11 This memset routine is optimised on a Cortex-A9 and should work on 17 .arch armv7-a 19 @ 2011-08-30 david.gilbert@linaro.org 24 #define CHARTSTMASK(c) 1<<(31-(c*8)) 30 @ --------------------------------------------------------------------------- 58 push {r4,r5,r6,r7} 69 mov r5,r1 [all …]
|
H A D | memchr.S | 2 * memchr - scan memory for a character 4 * Copyright (c) 2010-2022, Arm Limited. 5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 11 This __memchr_arm routine is optimised on a Cortex-A9 and should work on 18 @ 2011-02-07 david.gilbert@linaro.org 20 @ 2011-07-14 david.gilbert@linaro.org 22 @ 2011-12-07 david.gilbert@linaro.org 27 /* keep config inherited from -march= */ 29 .arch armv7-a 34 #define CHARTSTMASK(c) 1<<(31-(c*8)) [all …]
|
H A D | memcpy.S | 2 * memcpy - copy memory area 4 * Copyright (c) 2013-2022, Arm Limited. 5 * SPDX-License-Identifier: MIT OR Apache-2.0 WITH LLVM-exception 9 This memcpy routine is optimised for Cortex-A15 cores and takes advantage 14 ARMv6 (ARMv7-a if using Neon) 29 .arch armv7-a 69 #define A_l r2 /* Call-clobbered. */ 70 #define A_h r3 /* Call-clobbered. */ 72 #define B_h r5 79 /* Number of lines ahead to pre-fetch data. If you change this the code [all …]
|
/freebsd/sys/arm/arm/ |
H A D | swtch-v6.S | 3 /*- 37 /*- 38 * Copyright (c) 1994-1998 Mark Brinicombe. 141 * Manual ARMv7-A and ARMv7-R edition, page B2-1264(65), Branch 148 * is effectively NOP on Cortex-A15 so it needs special treatment. 154 /* Branch Target Cache on Cortex-A15. */ 181 ldr r5, [r8, #PC_CURPMAP] 183 add r5, r0 /* r5 = old pm_active */ 189 add r5, r0 /* r5 = position in old pm_active */ 199 1: ldrex r0, [r5] [all …]
|
/freebsd/contrib/cortex-strings/src/arm/ |
H A D | memchr.S | 1 /* Copyright (c) 2010-2011, Linaro Limited 35 This memchr routine is optimised on a Cortex-A9 and should work on 42 @ 2011-02-07 david.gilbert@linaro.org 44 @ 2011-07-14 david.gilbert@linaro.org 46 @ 2011-12-07 david.gilbert@linaro.org 50 .arch armv7-a 54 #define CHARTSTMASK(c) 1<<(31-(c*8)) 61 @ --------------------------------------------------------------------------- 91 push {r4,r5,r6,r7} 99 ldmia r0!,{r5,r6} [all …]
|
H A D | memset.S | 1 /* Copyright (c) 2010-2011, Linaro Limited 35 This memset routine is optimised on a Cortex-A9 and should work on 41 .arch armv7-a 43 @ 2011-08-30 david.gilbert@linaro.org 48 #define CHARTSTMASK(c) 1<<(31-(c*8)) 55 @ --------------------------------------------------------------------------- 83 push {r4,r5,r6,r7} 94 mov r5,r1 100 stmia r3!,{r1,r5,r6,r7} 104 @ At this point we're still aligned and we have upto align-1 bytes left to right [all …]
|
/freebsd/contrib/opencsd/decoder/source/ |
H A D | trc_core_arch_map.cpp | 44 { "Cortex-A77", { ARCH_V8r3, profile_CortexA } }, 45 { "Cortex-A76", { ARCH_V8r3, profile_CortexA } }, 46 { "Cortex-A75", { ARCH_V8r3, profile_CortexA } }, 47 { "Cortex-A73", { ARCH_V8, profile_CortexA } }, 48 { "Cortex-A72", { ARCH_V8, profile_CortexA } }, 49 { "Cortex-A65", { ARCH_V8r3, profile_CortexA } }, 50 { "Cortex-A57", { ARCH_V8, profile_CortexA } }, 51 { "Cortex-A55", { ARCH_V8r3, profile_CortexA } }, 52 { "Cortex-A53", { ARCH_V8, profile_CortexA } }, 53 { "Cortex-A35", { ARCH_V8, profile_CortexA } }, [all …]
|
/freebsd/sys/contrib/device-tree/src/arm64/ti/ |
H A D | k3-am62a.dtsi | 1 // SPDX-License-Identifier: GPL-2.0 5 * Copyright (C) 2022 Texas Instruments Incorporated - https://www.ti.com/ 8 #include <dt-bindings/gpio/gpio.h> 9 #include <dt-bindings/interrupt-controller/irq.h> 10 #include <dt-bindings/interrupt-controller/arm-gic.h> 11 #include <dt-bindings/soc/ti,sci_pm_domain.h> 13 #include "k3-pinctrl.h" 18 interrupt-parent = <&gic500>; 19 #address-cells = <2>; 20 #size-cells = <2>; [all …]
|
/freebsd/crypto/openssl/crypto/poly1305/asm/ |
H A D | poly1305-armv4.pl | 2 # Copyright 2016-2020 The OpenSSL Project Authors. All Rights Reserved. 17 # IALU(*)/gcc-4.4 NEON 19 # ARM11xx(ARMv6) 7.78/+100% - 20 # Cortex-A5 6.35/+130% 3.00 21 # Cortex-A8 6.25/+115% 2.36 22 # Cortex-A9 5.10/+95% 2.55 23 # Cortex-A15 3.85/+85% 1.25(**) 26 # (*) this is for -march=armv6, i.e. with bunch of ldrb loading data; 27 # (**) these are trade-off results, they can be improved by ~8% but at 28 # the cost of 15/12% regression on Cortex-A5/A7, it's even possible [all …]
|
/freebsd/crypto/openssl/crypto/ec/asm/ |
H A D | ecp_nistz256-armv4.pl | 2 # Copyright 2015-2020 The OpenSSL Project Authors. All Rights Reserved. 23 # original .c module was made 32-bit savvy in order to make this 26 # with/without -DECP_NISTZ256_ASM 27 # Cortex-A8 +53-170% 28 # Cortex-A9 +76-205% 29 # Cortex-A15 +100-316% 30 # Snapdragon S4 +66-187% 33 # on benchmark. Lower coefficients are for ECDSA sign, server-side 43 ( $xlate="${dir}arm-xlate.pl" and -f $xlate ) or 44 ( $xlate="${dir}../../perlasm/arm-xlate.pl" and -f $xlate) or [all …]
|
/freebsd/sys/contrib/device-tree/Bindings/arm/ |
H A D | cpus.yaml | 1 # SPDX-License-Identifier: GPL-2.0 3 --- 5 $schema: http://devicetree.org/meta-schemas/core.yaml# 10 - Lorenzo Pieralisi <lorenzo.pieralisi@arm.com> 21 with updates for 32-bit and 64-bi [all...] |
/freebsd/crypto/openssl/crypto/bn/asm/ |
H A D | armv4-mont.pl | 2 # Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved. 22 # and compilers. The code was observed to provide +65-35% improvement 24 # +115-80% on Intel IXP425. This is compared to pre-bn_mul_mont code 25 # base and compiler generated code with in-lined umull and even umlal 36 # performance improvement on Cortex-A8 is ~45-100% depending on key 37 # length, more for longer keys. On Cortex-A15 the span is ~10-105%. 40 # rather because original integer-only code seems to perform 41 # suboptimally on S4. Situation on Cortex-A9 is unfortunately 44 # of percent worse than for integer-only code. The code is chosen 45 # for execution on all NEON-capable processors, because gain on [all …]
|
/freebsd/sys/crypto/openssl/arm/ |
H A D | aes-armv4.S | 1 /* Do not modify. This file is auto-generated from aes-armv4.pl. */ 2 @ Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved. 21 @ Code uses single 1K S-box and is >2 times faster than code generated 22 @ by gcc-3.4.1. This is thanks to unique feature of ARMv4 ISA, which 25 @ is endian-neutral. The performance is ~42 cycles/byte for 128-bit 26 @ key [on single-issue Xscale PXA250 core]. 34 @ Rescheduling for dual-issue pipeline resulted in 12% improvement on 35 @ Cortex A8 core and ~25 cycles per byte processed with 128-bit key. 39 @ Profiler-assisted and platform-specific optimization resulted in 16% 40 @ improvement on Cortex A8 core and ~21.5 cycles per byte. [all …]
|
H A D | sha512-armv4.S | 1 /* Do not modify. This file is auto-generated from sha512-armv4.pl. */ 2 @ Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved. 22 @ by gcc 3.4 and it spends ~72 clock cycles per byte [on single-issue 27 @ Rescheduling for dual-issue pipeline resulted in 6% improvement on 28 @ Cortex A8 core and ~40 cycles per processed byte. 32 @ Profiler-assisted and platform-specific optimization resulted in 7% 37 @ Add NEON implementation. On Cortex A8 it was measured to process 38 @ one byte in 23.3 cycles or ~60% faster than integer-only code. 44 @ Technical writers asserted that 3-way S4 pipeline can sustain 46 @ not be observed, see http://www.openssl.org/~appro/Snapdragon-S4.html [all …]
|
H A D | sha256-armv4.S | 1 /* Do not modify. This file is auto-generated from sha256-armv4.pl. */ 2 @ Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved. 21 @ Performance is ~2x better than gcc 3.4 generated code and in "abso- 22 @ lute" terms is ~2250 cycles per 64-byte block or ~35 cycles per 23 @ byte [on single-issue Xscale PXA250 core]. 27 @ Rescheduling for dual-issue pipeline resulted in 22% improvement on 28 @ Cortex A8 core and ~20 cycles per processed byte. 32 @ Profiler-assisted and platform-specific optimization resulted in 16% 33 @ improvement on Cortex A8 core and ~15.4 cycles per processed byte. 37 @ Add NEON implementation. On Cortex A8 it was measured to process one [all …]
|
H A D | bsaes-armv7.S | 1 /* Do not modify. This file is auto-generated from bsaes-armv7.pl. */ 2 @ Copyright 2012-2023 The OpenSSL Project Authors. All Rights Reserved. 20 @ Bit-sliced AES for ARM NEON 24 @ This implementation is direct adaptation of bsaes-x86_64 module for 25 @ ARM NEON. Except that this module is endian-neutral [in sense that 28 @ only low-level primitives and unsupported entry points, just enough 29 @ to collect performance results, which for Cortex-A8 core are: 31 @ encrypt 19.5 cycles per byte processed with 128-bit key 32 @ decrypt 22.1 cycles per byte processed with 128-bit key 33 @ key conv. 440 cycles per 128-bit key/0.18 of 8x block [all …]
|
H A D | sha1-armv4-large.S | 1 /* Do not modify. This file is auto-generated from sha1-armv4-large.pl. */ 33 stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} 35 ldmia r0,{r3,r4,r5,r6,r7} 40 mov r5,r5,ror#30 51 eor r10,r5,r6 @ F_xx_xx 58 eor r10,r5,r6 @ F_xx_xx 67 str r9,[r14,#-4]! 76 eor r10,r4,r5 @ F_xx_xx 83 eor r10,r4,r5 @ F_xx_xx 91 eor r10,r10,r5,ror#2 @ F_00_19(B,C,D) [all …]
|
/freebsd/sys/contrib/device-tree/Bindings/arm/tegra/ |
H A D | nvidia,tegra194-cbb.yaml | 1 # SPDX-License-Identifier: (GPL-2.0-only OR BSD-2-Clause) 3 --- 4 $id: http://devicetree.org/schemas/arm/tegra/nvidia,tegra194-cbb.yaml# 5 $schema: http://devicetree.org/meta-schemas/core.yaml# 10 - Sumit Gupta <sumitg@nvidia.com> 15 multiple hierarchical sub-NOCs (Network-on-Chip) and connects various 20 "AON-NOC, SCE-NOC, RCE-NOC, BPMP-NOC, CV-NOC" and "CBB Central NOC" 28 - For CCPLEX (CPU Complex) initiator, the driver sets ERD bit. So, the 31 - For other initiators, the ERD is disabled. So, the access issuing 34 include all engines using Cortex-R5 (which is ARMv7 CPU cluster) and [all …]
|
/freebsd/contrib/llvm-project/llvm/include/llvm/TargetParser/ |
H A D | ARMTargetParser.def | 1 //===- ARMTargetParser.def - ARM target parsing defines ---------*- C++ -*-===// 5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception 7 //===----------------------------------------------------------------------===// 11 //===----------------------------------------------------------------------===// 28 ARM_FPU("vfpv3-fp16", FK_VFPV3_FP16, FPUVersion::VFPV3_FP16, 30 ARM_FPU("vfpv3-d16", FK_VFPV3_D16, FPUVersion::VFPV3, NeonSupportLevel::None, 32 ARM_FPU("vfpv3-d16-fp16", FK_VFPV3_D16_FP16, FPUVersion::VFPV3_FP16, 36 ARM_FPU("vfpv3xd-fp16", FK_VFPV3XD_FP16, FPUVersion::VFPV3_FP16, 40 ARM_FPU("vfpv4-d16", FK_VFPV4_D16, FPUVersion::VFPV4, NeonSupportLevel::None, 42 ARM_FPU("fpv4-sp-d16", FK_FPV4_SP_D16, FPUVersion::VFPV4, [all …]
|
/freebsd/crypto/openssl/crypto/sha/asm/ |
H A D | keccak1600-armv4.pl | 2 # Copyright 2017-2020 The OpenSSL Project Authors. All Rights Reserved. 16 # Keccak-1600 for ARMv4. 20 # Non-NEON code is KECCAK_1X variant (see sha/keccak1600.c) with bit 22 # fast, but several times smaller, and is endian- and ISA-neutral. ISA 24 # be assembled even as Thumb-2. NEON code path is KECCAK_1X_ALT with 26 # in fact faster by 10-15% on some processors, and endian-neutral. 30 # Switch to KECCAK_2X variant for non-NEON code and merge almost 1/2 33 # minimizes re-loads from temporary storage, and merged rotates just 40 # 'eor a,b,c>>>(x-y)' and then merge-rotating 'a' in next operation 47 # Reduce per-round instruction count in Thumb-2 case by 16%. This is [all …]
|
H A D | sha1-armv4-large.pl | 2 # Copyright 2007-2020 The OpenSSL Project Authors. All Rights Reserved. 21 # Size/performance trade-off 26 # armv4-small 392/+29% 1958/+64% 2250/+96% 27 # armv4-compact 740/+89% 1552/+26% 1840/+22% 28 # armv4-large 1420/+92% 1307/+19% 1370/+34%[***] 40 # i-cache availability, branch penalties, etc. 47 # [***] which is also ~35% better than compiler generated code. Dual- 48 # issue Cortex A8 core was measured to process input block in 53 # Rescheduling for dual-issue pipeline resulted in 13% improvement on 54 # Cortex A8 core and in absolute terms ~870 cycles per input block [all …]
|
/freebsd/sys/contrib/openzfs/module/icp/asm-arm/sha2/ |
H A D | sha512-armv7.S | 2 * Copyright 2004-2022 The OpenSSL Project Authors. All Rights Reserved. 8 * https://www.apache.org/licenses/LICENSE-2.0 18 * Portions Copyright (c) 2022 Tino Reichardt <milky-zfs@mcmilk.de> 19 * - modified assembly to fit into OpenZFS 31 # define VFP_ABI_PUSH vstmdb sp!,{d8-d15} 32 # define VFP_ABI_POP vldmia sp!,{d8-d15} 101 .size K512,.-K512 117 stmdb sp!,{r4,r5,r6,r7,r8,r9,r10,r11,r12,lr} 132 ldr r5,[r0,#0+LO] 208 str r5,[sp,#0+0] [all …]
|
/freebsd/crypto/openssl/crypto/modes/asm/ |
H A D | ghash-armv4.pl | 2 # Copyright 2010-2020 The OpenSSL Project Authors. All Rights Reserved. 19 # The module implements "4-bit" GCM GHASH function and underlying 20 # single multiplication operation in GF(2^128). "4-bit" means that it 21 # uses 256 bytes per-key table [+32 bytes shared table]. There is no 24 # 32 instructions long and on single-issue core should execute in <40 27 # compiler-generated one... 31 # Rescheduling for dual-issue pipeline resulted in 8.5% improvement on 32 # Cortex A8 core and ~25 cycles per processed byte (which was observed 33 # to be ~3 times faster than gcc-generated code:-) 37 # Profiler-assisted and platform-specific optimization resulted in 7% [all …]
|
/freebsd/crypto/openssl/crypto/aes/asm/ |
H A D | bsaes-armv7.pl | 2 # Copyright 2012-2023 The OpenSSL Project Authors. All Rights Reserved. 20 # Bit-sliced AES for ARM NEON 24 # This implementation is direct adaptation of bsaes-x86_64 module for 25 # ARM NEON. Except that this module is endian-neutral [in sense that 28 # only low-level primitives and unsupported entry points, just enough 29 # to collect performance results, which for Cortex-A8 core are: 31 # encrypt 19.5 cycles per byte processed with 128-bit key 32 # decrypt 22.1 cycles per byte processed with 128-bit key 33 # key conv. 440 cycles per 128-bit key/0.18 of 8x block 37 # http://www.openssl.org/~appro/Snapdragon-S4.html). [all …]
|