xref: /linux/arch/riscv/kernel/vec-copy-unaligned.S (revision c1668520c9aa4019738f27097b187a5460646cbc)
1*e7c9d66eSJesse Taube/* SPDX-License-Identifier: GPL-2.0 */
2*e7c9d66eSJesse Taube/* Copyright (C) 2024 Rivos Inc. */
3*e7c9d66eSJesse Taube
4*e7c9d66eSJesse Taube#include <linux/args.h>
5*e7c9d66eSJesse Taube#include <linux/linkage.h>
6*e7c9d66eSJesse Taube#include <asm/asm.h>
7*e7c9d66eSJesse Taube
8*e7c9d66eSJesse Taube	.text
9*e7c9d66eSJesse Taube
/* Effective element width, in bits, used by the word-copy routine. */
#define WORD_EEW 32

/* SEW operand for vsetivli, e.g. e32. */
#define WORD_SEW CONCATENATE(e, WORD_EEW)
/*
 * Unit-stride vector load/store mnemonics for WORD_EEW-bit elements.
 * Note: the store must concatenate "vse" (vse32.v), not "vle" --
 * otherwise VEC_S expands to a second load and the destination buffer
 * is never written (cf. the vle8.v/vse8.v pair in the byte routine).
 */
#define VEC_L CONCATENATE(vle, WORD_EEW).v
#define VEC_S CONCATENATE(vse, WORD_EEW).v
15*e7c9d66eSJesse Taube
/* void __riscv_copy_vec_words_unaligned(void *, const void *, size_t) */
/* Performs a memcpy without aligning buffers, using word loads and stores. */
/* Note: The size is truncated to a multiple of WORD_EEW */
SYM_FUNC_START(__riscv_copy_vec_words_unaligned)
	/*
	 * a0 = dst, a1 = src, a2 = size in bytes.
	 * a4 = size rounded down to a multiple of WORD_EEW (32) bytes;
	 * bail out early if nothing remains after truncation.
	 */
	andi  a4, a2, ~(WORD_EEW-1)
	beqz  a4, 2f
	add   a3, a1, a4	/* a3 = end of the truncated source region */
	.option push
	.option arch, +zve32x	/* only Zve32x is required for e32 ops */
1:
	/*
	 * Request 8 elements of WORD_SEW bits: 8 * (32/8) = 32 bytes,
	 * matching the WORD_EEW-byte pointer advance below.  t0 receives
	 * the granted vl -- assumed to be the full 8 here; NOTE(review):
	 * this holds for any VLEN >= 32 with LMUL=8, confirm minimum VLEN.
	 */
	vsetivli t0, 8, WORD_SEW, m8, ta, ma
	VEC_L v0, (a1)		/* unaligned unit-stride load from src */
	VEC_S v0, (a0)		/* unaligned unit-stride store to dst */
	addi  a0, a0, WORD_EEW
	addi  a1, a1, WORD_EEW
	bltu  a1, a3, 1b	/* loop until src reaches end pointer */

2:
	.option pop
	ret
SYM_FUNC_END(__riscv_copy_vec_words_unaligned)
37*e7c9d66eSJesse Taube
/* void __riscv_copy_vec_bytes_unaligned(void *, const void *, size_t) */
/* Performs a memcpy without aligning buffers, using only byte accesses. */
/* Note: The size is truncated to a multiple of 8 */
SYM_FUNC_START(__riscv_copy_vec_bytes_unaligned)
	/*
	 * a0 = dst, a1 = src, a2 = size in bytes.
	 * a4 = size rounded down to a multiple of 8 bytes; bail out
	 * early if nothing remains after truncation.
	 */
	andi a4, a2, ~(8-1)
	beqz a4, 2f
	add  a3, a1, a4		/* a3 = end of the truncated source region */
	.option push
	.option arch, +zve32x	/* only Zve32x is required for e8 ops */
1:
	/*
	 * Copy 8 one-byte elements per iteration, matching the 8-byte
	 * pointer advance below.  t0 receives the granted vl.
	 */
	vsetivli t0, 8, e8, m8, ta, ma
	vle8.v v0, (a1)		/* unaligned byte-wise load from src */
	vse8.v v0, (a0)		/* unaligned byte-wise store to dst */
	addi a0, a0, 8
	addi a1, a1, 8
	bltu a1, a3, 1b		/* loop until src reaches end pointer */

2:
	.option pop
	ret
SYM_FUNC_END(__riscv_copy_vec_bytes_unaligned)
59