/* SPDX-License-Identifier: GPL-2.0 */
/* Copyright (C) 2024 Rivos Inc. */

#include <linux/args.h>
#include <linux/linkage.h>
#include <asm/asm.h>

	.text

#define WORD_EEW 32

/* With WORD_EEW == 32, these expand to e32, vle32.v and vse32.v. */
#define WORD_SEW CONCATENATE(e, WORD_EEW)
#define VEC_L CONCATENATE(vle, WORD_EEW).v
#define VEC_S CONCATENATE(vse, WORD_EEW).v

/* void __riscv_copy_vec_words_unaligned(void *, const void *, size_t) */
/* Performs a memcpy without aligning buffers, using word loads and stores. */
/* Note: The size is truncated to a multiple of WORD_EEW */
SYM_FUNC_START(__riscv_copy_vec_words_unaligned)
	/* a0 = dest, a1 = src, a2 = size; a4 = truncated size, a3 = src end */
	andi a4, a2, ~(WORD_EEW-1)
	beqz a4, 2f
	add a3, a1, a4
	.option push
	.option arch, +zve32x
1:
	vsetivli t0, 8, WORD_SEW, m8, ta, ma
	VEC_L v0, (a1)
	VEC_S v0, (a0)
	addi a0, a0, WORD_EEW
	addi a1, a1, WORD_EEW
	bltu a1, a3, 1b

2:
	.option pop
	ret
SYM_FUNC_END(__riscv_copy_vec_words_unaligned)

/* void __riscv_copy_vec_bytes_unaligned(void *, const void *, size_t) */
/* Performs a memcpy without aligning buffers, using only byte accesses. */
/* Note: The size is truncated to a multiple of 8 */
SYM_FUNC_START(__riscv_copy_vec_bytes_unaligned)
	/* a0 = dest, a1 = src, a2 = size; a4 = truncated size, a3 = src end */
	andi a4, a2, ~(8-1)
	beqz a4, 2f
	add a3, a1, a4
	.option push
	.option arch, +zve32x
1:
	vsetivli t0, 8, e8, m8, ta, ma
	vle8.v v0, (a1)
	vse8.v v0, (a0)
	addi a0, a0, 8
	addi a1, a1, 8
	bltu a1, a3, 1b

2:
	.option pop
	ret
SYM_FUNC_END(__riscv_copy_vec_bytes_unaligned)
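
/*
 * Caller-side sketch (illustrative, not taken from this file): both helpers
 * follow the C prototypes documented above, taking destination, source and
 * byte count in a0, a1 and a2 per the standard calling convention. The
 * wrapper and parameter names below are hypothetical; only the two
 * __riscv_* prototypes come from this file.
 *
 *	void __riscv_copy_vec_words_unaligned(void *dst, const void *src, size_t n);
 *	void __riscv_copy_vec_bytes_unaligned(void *dst, const void *src, size_t n);
 *
 *	static void copy_unaligned_sketch(void *dst, const void *src, size_t n)
 *	{
 *		if (n >= 32)
 *			__riscv_copy_vec_words_unaligned(dst, src, n);	// copies n & ~31 bytes
 *		else
 *			__riscv_copy_vec_bytes_unaligned(dst, src, n);	// copies n & ~7 bytes
 *	}
 */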