1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright (C) 2023 WANG Xuerui <git@xen0n.name> 4 * 5 * Template for XOR operations, instantiated in xor_simd.c. 6 * 7 * Expected preprocessor definitions: 8 * 9 * - LINE_WIDTH 10 * - XOR_FUNC_NAME(nr) 11 * - LD_INOUT_LINE(buf) 12 * - LD_AND_XOR_LINE(buf) 13 * - ST_LINE(buf) 14 */ 15 16 void XOR_FUNC_NAME(2)(unsigned long bytes, 17 unsigned long * __restrict v1, 18 const unsigned long * __restrict v2) 19 { 20 unsigned long lines = bytes / LINE_WIDTH; 21 22 do { 23 __asm__ __volatile__ ( 24 LD_INOUT_LINE(v1) 25 LD_AND_XOR_LINE(v2) 26 ST_LINE(v1) 27 : : [v1] "r"(v1), [v2] "r"(v2) : "memory" 28 ); 29 30 v1 += LINE_WIDTH / sizeof(unsigned long); 31 v2 += LINE_WIDTH / sizeof(unsigned long); 32 } while (--lines > 0); 33 } 34 35 void XOR_FUNC_NAME(3)(unsigned long bytes, 36 unsigned long * __restrict v1, 37 const unsigned long * __restrict v2, 38 const unsigned long * __restrict v3) 39 { 40 unsigned long lines = bytes / LINE_WIDTH; 41 42 do { 43 __asm__ __volatile__ ( 44 LD_INOUT_LINE(v1) 45 LD_AND_XOR_LINE(v2) 46 LD_AND_XOR_LINE(v3) 47 ST_LINE(v1) 48 : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3) : "memory" 49 ); 50 51 v1 += LINE_WIDTH / sizeof(unsigned long); 52 v2 += LINE_WIDTH / sizeof(unsigned long); 53 v3 += LINE_WIDTH / sizeof(unsigned long); 54 } while (--lines > 0); 55 } 56 57 void XOR_FUNC_NAME(4)(unsigned long bytes, 58 unsigned long * __restrict v1, 59 const unsigned long * __restrict v2, 60 const unsigned long * __restrict v3, 61 const unsigned long * __restrict v4) 62 { 63 unsigned long lines = bytes / LINE_WIDTH; 64 65 do { 66 __asm__ __volatile__ ( 67 LD_INOUT_LINE(v1) 68 LD_AND_XOR_LINE(v2) 69 LD_AND_XOR_LINE(v3) 70 LD_AND_XOR_LINE(v4) 71 ST_LINE(v1) 72 : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4) 73 : "memory" 74 ); 75 76 v1 += LINE_WIDTH / sizeof(unsigned long); 77 v2 += LINE_WIDTH / sizeof(unsigned long); 78 v3 += LINE_WIDTH / sizeof(unsigned long); 79 v4 += LINE_WIDTH / sizeof(unsigned long); 80 } while (--lines > 0); 81 } 82 83 void XOR_FUNC_NAME(5)(unsigned long bytes, 84 unsigned long * __restrict v1, 85 const unsigned long * __restrict v2, 86 const unsigned long * __restrict v3, 87 const unsigned long * __restrict v4, 88 const unsigned long * __restrict v5) 89 { 90 unsigned long lines = bytes / LINE_WIDTH; 91 92 do { 93 __asm__ __volatile__ ( 94 LD_INOUT_LINE(v1) 95 LD_AND_XOR_LINE(v2) 96 LD_AND_XOR_LINE(v3) 97 LD_AND_XOR_LINE(v4) 98 LD_AND_XOR_LINE(v5) 99 ST_LINE(v1) 100 : : [v1] "r"(v1), [v2] "r"(v2), [v3] "r"(v3), [v4] "r"(v4), 101 [v5] "r"(v5) : "memory" 102 ); 103 104 v1 += LINE_WIDTH / sizeof(unsigned long); 105 v2 += LINE_WIDTH / sizeof(unsigned long); 106 v3 += LINE_WIDTH / sizeof(unsigned long); 107 v4 += LINE_WIDTH / sizeof(unsigned long); 108 v5 += LINE_WIDTH / sizeof(unsigned long); 109 } while (--lines > 0); 110 } 111