// SPDX-License-Identifier: GPL-2.0-or-later
/*
 *
 * Copyright (C) IBM Corporation, 2012
 *
 * Author: Anton Blanchard <anton@au.ibm.com>
 */

98fe08885SDaniel Axtens /*
108fe08885SDaniel Axtens * Sparse (as at v0.5.0) gets very, very confused by this file.
118fe08885SDaniel Axtens * Make it a bit simpler for it.
128fe08885SDaniel Axtens */
138fe08885SDaniel Axtens #if !defined(__CHECKER__)
14ef1313deSAnton Blanchard #include <altivec.h>
158fe08885SDaniel Axtens #else
168fe08885SDaniel Axtens #define vec_xor(a, b) a ^ b
178fe08885SDaniel Axtens #define vector __attribute__((vector_size(16)))
188fe08885SDaniel Axtens #endif
19ef1313deSAnton Blanchard
20f718d426SMatt Brown #include "xor_vmx.h"
21ef1313deSAnton Blanchard
22ef1313deSAnton Blanchard typedef vector signed char unative_t;
23ef1313deSAnton Blanchard
/*
 * Declare the working set for one buffer: a unative_t pointer named V that
 * aliases the V##_in argument, plus four unative_t temporaries V##_0 to
 * V##_3.  The cast drops any const qualifier V##_in carries.
 *
 * Expands to declarations and leaves off the final semicolon, so it is
 * used as "DEFINE(v1);" at the top of a function body.
 */
#define DEFINE(V)				\
	unative_t *V = (unative_t *)V##_in;	\
	unative_t V##_0, V##_1, V##_2, V##_3

/* Copy the four elements V[0]..V[3] into the temporaries V##_0..V##_3. */
#define LOAD(V)			\
	do {			\
		V##_0 = V[0];	\
		V##_1 = V[1];	\
		V##_2 = V[2];	\
		V##_3 = V[3];	\
	} while (0)

/* Write the temporaries V##_0..V##_3 back to the elements V[0]..V[3]. */
#define STORE(V)		\
	do {			\
		V[0] = V##_0;	\
		V[1] = V##_1;	\
		V[2] = V##_2;	\
		V[3] = V##_3;	\
	} while (0)

/*
 * XOR the temporaries of V2 into the temporaries of V1, element by element:
 * V1##_n = vec_xor(V1##_n, V2##_n) for n = 0..3.  V2's temporaries are left
 * unchanged, and memory is not touched.
 */
#define XOR(V1, V2)					\
	do {						\
		V1##_0 = vec_xor(V1##_0, V2##_0);	\
		V1##_1 = vec_xor(V1##_1, V2##_1);	\
		V1##_2 = vec_xor(V1##_2, V2##_2);	\
		V1##_3 = vec_xor(V1##_3, V2##_3);	\
	} while (0)

__xor_altivec_2(unsigned long bytes,unsigned long * __restrict v1_in,const unsigned long * __restrict v2_in)52*297565aaSArd Biesheuvel void __xor_altivec_2(unsigned long bytes,
53*297565aaSArd Biesheuvel unsigned long * __restrict v1_in,
54*297565aaSArd Biesheuvel const unsigned long * __restrict v2_in)
55ef1313deSAnton Blanchard {
56ef1313deSAnton Blanchard DEFINE(v1);
57ef1313deSAnton Blanchard DEFINE(v2);
58ef1313deSAnton Blanchard unsigned long lines = bytes / (sizeof(unative_t)) / 4;
59ef1313deSAnton Blanchard
60ef1313deSAnton Blanchard do {
61ef1313deSAnton Blanchard LOAD(v1);
62ef1313deSAnton Blanchard LOAD(v2);
63ef1313deSAnton Blanchard XOR(v1, v2);
64ef1313deSAnton Blanchard STORE(v1);
65ef1313deSAnton Blanchard
66ef1313deSAnton Blanchard v1 += 4;
67ef1313deSAnton Blanchard v2 += 4;
68ef1313deSAnton Blanchard } while (--lines > 0);
69ef1313deSAnton Blanchard }
70ef1313deSAnton Blanchard
__xor_altivec_3(unsigned long bytes,unsigned long * __restrict v1_in,const unsigned long * __restrict v2_in,const unsigned long * __restrict v3_in)71*297565aaSArd Biesheuvel void __xor_altivec_3(unsigned long bytes,
72*297565aaSArd Biesheuvel unsigned long * __restrict v1_in,
73*297565aaSArd Biesheuvel const unsigned long * __restrict v2_in,
74*297565aaSArd Biesheuvel const unsigned long * __restrict v3_in)
75ef1313deSAnton Blanchard {
76ef1313deSAnton Blanchard DEFINE(v1);
77ef1313deSAnton Blanchard DEFINE(v2);
78ef1313deSAnton Blanchard DEFINE(v3);
79ef1313deSAnton Blanchard unsigned long lines = bytes / (sizeof(unative_t)) / 4;
80ef1313deSAnton Blanchard
81ef1313deSAnton Blanchard do {
82ef1313deSAnton Blanchard LOAD(v1);
83ef1313deSAnton Blanchard LOAD(v2);
84ef1313deSAnton Blanchard LOAD(v3);
85ef1313deSAnton Blanchard XOR(v1, v2);
86ef1313deSAnton Blanchard XOR(v1, v3);
87ef1313deSAnton Blanchard STORE(v1);
88ef1313deSAnton Blanchard
89ef1313deSAnton Blanchard v1 += 4;
90ef1313deSAnton Blanchard v2 += 4;
91ef1313deSAnton Blanchard v3 += 4;
92ef1313deSAnton Blanchard } while (--lines > 0);
93ef1313deSAnton Blanchard }
94ef1313deSAnton Blanchard
__xor_altivec_4(unsigned long bytes,unsigned long * __restrict v1_in,const unsigned long * __restrict v2_in,const unsigned long * __restrict v3_in,const unsigned long * __restrict v4_in)95*297565aaSArd Biesheuvel void __xor_altivec_4(unsigned long bytes,
96*297565aaSArd Biesheuvel unsigned long * __restrict v1_in,
97*297565aaSArd Biesheuvel const unsigned long * __restrict v2_in,
98*297565aaSArd Biesheuvel const unsigned long * __restrict v3_in,
99*297565aaSArd Biesheuvel const unsigned long * __restrict v4_in)
100ef1313deSAnton Blanchard {
101ef1313deSAnton Blanchard DEFINE(v1);
102ef1313deSAnton Blanchard DEFINE(v2);
103ef1313deSAnton Blanchard DEFINE(v3);
104ef1313deSAnton Blanchard DEFINE(v4);
105ef1313deSAnton Blanchard unsigned long lines = bytes / (sizeof(unative_t)) / 4;
106ef1313deSAnton Blanchard
107ef1313deSAnton Blanchard do {
108ef1313deSAnton Blanchard LOAD(v1);
109ef1313deSAnton Blanchard LOAD(v2);
110ef1313deSAnton Blanchard LOAD(v3);
111ef1313deSAnton Blanchard LOAD(v4);
112ef1313deSAnton Blanchard XOR(v1, v2);
113ef1313deSAnton Blanchard XOR(v3, v4);
114ef1313deSAnton Blanchard XOR(v1, v3);
115ef1313deSAnton Blanchard STORE(v1);
116ef1313deSAnton Blanchard
117ef1313deSAnton Blanchard v1 += 4;
118ef1313deSAnton Blanchard v2 += 4;
119ef1313deSAnton Blanchard v3 += 4;
120ef1313deSAnton Blanchard v4 += 4;
121ef1313deSAnton Blanchard } while (--lines > 0);
122ef1313deSAnton Blanchard }
123ef1313deSAnton Blanchard
__xor_altivec_5(unsigned long bytes,unsigned long * __restrict v1_in,const unsigned long * __restrict v2_in,const unsigned long * __restrict v3_in,const unsigned long * __restrict v4_in,const unsigned long * __restrict v5_in)124*297565aaSArd Biesheuvel void __xor_altivec_5(unsigned long bytes,
125*297565aaSArd Biesheuvel unsigned long * __restrict v1_in,
126*297565aaSArd Biesheuvel const unsigned long * __restrict v2_in,
127*297565aaSArd Biesheuvel const unsigned long * __restrict v3_in,
128*297565aaSArd Biesheuvel const unsigned long * __restrict v4_in,
129*297565aaSArd Biesheuvel const unsigned long * __restrict v5_in)
130ef1313deSAnton Blanchard {
131ef1313deSAnton Blanchard DEFINE(v1);
132ef1313deSAnton Blanchard DEFINE(v2);
133ef1313deSAnton Blanchard DEFINE(v3);
134ef1313deSAnton Blanchard DEFINE(v4);
135ef1313deSAnton Blanchard DEFINE(v5);
136ef1313deSAnton Blanchard unsigned long lines = bytes / (sizeof(unative_t)) / 4;
137ef1313deSAnton Blanchard
138ef1313deSAnton Blanchard do {
139ef1313deSAnton Blanchard LOAD(v1);
140ef1313deSAnton Blanchard LOAD(v2);
141ef1313deSAnton Blanchard LOAD(v3);
142ef1313deSAnton Blanchard LOAD(v4);
143ef1313deSAnton Blanchard LOAD(v5);
144ef1313deSAnton Blanchard XOR(v1, v2);
145ef1313deSAnton Blanchard XOR(v3, v4);
146ef1313deSAnton Blanchard XOR(v1, v5);
147ef1313deSAnton Blanchard XOR(v1, v3);
148ef1313deSAnton Blanchard STORE(v1);
149ef1313deSAnton Blanchard
150ef1313deSAnton Blanchard v1 += 4;
151ef1313deSAnton Blanchard v2 += 4;
152ef1313deSAnton Blanchard v3 += 4;
153ef1313deSAnton Blanchard v4 += 4;
154ef1313deSAnton Blanchard v5 += 4;
155ef1313deSAnton Blanchard } while (--lines > 0);
156ef1313deSAnton Blanchard }
157