// SPDX-License-Identifier: CDDL-1.0
/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or https://opensource.org/licenses/CDDL-1.0.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */
/*
 * Copyright (C) 2019 Romain Dolbeau. All rights reserved.
 * <romain.dolbeau@european-processor-initiative.eu>
 */

#include <sys/types.h>
#include <sys/simd.h>

/*
 * REG_CNT(r...) expands to the number of arguments it was given (1..8).
 * The operation macros below (XOR, LOAD, MUL2, ...) use it to dispatch
 * on how many vector-register variables the caller passed in.
 */
#define	_REG_CNT(_0, _1, _2, _3, _4, _5, _6, _7, N, ...) N
#define	REG_CNT(r...) _REG_CNT(r, 8, 7, 6, 5, 4, 3, 2, 1)

/*
 * VRn_ selects the (n+1)-th argument and emits the GCC named-operand
 * reference "%[wREG]" for it, to be spliced into an asm template.
 */
#define	VR0_(REG, ...) "%[w"#REG"]"
#define	VR1_(_1, REG, ...) "%[w"#REG"]"
#define	VR2_(_1, _2, REG, ...) "%[w"#REG"]"
#define	VR3_(_1, _2, _3, REG, ...) "%[w"#REG"]"
#define	VR4_(_1, _2, _3, _4, REG, ...) "%[w"#REG"]"
#define	VR5_(_1, _2, _3, _4, _5, REG, ...) "%[w"#REG"]"
#define	VR6_(_1, _2, _3, _4, _5, _6, REG, ...) "%[w"#REG"]"
#define	VR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) "%[w"#REG"]"

/*
 * Here we need registers not used otherwise.
 * They will be used in unused ASM for the case
 * with more registers than required... but GCC
 * will still need to make sure the constraints
 * are correct, and duplicate constraints are illegal
 * ... and we use the "register" number as a name
 *
 * (In other words: VRn(r) pads the caller's list with the dummy
 * variable numbers 30..36 so the selection macro above always has
 * an (n+1)-th argument to pick, even when the caller passed fewer
 * real registers than the largest asm template mentions.)
 */

#define	VR0(r...) VR0_(r)
#define	VR1(r...) VR1_(r)
#define	VR2(r...) VR2_(r, 36)
#define	VR3(r...) VR3_(r, 36, 35)
#define	VR4(r...) VR4_(r, 36, 35, 34, 33)
#define	VR5(r...) VR5_(r, 36, 35, 34, 33, 32)
#define	VR6(r...) VR6_(r, 36, 35, 34, 33, 32, 31)
#define	VR7(r...) VR7_(r, 36, 35, 34, 33, 32, 31, 30)

/* Direct reference to a specific numbered variable, e.g. VR(16). */
#define	VR(X) "%[w"#X"]"

/*
 * RVRn: read-only ("v" = AltiVec register) input constraint for the
 * n-th register variable of the argument list.
 */
#define	RVR0_(REG, ...) [w##REG] "v" (w##REG)
#define	RVR1_(_1, REG, ...) [w##REG] "v" (w##REG)
#define	RVR2_(_1, _2, REG, ...) [w##REG] "v" (w##REG)
#define	RVR3_(_1, _2, _3, REG, ...) [w##REG] "v" (w##REG)
#define	RVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "v" (w##REG)
#define	RVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "v" (w##REG)
#define	RVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "v" (w##REG)
#define	RVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "v" (w##REG)

#define	RVR0(r...) RVR0_(r)
#define	RVR1(r...) RVR1_(r)
#define	RVR2(r...) RVR2_(r, 36)
#define	RVR3(r...) RVR3_(r, 36, 35)
#define	RVR4(r...) RVR4_(r, 36, 35, 34, 33)
#define	RVR5(r...) RVR5_(r, 36, 35, 34, 33, 32)
#define	RVR6(r...) RVR6_(r, 36, 35, 34, 33, 32, 31)
#define	RVR7(r...) RVR7_(r, 36, 35, 34, 33, 32, 31, 30)

#define	RVR(X) [w##X] "v" (w##X)

/* WVRn: write-only ("=v") output constraint for the n-th register. */
#define	WVR0_(REG, ...) [w##REG] "=v" (w##REG)
#define	WVR1_(_1, REG, ...) [w##REG] "=v" (w##REG)
#define	WVR2_(_1, _2, REG, ...) [w##REG] "=v" (w##REG)
#define	WVR3_(_1, _2, _3, REG, ...) [w##REG] "=v" (w##REG)
#define	WVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "=v" (w##REG)
#define	WVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "=v" (w##REG)
#define	WVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "=v" (w##REG)
#define	WVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "=v" (w##REG)

#define	WVR0(r...) WVR0_(r)
#define	WVR1(r...) WVR1_(r)
#define	WVR2(r...) WVR2_(r, 36)
#define	WVR3(r...) WVR3_(r, 36, 35)
#define	WVR4(r...) WVR4_(r, 36, 35, 34, 33)
#define	WVR5(r...) WVR5_(r, 36, 35, 34, 33, 32)
#define	WVR6(r...) WVR6_(r, 36, 35, 34, 33, 32, 31)
#define	WVR7(r...) WVR7_(r, 36, 35, 34, 33, 32, 31, 30)

#define	WVR(X) [w##X] "=v" (w##X)

/*
 * UVRn: read-write ("+") earlyclobber ("&") constraint for the n-th
 * register — the value is both consumed and updated by the asm.
 */
#define	UVR0_(REG, ...) [w##REG] "+&v" (w##REG)
#define	UVR1_(_1, REG, ...) [w##REG] "+&v" (w##REG)
#define	UVR2_(_1, _2, REG, ...) [w##REG] "+&v" (w##REG)
#define	UVR3_(_1, _2, _3, REG, ...) [w##REG] "+&v" (w##REG)
#define	UVR4_(_1, _2, _3, _4, REG, ...) [w##REG] "+&v" (w##REG)
#define	UVR5_(_1, _2, _3, _4, _5, REG, ...) [w##REG] "+&v" (w##REG)
#define	UVR6_(_1, _2, _3, _4, _5, _6, REG, ...) [w##REG] "+&v" (w##REG)
#define	UVR7_(_1, _2, _3, _4, _5, _6, _7, REG, ...) [w##REG] "+&v" (w##REG)

#define	UVR0(r...) UVR0_(r)
#define	UVR1(r...) UVR1_(r)
#define	UVR2(r...) UVR2_(r, 36)
#define	UVR3(r...) UVR3_(r, 36, 35)
#define	UVR4(r...) UVR4_(r, 36, 35, 34, 33)
#define	UVR5(r...) UVR5_(r, 36, 35, 34, 33, 32)
#define	UVR6(r...) UVR6_(r, 36, 35, 34, 33, 32, 31)
#define	UVR7(r...) UVR7_(r, 36, 35, 34, 33, 32, 31, 30)

#define	UVR(X) [w##X] "+&v" (w##X)

/* R_01: first two registers of a list; R_23: registers 2 and 3. */
#define	R_01(REG1, REG2, ...) REG1, REG2
#define	_R_23(_0, _1, REG2, REG3, ...) REG2, REG3
#define	R_23(REG...) _R_23(REG, 1, 2, 3)

/* Unsupported register count in a dispatch switch — programming error. */
#define	ZFS_ASM_BUG() ASSERT(0)

/* Byte-granular pointer offset, used to address 16-byte lanes. */
#define	OFFSET(ptr, val) (((unsigned char *)(ptr))+val)

/*
 * Lookup tables for multiplication by a constant in GF(2^8):
 * four 16-byte vperm tables per constant (see _MULx2 below).
 */
extern const uint8_t gf_clmul_mod_lt[4*256][16];

#define	ELEM_SIZE 16

typedef struct v {
	uint8_t b[ELEM_SIZE] __attribute__((aligned(ELEM_SIZE)));
} v_t;

/*
 * XOR_ACC(src, r...): xor 16-byte vectors loaded from src (at
 * consecutive 16-byte offsets) into the given register variables.
 * v18-v21 hold the loaded data; loads are interleaved with the vxor
 * so only four scratch registers are needed for the 8-register case.
 * NOTE(review): lvx ignores the low 4 address bits, so src is
 * presumably 16-byte aligned — guaranteed by the raidz callers.
 */
#define	XOR_ACC(src, r...)						\
{									\
	switch (REG_CNT(r)) {						\
	case 8:								\
		__asm__ __volatile__(					\
		    "lvx 21,0,%[SRC0]\n"				\
		    "lvx 20,0,%[SRC1]\n"				\
		    "lvx 19,0,%[SRC2]\n"				\
		    "lvx 18,0,%[SRC3]\n"				\
		    "vxor " VR0(r) "," VR0(r) ",21\n"			\
		    "vxor " VR1(r) "," VR1(r) ",20\n"			\
		    "vxor " VR2(r) "," VR2(r) ",19\n"			\
		    "vxor " VR3(r) "," VR3(r) ",18\n"			\
		    "lvx 21,0,%[SRC4]\n"				\
		    "lvx 20,0,%[SRC5]\n"				\
		    "lvx 19,0,%[SRC6]\n"				\
		    "lvx 18,0,%[SRC7]\n"				\
		    "vxor " VR4(r) "," VR4(r) ",21\n"			\
		    "vxor " VR5(r) "," VR5(r) ",20\n"			\
		    "vxor " VR6(r) "," VR6(r) ",19\n"			\
		    "vxor " VR7(r) "," VR7(r) ",18\n"			\
		    :	UVR0(r), UVR1(r), UVR2(r), UVR3(r),		\
			UVR4(r), UVR5(r), UVR6(r), UVR7(r)		\
		    :	[SRC0] "r" ((OFFSET(src, 0))),			\
			[SRC1] "r" ((OFFSET(src, 16))),			\
			[SRC2] "r" ((OFFSET(src, 32))),			\
			[SRC3] "r" ((OFFSET(src, 48))),			\
			[SRC4] "r" ((OFFSET(src, 64))),			\
			[SRC5] "r" ((OFFSET(src, 80))),			\
			[SRC6] "r" ((OFFSET(src, 96))),			\
			[SRC7] "r" ((OFFSET(src, 112)))			\
		    :	"v18", "v19", "v20", "v21");			\
		break;							\
	case 4:								\
		__asm__ __volatile__(					\
		    "lvx 21,0,%[SRC0]\n"				\
		    "lvx 20,0,%[SRC1]\n"				\
		    "lvx 19,0,%[SRC2]\n"				\
		    "lvx 18,0,%[SRC3]\n"				\
		    "vxor " VR0(r) "," VR0(r) ",21\n"			\
		    "vxor " VR1(r) "," VR1(r) ",20\n"			\
		    "vxor " VR2(r) "," VR2(r) ",19\n"			\
		    "vxor " VR3(r) "," VR3(r) ",18\n"			\
		    :	UVR0(r), UVR1(r), UVR2(r), UVR3(r)		\
		    :	[SRC0] "r" ((OFFSET(src, 0))),			\
			[SRC1] "r" ((OFFSET(src, 16))),			\
			[SRC2] "r" ((OFFSET(src, 32))),			\
			[SRC3] "r" ((OFFSET(src, 48)))			\
		    :	"v18", "v19", "v20", "v21");			\
		break;							\
	case 2:								\
		__asm__ __volatile__(					\
		    "lvx 21,0,%[SRC0]\n"				\
		    "lvx 20,0,%[SRC1]\n"				\
		    "vxor " VR0(r) "," VR0(r) ",21\n"			\
		    "vxor " VR1(r) "," VR1(r) ",20\n"			\
		    :	UVR0(r), UVR1(r)				\
		    :	[SRC0] "r" ((OFFSET(src, 0))),			\
			[SRC1] "r" ((OFFSET(src, 16)))			\
		    :	"v20", "v21");					\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}

/*
 * XOR(r...): xor the first half of the register list into the second
 * half (e.g. for 4 registers: r2 ^= r0, r3 ^= r1).
 */
#define	XOR(r...)							\
{									\
	switch (REG_CNT(r)) {						\
	case 8:								\
		__asm__ __volatile__(					\
		    "vxor " VR4(r) "," VR4(r) "," VR0(r) "\n"		\
		    "vxor " VR5(r) "," VR5(r) "," VR1(r) "\n"		\
		    "vxor " VR6(r) "," VR6(r) "," VR2(r) "\n"		\
		    "vxor " VR7(r) "," VR7(r) "," VR3(r) "\n"		\
		    :	UVR4(r), UVR5(r), UVR6(r), UVR7(r)		\
		    :	RVR0(r), RVR1(r), RVR2(r), RVR3(r));		\
		break;							\
	case 4:								\
		__asm__ __volatile__(					\
		    "vxor " VR2(r) "," VR2(r) "," VR0(r) "\n"		\
		    "vxor " VR3(r) "," VR3(r) "," VR1(r) "\n"		\
		    :	UVR2(r), UVR3(r)				\
		    :	RVR0(r), RVR1(r));				\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}

/* ZERO(r...): clear each register by xoring it with itself. */
#define	ZERO(r...)							\
{									\
	switch (REG_CNT(r)) {						\
	case 8:								\
		__asm__ __volatile__(					\
		    "vxor " VR0(r) "," VR0(r) "," VR0(r) "\n"		\
		    "vxor " VR1(r) "," VR1(r) "," VR1(r) "\n"		\
		    "vxor " VR2(r) "," VR2(r) "," VR2(r) "\n"		\
		    "vxor " VR3(r) "," VR3(r) "," VR3(r) "\n"		\
		    "vxor " VR4(r) "," VR4(r) "," VR4(r) "\n"		\
		    "vxor " VR5(r) "," VR5(r) "," VR5(r) "\n"		\
		    "vxor " VR6(r) "," VR6(r) "," VR6(r) "\n"		\
		    "vxor " VR7(r) "," VR7(r) "," VR7(r) "\n"		\
		    :	WVR0(r), WVR1(r), WVR2(r), WVR3(r),		\
			WVR4(r), WVR5(r), WVR6(r), WVR7(r));		\
		break;							\
	case 4:								\
		__asm__ __volatile__(					\
		    "vxor " VR0(r) "," VR0(r) "," VR0(r) "\n"		\
		    "vxor " VR1(r) "," VR1(r) "," VR1(r) "\n"		\
		    "vxor " VR2(r) "," VR2(r) "," VR2(r) "\n"		\
		    "vxor " VR3(r) "," VR3(r) "," VR3(r) "\n"		\
		    :	WVR0(r), WVR1(r), WVR2(r), WVR3(r));		\
		break;							\
	case 2:								\
		__asm__ __volatile__(					\
		    "vxor " VR0(r) "," VR0(r) "," VR0(r) "\n"		\
		    "vxor " VR1(r) "," VR1(r) "," VR1(r) "\n"		\
		    :	WVR0(r), WVR1(r));				\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}

/*
 * COPY(r...): copy the first half of the register list into the
 * second half using vor a,b,b (register move idiom).
 */
#define	COPY(r...)							\
{									\
	switch (REG_CNT(r)) {						\
	case 8:								\
		__asm__ __volatile__(					\
		    "vor " VR4(r) "," VR0(r) "," VR0(r) "\n"		\
		    "vor " VR5(r) "," VR1(r) "," VR1(r) "\n"		\
		    "vor " VR6(r) "," VR2(r) "," VR2(r) "\n"		\
		    "vor " VR7(r) "," VR3(r) "," VR3(r) "\n"		\
		    :	WVR4(r), WVR5(r), WVR6(r), WVR7(r)		\
		    :	RVR0(r), RVR1(r), RVR2(r), RVR3(r));		\
		break;							\
	case 4:								\
		__asm__ __volatile__(					\
		    "vor " VR2(r) "," VR0(r) "," VR0(r) "\n"		\
		    "vor " VR3(r) "," VR1(r) "," VR1(r) "\n"		\
		    :	WVR2(r), WVR3(r)				\
		    :	RVR0(r), RVR1(r));				\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}

/*
 * LOAD(src, r...): load 16-byte vectors from src (consecutive
 * 16-byte offsets) into the given registers.
 */
#define	LOAD(src, r...)							\
{									\
	switch (REG_CNT(r)) {						\
	case 8:								\
		__asm__ __volatile__(					\
		    "lvx " VR0(r) " ,0,%[SRC0]\n"			\
		    "lvx " VR1(r) " ,0,%[SRC1]\n"			\
		    "lvx " VR2(r) " ,0,%[SRC2]\n"			\
		    "lvx " VR3(r) " ,0,%[SRC3]\n"			\
		    "lvx " VR4(r) " ,0,%[SRC4]\n"			\
		    "lvx " VR5(r) " ,0,%[SRC5]\n"			\
		    "lvx " VR6(r) " ,0,%[SRC6]\n"			\
		    "lvx " VR7(r) " ,0,%[SRC7]\n"			\
		    :	WVR0(r), WVR1(r), WVR2(r), WVR3(r),		\
			WVR4(r), WVR5(r), WVR6(r), WVR7(r)		\
		    :	[SRC0] "r" ((OFFSET(src, 0))),			\
			[SRC1] "r" ((OFFSET(src, 16))),			\
			[SRC2] "r" ((OFFSET(src, 32))),			\
			[SRC3] "r" ((OFFSET(src, 48))),			\
			[SRC4] "r" ((OFFSET(src, 64))),			\
			[SRC5] "r" ((OFFSET(src, 80))),			\
			[SRC6] "r" ((OFFSET(src, 96))),			\
			[SRC7] "r" ((OFFSET(src, 112))));		\
		break;							\
	case 4:								\
		__asm__ __volatile__(					\
		    "lvx " VR0(r) " ,0,%[SRC0]\n"			\
		    "lvx " VR1(r) " ,0,%[SRC1]\n"			\
		    "lvx " VR2(r) " ,0,%[SRC2]\n"			\
		    "lvx " VR3(r) " ,0,%[SRC3]\n"			\
		    :	WVR0(r), WVR1(r), WVR2(r), WVR3(r)		\
		    :	[SRC0] "r" ((OFFSET(src, 0))),			\
			[SRC1] "r" ((OFFSET(src, 16))),			\
			[SRC2] "r" ((OFFSET(src, 32))),			\
			[SRC3] "r" ((OFFSET(src, 48))));		\
		break;							\
	case 2:								\
		__asm__ __volatile__(					\
		    "lvx " VR0(r) " ,0,%[SRC0]\n"			\
		    "lvx " VR1(r) " ,0,%[SRC1]\n"			\
		    :	WVR0(r), WVR1(r)				\
		    :	[SRC0] "r" ((OFFSET(src, 0))),			\
			[SRC1] "r" ((OFFSET(src, 16))));		\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}

/*
 * STORE(dst, r...): store the given registers to dst (consecutive
 * 16-byte offsets).  "memory" clobber orders the stores against
 * surrounding C memory accesses.
 */
#define	STORE(dst, r...)						\
{									\
	switch (REG_CNT(r)) {						\
	case 8:								\
		__asm__ __volatile__(					\
		    "stvx " VR0(r) " ,0,%[DST0]\n"			\
		    "stvx " VR1(r) " ,0,%[DST1]\n"			\
		    "stvx " VR2(r) " ,0,%[DST2]\n"			\
		    "stvx " VR3(r) " ,0,%[DST3]\n"			\
		    "stvx " VR4(r) " ,0,%[DST4]\n"			\
		    "stvx " VR5(r) " ,0,%[DST5]\n"			\
		    "stvx " VR6(r) " ,0,%[DST6]\n"			\
		    "stvx " VR7(r) " ,0,%[DST7]\n"			\
		    : : [DST0] "r" ((OFFSET(dst, 0))),			\
			[DST1] "r" ((OFFSET(dst, 16))),			\
			[DST2] "r" ((OFFSET(dst, 32))),			\
			[DST3] "r" ((OFFSET(dst, 48))),			\
			[DST4] "r" ((OFFSET(dst, 64))),			\
			[DST5] "r" ((OFFSET(dst, 80))),			\
			[DST6] "r" ((OFFSET(dst, 96))),			\
			[DST7] "r" ((OFFSET(dst, 112))),		\
			RVR0(r), RVR1(r), RVR2(r), RVR3(r),		\
			RVR4(r), RVR5(r), RVR6(r), RVR7(r)		\
		    :	"memory");					\
		break;							\
	case 4:								\
		__asm__ __volatile__(					\
		    "stvx " VR0(r) " ,0,%[DST0]\n"			\
		    "stvx " VR1(r) " ,0,%[DST1]\n"			\
		    "stvx " VR2(r) " ,0,%[DST2]\n"			\
		    "stvx " VR3(r) " ,0,%[DST3]\n"			\
		    : : [DST0] "r" ((OFFSET(dst, 0))),			\
			[DST1] "r" ((OFFSET(dst, 16))),			\
			[DST2] "r" ((OFFSET(dst, 32))),			\
			[DST3] "r" ((OFFSET(dst, 48))),			\
			RVR0(r), RVR1(r), RVR2(r), RVR3(r)		\
		    :	"memory");					\
		break;							\
	case 2:								\
		__asm__ __volatile__(					\
		    "stvx " VR0(r) " ,0,%[DST0]\n"			\
		    "stvx " VR1(r) " ,0,%[DST1]\n"			\
		    : : [DST0] "r" ((OFFSET(dst, 0))),			\
			[DST1] "r" ((OFFSET(dst, 16))),			\
			RVR0(r), RVR1(r) : "memory");			\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}

/*
 * Unfortunately cannot use the macro, because GCC
 * will try to use the macro name and not value
 * later on...
 * Kept as a reference to what a numbered variable is
 *
 * (w17 holds all-zero bytes, w16 holds 0x1d bytes — see MUL2_SETUP;
 * v18/v19 are per-asm scratch.)
 */
#define	_00 "17"
#define	_1d "16"
#define	_temp0 "19"
#define	_temp1 "18"

/*
 * MUL2_SETUP(): load the constants used by MUL2/MUL4:
 *   w16 = 0x1d in every byte (14 + 15 via vaddubm — the GF(2^8)
 *         reduction byte; vspltisb can only build immediates -16..15,
 *         hence the two-step construction), and
 *   w17 = zero in every byte.
 */
#define	MUL2_SETUP()							\
{									\
	__asm__ __volatile__(						\
	    "vspltisb " VR(16) ",14\n"					\
	    "vspltisb " VR(17) ",15\n"					\
	    "vaddubm " VR(16) "," VR(17) "," VR(16) "\n"		\
	    "vxor " VR(17) "," VR(17) "," VR(17) "\n"			\
	    :	WVR(16), WVR(17));					\
}

/*
 * MUL2(r...): per-byte GF(2^8) multiplication by 2.
 * For each byte: mask = 0x1d where the top bit is set (vcmpgtsb
 * against zero selects negative bytes), double via vaddubm
 * (x + x == x << 1 per byte), then xor in the reduction mask.
 * Requires MUL2_SETUP() to have initialized w16/w17.
 */
#define	MUL2(r...)							\
{									\
	switch (REG_CNT(r)) {						\
	case 4:								\
		__asm__ __volatile__(					\
		    "vcmpgtsb 19," VR(17) "," VR0(r) "\n"		\
		    "vcmpgtsb 18," VR(17) "," VR1(r) "\n"		\
		    "vcmpgtsb 21," VR(17) "," VR2(r) "\n"		\
		    "vcmpgtsb 20," VR(17) "," VR3(r) "\n"		\
		    "vand 19,19," VR(16) "\n"				\
		    "vand 18,18," VR(16) "\n"				\
		    "vand 21,21," VR(16) "\n"				\
		    "vand 20,20," VR(16) "\n"				\
		    "vaddubm " VR0(r) "," VR0(r) "," VR0(r) "\n"	\
		    "vaddubm " VR1(r) "," VR1(r) "," VR1(r) "\n"	\
		    "vaddubm " VR2(r) "," VR2(r) "," VR2(r) "\n"	\
		    "vaddubm " VR3(r) "," VR3(r) "," VR3(r) "\n"	\
		    "vxor " VR0(r) ",19," VR0(r) "\n"			\
		    "vxor " VR1(r) ",18," VR1(r) "\n"			\
		    "vxor " VR2(r) ",21," VR2(r) "\n"			\
		    "vxor " VR3(r) ",20," VR3(r) "\n"			\
		    :	UVR0(r), UVR1(r), UVR2(r), UVR3(r)		\
		    :	RVR(17), RVR(16)				\
		    :	"v18", "v19", "v20", "v21");			\
		break;							\
	case 2:								\
		__asm__ __volatile__(					\
		    "vcmpgtsb 19," VR(17) "," VR0(r) "\n"		\
		    "vcmpgtsb 18," VR(17) "," VR1(r) "\n"		\
		    "vand 19,19," VR(16) "\n"				\
		    "vand 18,18," VR(16) "\n"				\
		    "vaddubm " VR0(r) "," VR0(r) "," VR0(r) "\n"	\
		    "vaddubm " VR1(r) "," VR1(r) "," VR1(r) "\n"	\
		    "vxor " VR0(r) ",19," VR0(r) "\n"			\
		    "vxor " VR1(r) ",18," VR1(r) "\n"			\
		    :	UVR0(r), UVR1(r)				\
		    :	RVR(17), RVR(16)				\
		    :	"v18", "v19");					\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}

/* MUL4(r...): GF(2^8) multiplication by 4 — two applications of MUL2. */
#define	MUL4(r...)							\
{									\
	MUL2(r);							\
	MUL2(r);							\
}

/*
 * Unfortunately cannot use the macro, because GCC
 * will try to use the macro name and not value
 * later on...
 * Kept as a reference to what a register is
 * (here we're using actual registers for the
 * clobbered ones)
 */
#define	_0f "15"
#define	_a_save "14"
#define	_b_save "13"
#define	_lt_mod_a "12"
#define	_lt_clmul_a "11"
#define	_lt_mod_b "10"
#define	_lt_clmul_b "15"

/*
 * _MULx2(c, r...): multiply the bytes of two registers by the
 * constant c in GF(2^8) using vperm table lookups: each byte is
 * split into low nibble (vand with 0x0f) and high nibble (vsrab by
 * 4), each nibble indexes one of the four 16-byte tables for c in
 * gf_clmul_mod_lt, and the partial products are xored together.
 * v15 is reused in sequence as the 0x0f mask, the shift count, and
 * a table/scratch register — the instruction order is load-bearing.
 */
#define	_MULx2(c, r...)							\
{									\
	switch (REG_CNT(r)) {						\
	case 2:								\
		__asm__ __volatile__(					\
		    /* lts for upper part */				\
		    "vspltisb 15,15\n"					\
		    "lvx 10,0,%[lt0]\n"					\
		    "lvx 11,0,%[lt1]\n"					\
		    /* upper part */					\
		    "vand 14," VR0(r) ",15\n"				\
		    "vand 13," VR1(r) ",15\n"				\
		    "vspltisb 15,4\n"					\
		    "vsrab " VR0(r) "," VR0(r) ",15\n"			\
		    "vsrab " VR1(r) "," VR1(r) ",15\n"			\
									\
		    "vperm 12,10,10," VR0(r) "\n"			\
		    "vperm 10,10,10," VR1(r) "\n"			\
		    "vperm 15,11,11," VR0(r) "\n"			\
		    "vperm 11,11,11," VR1(r) "\n"			\
									\
		    "vxor " VR0(r) ",15,12\n"				\
		    "vxor " VR1(r) ",11,10\n"				\
		    /* lts for lower part */				\
		    "lvx 10,0,%[lt2]\n"					\
		    "lvx 15,0,%[lt3]\n"					\
		    /* lower part */					\
		    "vperm 12,10,10,14\n"				\
		    "vperm 10,10,10,13\n"				\
		    "vperm 11,15,15,14\n"				\
		    "vperm 15,15,15,13\n"				\
									\
		    "vxor " VR0(r) "," VR0(r) ",12\n"			\
		    "vxor " VR1(r) "," VR1(r) ",10\n"			\
		    "vxor " VR0(r) "," VR0(r) ",11\n"			\
		    "vxor " VR1(r) "," VR1(r) ",15\n"			\
		    :	UVR0(r), UVR1(r)				\
		    :	[lt0] "r" (&(gf_clmul_mod_lt[4*(c)+0][0])),	\
			[lt1] "r" (&(gf_clmul_mod_lt[4*(c)+1][0])),	\
			[lt2] "r" (&(gf_clmul_mod_lt[4*(c)+2][0])),	\
			[lt3] "r" (&(gf_clmul_mod_lt[4*(c)+3][0]))	\
		    :	"v10", "v11", "v12", "v13", "v14", "v15");	\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}

/*
 * MUL(c, r...): multiply 2 or 4 registers by the constant c,
 * two registers at a time via _MULx2.
 */
#define	MUL(c, r...)							\
{									\
	switch (REG_CNT(r)) {						\
	case 4:								\
		_MULx2(c, R_23(r));					\
		_MULx2(c, R_01(r));					\
		break;							\
	case 2:								\
		_MULx2(c, R_01(r));					\
		break;							\
	default:							\
		ZFS_ASM_BUG();						\
	}								\
}

/* Enter/leave kernel vector (FPU/AltiVec) context around the math. */
#define	raidz_math_begin()	kfpu_begin()
#define	raidz_math_end()	kfpu_end()

/* Overkill... */
#if 0 // defined(_KERNEL)
/*
 * Disabled variant: pin each wN variable to a specific AltiVec
 * register.  Kept for reference only (the #if above is 0).
 * Note the dummy padding variables w32..w38 all alias register 31 —
 * they are only ever named, never used, by the padded asm templates.
 */
#define	GEN_X_DEFINE_0_3()	\
register unsigned char w0 asm("0") __attribute__((vector_size(16)));	\
register unsigned char w1 asm("1") __attribute__((vector_size(16)));	\
register unsigned char w2 asm("2") __attribute__((vector_size(16)));	\
register unsigned char w3 asm("3") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_4_5()	\
register unsigned char w4 asm("4") __attribute__((vector_size(16)));	\
register unsigned char w5 asm("5") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_6_7()	\
register unsigned char w6 asm("6") __attribute__((vector_size(16)));	\
register unsigned char w7 asm("7") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_8_9()	\
register unsigned char w8 asm("8") __attribute__((vector_size(16)));	\
register unsigned char w9 asm("9") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_10_11()	\
register unsigned char w10 asm("10") __attribute__((vector_size(16)));	\
register unsigned char w11 asm("11") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_12_15()	\
register unsigned char w12 asm("12") __attribute__((vector_size(16)));	\
register unsigned char w13 asm("13") __attribute__((vector_size(16)));	\
register unsigned char w14 asm("14") __attribute__((vector_size(16)));	\
register unsigned char w15 asm("15") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_16()	\
register unsigned char w16 asm("16") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_17()	\
register unsigned char w17 asm("17") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_18_21()	\
register unsigned char w18 asm("18") __attribute__((vector_size(16)));	\
register unsigned char w19 asm("19") __attribute__((vector_size(16)));	\
register unsigned char w20 asm("20") __attribute__((vector_size(16)));	\
register unsigned char w21 asm("21") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_22_23()	\
register unsigned char w22 asm("22") __attribute__((vector_size(16)));	\
register unsigned char w23 asm("23") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_24_27()	\
register unsigned char w24 asm("24") __attribute__((vector_size(16)));	\
register unsigned char w25 asm("25") __attribute__((vector_size(16)));	\
register unsigned char w26 asm("26") __attribute__((vector_size(16)));	\
register unsigned char w27 asm("27") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_28_30()	\
register unsigned char w28 asm("28") __attribute__((vector_size(16)));	\
register unsigned char w29 asm("29") __attribute__((vector_size(16)));	\
register unsigned char w30 asm("30") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_31()	\
register unsigned char w31 asm("31") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_32()	\
register unsigned char w32 asm("31") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_33_36()	\
register unsigned char w33 asm("31") __attribute__((vector_size(16)));	\
register unsigned char w34 asm("31") __attribute__((vector_size(16)));	\
register unsigned char w35 asm("31") __attribute__((vector_size(16)));	\
register unsigned char w36 asm("31") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_37_38()	\
register unsigned char w37 asm("31") __attribute__((vector_size(16)));	\
register unsigned char w38 asm("31") __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_ALL()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_8_9()	\
	GEN_X_DEFINE_10_11()	\
	GEN_X_DEFINE_12_15()	\
	GEN_X_DEFINE_16()	\
	GEN_X_DEFINE_17()	\
	GEN_X_DEFINE_18_21()	\
	GEN_X_DEFINE_22_23()	\
	GEN_X_DEFINE_24_27()	\
	GEN_X_DEFINE_28_30()	\
	GEN_X_DEFINE_31()	\
	GEN_X_DEFINE_32()	\
	GEN_X_DEFINE_33_36()	\
	GEN_X_DEFINE_37_38()
#else
/*
 * Active variant: declare the wN vector variables and let the
 * compiler's register allocator place them (constrained to AltiVec
 * registers by the "v" asm constraints above).
 */
#define	GEN_X_DEFINE_0_3()	\
	unsigned char w0 __attribute__((vector_size(16)));	\
	unsigned char w1 __attribute__((vector_size(16)));	\
	unsigned char w2 __attribute__((vector_size(16)));	\
	unsigned char w3 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_4_5()	\
	unsigned char w4 __attribute__((vector_size(16)));	\
	unsigned char w5 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_6_7()	\
	unsigned char w6 __attribute__((vector_size(16)));	\
	unsigned char w7 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_8_9()	\
	unsigned char w8 __attribute__((vector_size(16)));	\
	unsigned char w9 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_10_11()	\
	unsigned char w10 __attribute__((vector_size(16)));	\
	unsigned char w11 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_12_15()	\
	unsigned char w12 __attribute__((vector_size(16)));	\
	unsigned char w13 __attribute__((vector_size(16)));	\
	unsigned char w14 __attribute__((vector_size(16)));	\
	unsigned char w15 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_16()	\
	unsigned char w16 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_17()	\
	unsigned char w17 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_18_21()	\
	unsigned char w18 __attribute__((vector_size(16)));	\
	unsigned char w19 __attribute__((vector_size(16)));	\
	unsigned char w20 __attribute__((vector_size(16)));	\
	unsigned char w21 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_22_23()	\
	unsigned char w22 __attribute__((vector_size(16)));	\
	unsigned char w23 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_24_27()	\
	unsigned char w24 __attribute__((vector_size(16)));	\
	unsigned char w25 __attribute__((vector_size(16)));	\
	unsigned char w26 __attribute__((vector_size(16)));	\
	unsigned char w27 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_28_30()	\
	unsigned char w28 __attribute__((vector_size(16)));	\
	unsigned char w29 __attribute__((vector_size(16)));	\
	unsigned char w30 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_31()	\
	unsigned char w31 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_32()	\
	unsigned char w32 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_33_36()	\
	unsigned char w33 __attribute__((vector_size(16)));	\
	unsigned char w34 __attribute__((vector_size(16)));	\
	unsigned char w35 __attribute__((vector_size(16)));	\
	unsigned char w36 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_37_38()	\
	unsigned char w37 __attribute__((vector_size(16)));	\
	unsigned char w38 __attribute__((vector_size(16)));
#define	GEN_X_DEFINE_ALL()	\
	GEN_X_DEFINE_0_3()	\
	GEN_X_DEFINE_4_5()	\
	GEN_X_DEFINE_6_7()	\
	GEN_X_DEFINE_8_9()	\
	GEN_X_DEFINE_10_11()	\
	GEN_X_DEFINE_12_15()	\
	GEN_X_DEFINE_16()	\
	GEN_X_DEFINE_17()	\
	GEN_X_DEFINE_18_21()	\
	GEN_X_DEFINE_22_23()	\
	GEN_X_DEFINE_24_27()	\
	GEN_X_DEFINE_28_30()	\
	GEN_X_DEFINE_31()	\
	GEN_X_DEFINE_32()	\
	GEN_X_DEFINE_33_36()	\
	GEN_X_DEFINE_37_38()
#endif