1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * Copyright 2024- IBM Corp. 4 * 5 * X25519 scalar multiplication with 51 bits limbs for PPC64le. 6 * Based on RFC7748 and AArch64 optimized implementation for X25519 7 * - Algorithm 1 Scalar multiplication of a variable point 8 */ 9 10 #include <linux/types.h> 11 #include <linux/jump_label.h> 12 #include <linux/kernel.h> 13 14 #include <linux/cpufeature.h> 15 #include <linux/processor.h> 16 17 typedef uint64_t fe51[5]; 18 19 asmlinkage void x25519_fe51_mul(fe51 h, const fe51 f, const fe51 g); 20 asmlinkage void x25519_fe51_sqr(fe51 h, const fe51 f); 21 asmlinkage void x25519_fe51_mul121666(fe51 h, fe51 f); 22 asmlinkage void x25519_fe51_sqr_times(fe51 h, const fe51 f, int n); 23 asmlinkage void x25519_fe51_frombytes(fe51 h, const uint8_t *s); 24 asmlinkage void x25519_fe51_tobytes(uint8_t *s, const fe51 h); 25 asmlinkage void x25519_cswap(fe51 p, fe51 q, unsigned int bit); 26 27 #define fmul x25519_fe51_mul 28 #define fsqr x25519_fe51_sqr 29 #define fmul121666 x25519_fe51_mul121666 30 #define fe51_tobytes x25519_fe51_tobytes 31 32 static void fadd(fe51 h, const fe51 f, const fe51 g) 33 { 34 h[0] = f[0] + g[0]; 35 h[1] = f[1] + g[1]; 36 h[2] = f[2] + g[2]; 37 h[3] = f[3] + g[3]; 38 h[4] = f[4] + g[4]; 39 } 40 41 /* 42 * Prime = 2 ** 255 - 19, 255 bits 43 * (0x7fffffff ffffffff ffffffff ffffffff ffffffff ffffffff ffffffff ffffffed) 44 * 45 * Prime in 5 51-bit limbs 46 */ 47 static fe51 prime51 = { 0x7ffffffffffed, 0x7ffffffffffff, 0x7ffffffffffff, 0x7ffffffffffff, 0x7ffffffffffff}; 48 49 static void fsub(fe51 h, const fe51 f, const fe51 g) 50 { 51 h[0] = (f[0] + ((prime51[0] * 2))) - g[0]; 52 h[1] = (f[1] + ((prime51[1] * 2))) - g[1]; 53 h[2] = (f[2] + ((prime51[2] * 2))) - g[2]; 54 h[3] = (f[3] + ((prime51[3] * 2))) - g[3]; 55 h[4] = (f[4] + ((prime51[4] * 2))) - g[4]; 56 } 57 58 static void fe51_frombytes(fe51 h, const uint8_t *s) 59 { 60 /* 61 * Make sure 64-bit aligned. 62 */ 63 unsigned char sbuf[32+8]; 64 unsigned char *sb = PTR_ALIGN((void *)sbuf, 8); 65 66 memcpy(sb, s, 32); 67 x25519_fe51_frombytes(h, sb); 68 } 69 70 static void finv(fe51 o, const fe51 i) 71 { 72 fe51 a0, b, c, t00; 73 74 fsqr(a0, i); 75 x25519_fe51_sqr_times(t00, a0, 2); 76 77 fmul(b, t00, i); 78 fmul(a0, b, a0); 79 80 fsqr(t00, a0); 81 82 fmul(b, t00, b); 83 x25519_fe51_sqr_times(t00, b, 5); 84 85 fmul(b, t00, b); 86 x25519_fe51_sqr_times(t00, b, 10); 87 88 fmul(c, t00, b); 89 x25519_fe51_sqr_times(t00, c, 20); 90 91 fmul(t00, t00, c); 92 x25519_fe51_sqr_times(t00, t00, 10); 93 94 fmul(b, t00, b); 95 x25519_fe51_sqr_times(t00, b, 50); 96 97 fmul(c, t00, b); 98 x25519_fe51_sqr_times(t00, c, 100); 99 100 fmul(t00, t00, c); 101 x25519_fe51_sqr_times(t00, t00, 50); 102 103 fmul(t00, t00, b); 104 x25519_fe51_sqr_times(t00, t00, 5); 105 106 fmul(o, t00, a0); 107 } 108 109 static void curve25519_fe51(uint8_t out[32], const uint8_t scalar[32], 110 const uint8_t point[32]) 111 { 112 fe51 x1, x2, z2, x3, z3; 113 uint8_t s[32]; 114 unsigned int swap = 0; 115 int i; 116 117 memcpy(s, scalar, 32); 118 s[0] &= 0xf8; 119 s[31] &= 0x7f; 120 s[31] |= 0x40; 121 fe51_frombytes(x1, point); 122 123 z2[0] = z2[1] = z2[2] = z2[3] = z2[4] = 0; 124 x3[0] = x1[0]; 125 x3[1] = x1[1]; 126 x3[2] = x1[2]; 127 x3[3] = x1[3]; 128 x3[4] = x1[4]; 129 130 x2[0] = z3[0] = 1; 131 x2[1] = z3[1] = 0; 132 x2[2] = z3[2] = 0; 133 x2[3] = z3[3] = 0; 134 x2[4] = z3[4] = 0; 135 136 for (i = 254; i >= 0; --i) { 137 unsigned int k_t = 1 & (s[i / 8] >> (i & 7)); 138 fe51 a, b, c, d, e; 139 fe51 da, cb, aa, bb; 140 fe51 dacb_p, dacb_m; 141 142 swap ^= k_t; 143 x25519_cswap(x2, x3, swap); 144 x25519_cswap(z2, z3, swap); 145 swap = k_t; 146 147 fsub(b, x2, z2); // B = x_2 - z_2 148 fadd(a, x2, z2); // A = x_2 + z_2 149 fsub(d, x3, z3); // D = x_3 - z_3 150 fadd(c, x3, z3); // C = x_3 + z_3 151 152 fsqr(bb, b); // BB = B^2 153 fsqr(aa, a); // AA = A^2 154 fmul(da, d, a); // DA = D * A 155 fmul(cb, c, b); // CB = C * B 156 157 fsub(e, aa, bb); // E = AA - BB 158 fmul(x2, aa, bb); // x2 = AA * BB 159 fadd(dacb_p, da, cb); // DA + CB 160 fsub(dacb_m, da, cb); // DA - CB 161 162 fmul121666(z3, e); // 121666 * E 163 fsqr(z2, dacb_m); // (DA - CB)^2 164 fsqr(x3, dacb_p); // x3 = (DA + CB)^2 165 fadd(b, bb, z3); // BB + 121666 * E 166 fmul(z3, x1, z2); // z3 = x1 * (DA - CB)^2 167 fmul(z2, e, b); // z2 = e * (BB + (DA + CB)^2) 168 } 169 170 finv(z2, z2); 171 fmul(x2, x2, z2); 172 fe51_tobytes(out, x2); 173 } 174 175 static void curve25519_arch(u8 mypublic[CURVE25519_KEY_SIZE], 176 const u8 secret[CURVE25519_KEY_SIZE], 177 const u8 basepoint[CURVE25519_KEY_SIZE]) 178 { 179 curve25519_fe51(mypublic, secret, basepoint); 180 } 181 182 static void curve25519_base_arch(u8 pub[CURVE25519_KEY_SIZE], 183 const u8 secret[CURVE25519_KEY_SIZE]) 184 { 185 curve25519_fe51(pub, secret, curve25519_base_point); 186 } 187