1 // SPDX-License-Identifier: CDDL-1.0 2 /* 3 * CDDL HEADER START 4 * 5 * The contents of this file are subject to the terms of the 6 * Common Development and Distribution License (the "License"). 7 * You may not use this file except in compliance with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or https://opensource.org/licenses/CDDL-1.0. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 23 /* 24 * Based on BLAKE3 v1.3.1, https://github.com/BLAKE3-team/BLAKE3 25 * Copyright (c) 2019-2020 Samuel Neves and Jack O'Connor 26 * Copyright (c) 2021-2022 Tino Reichardt <milky-zfs@mcmilk.de> 27 */ 28 29 #include <sys/simd.h> 30 #include <sys/zfs_context.h> 31 #include "blake3_impl.h" 32 33 #define rotr32(x, n) (((x) >> (n)) | ((x) << (32 - (n)))) 34 static inline void g(uint32_t *state, size_t a, size_t b, size_t c, size_t d, 35 uint32_t x, uint32_t y) 36 { 37 state[a] = state[a] + state[b] + x; 38 state[d] = rotr32(state[d] ^ state[a], 16); 39 state[c] = state[c] + state[d]; 40 state[b] = rotr32(state[b] ^ state[c], 12); 41 state[a] = state[a] + state[b] + y; 42 state[d] = rotr32(state[d] ^ state[a], 8); 43 state[c] = state[c] + state[d]; 44 state[b] = rotr32(state[b] ^ state[c], 7); 45 } 46 47 static inline void round_fn(uint32_t state[16], const uint32_t *msg, 48 size_t round) 49 { 50 /* Select the message schedule based on the round. */ 51 const uint8_t *schedule = BLAKE3_MSG_SCHEDULE[round]; 52 53 /* Mix the columns. */ 54 g(state, 0, 4, 8, 12, msg[schedule[0]], msg[schedule[1]]); 55 g(state, 1, 5, 9, 13, msg[schedule[2]], msg[schedule[3]]); 56 g(state, 2, 6, 10, 14, msg[schedule[4]], msg[schedule[5]]); 57 g(state, 3, 7, 11, 15, msg[schedule[6]], msg[schedule[7]]); 58 59 /* Mix the rows. */ 60 g(state, 0, 5, 10, 15, msg[schedule[8]], msg[schedule[9]]); 61 g(state, 1, 6, 11, 12, msg[schedule[10]], msg[schedule[11]]); 62 g(state, 2, 7, 8, 13, msg[schedule[12]], msg[schedule[13]]); 63 g(state, 3, 4, 9, 14, msg[schedule[14]], msg[schedule[15]]); 64 } 65 66 static inline void compress_pre(uint32_t state[16], const uint32_t cv[8], 67 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, 68 uint64_t counter, uint8_t flags) 69 { 70 uint32_t block_words[16]; 71 block_words[0] = load32(block + 4 * 0); 72 block_words[1] = load32(block + 4 * 1); 73 block_words[2] = load32(block + 4 * 2); 74 block_words[3] = load32(block + 4 * 3); 75 block_words[4] = load32(block + 4 * 4); 76 block_words[5] = load32(block + 4 * 5); 77 block_words[6] = load32(block + 4 * 6); 78 block_words[7] = load32(block + 4 * 7); 79 block_words[8] = load32(block + 4 * 8); 80 block_words[9] = load32(block + 4 * 9); 81 block_words[10] = load32(block + 4 * 10); 82 block_words[11] = load32(block + 4 * 11); 83 block_words[12] = load32(block + 4 * 12); 84 block_words[13] = load32(block + 4 * 13); 85 block_words[14] = load32(block + 4 * 14); 86 block_words[15] = load32(block + 4 * 15); 87 88 state[0] = cv[0]; 89 state[1] = cv[1]; 90 state[2] = cv[2]; 91 state[3] = cv[3]; 92 state[4] = cv[4]; 93 state[5] = cv[5]; 94 state[6] = cv[6]; 95 state[7] = cv[7]; 96 state[8] = BLAKE3_IV[0]; 97 state[9] = BLAKE3_IV[1]; 98 state[10] = BLAKE3_IV[2]; 99 state[11] = BLAKE3_IV[3]; 100 state[12] = counter_low(counter); 101 state[13] = counter_high(counter); 102 state[14] = (uint32_t)block_len; 103 state[15] = (uint32_t)flags; 104 105 round_fn(state, &block_words[0], 0); 106 round_fn(state, &block_words[0], 1); 107 round_fn(state, &block_words[0], 2); 108 round_fn(state, &block_words[0], 3); 109 round_fn(state, &block_words[0], 4); 110 round_fn(state, &block_words[0], 5); 111 round_fn(state, &block_words[0], 6); 112 } 113 114 static inline void blake3_compress_in_place_generic(uint32_t cv[8], 115 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, 116 uint64_t counter, uint8_t flags) 117 { 118 uint32_t state[16]; 119 compress_pre(state, cv, block, block_len, counter, flags); 120 cv[0] = state[0] ^ state[8]; 121 cv[1] = state[1] ^ state[9]; 122 cv[2] = state[2] ^ state[10]; 123 cv[3] = state[3] ^ state[11]; 124 cv[4] = state[4] ^ state[12]; 125 cv[5] = state[5] ^ state[13]; 126 cv[6] = state[6] ^ state[14]; 127 cv[7] = state[7] ^ state[15]; 128 } 129 130 static inline void hash_one_generic(const uint8_t *input, size_t blocks, 131 const uint32_t key[8], uint64_t counter, uint8_t flags, 132 uint8_t flags_start, uint8_t flags_end, uint8_t out[BLAKE3_OUT_LEN]) 133 { 134 uint32_t cv[8]; 135 memcpy(cv, key, BLAKE3_KEY_LEN); 136 uint8_t block_flags = flags | flags_start; 137 while (blocks > 0) { 138 if (blocks == 1) { 139 block_flags |= flags_end; 140 } 141 blake3_compress_in_place_generic(cv, input, BLAKE3_BLOCK_LEN, 142 counter, block_flags); 143 input = &input[BLAKE3_BLOCK_LEN]; 144 blocks -= 1; 145 block_flags = flags; 146 } 147 store_cv_words(out, cv); 148 } 149 150 static inline void blake3_compress_xof_generic(const uint32_t cv[8], 151 const uint8_t block[BLAKE3_BLOCK_LEN], uint8_t block_len, 152 uint64_t counter, uint8_t flags, uint8_t out[64]) 153 { 154 uint32_t state[16]; 155 compress_pre(state, cv, block, block_len, counter, flags); 156 157 store32(&out[0 * 4], state[0] ^ state[8]); 158 store32(&out[1 * 4], state[1] ^ state[9]); 159 store32(&out[2 * 4], state[2] ^ state[10]); 160 store32(&out[3 * 4], state[3] ^ state[11]); 161 store32(&out[4 * 4], state[4] ^ state[12]); 162 store32(&out[5 * 4], state[5] ^ state[13]); 163 store32(&out[6 * 4], state[6] ^ state[14]); 164 store32(&out[7 * 4], state[7] ^ state[15]); 165 store32(&out[8 * 4], state[8] ^ cv[0]); 166 store32(&out[9 * 4], state[9] ^ cv[1]); 167 store32(&out[10 * 4], state[10] ^ cv[2]); 168 store32(&out[11 * 4], state[11] ^ cv[3]); 169 store32(&out[12 * 4], state[12] ^ cv[4]); 170 store32(&out[13 * 4], state[13] ^ cv[5]); 171 store32(&out[14 * 4], state[14] ^ cv[6]); 172 store32(&out[15 * 4], state[15] ^ cv[7]); 173 } 174 175 static inline void blake3_hash_many_generic(const uint8_t * const *inputs, 176 size_t num_inputs, size_t blocks, const uint32_t key[8], uint64_t counter, 177 boolean_t increment_counter, uint8_t flags, uint8_t flags_start, 178 uint8_t flags_end, uint8_t *out) 179 { 180 while (num_inputs > 0) { 181 hash_one_generic(inputs[0], blocks, key, counter, flags, 182 flags_start, flags_end, out); 183 if (increment_counter) { 184 counter += 1; 185 } 186 inputs += 1; 187 num_inputs -= 1; 188 out = &out[BLAKE3_OUT_LEN]; 189 } 190 } 191 192 /* the generic implementation is always okay */ 193 static boolean_t blake3_is_supported(void) 194 { 195 return (B_TRUE); 196 } 197 198 const blake3_ops_t blake3_generic_impl = { 199 .compress_in_place = blake3_compress_in_place_generic, 200 .compress_xof = blake3_compress_xof_generic, 201 .hash_many = blake3_hash_many_generic, 202 .is_supported = blake3_is_supported, 203 .degree = 4, 204 .name = "generic" 205 }; 206