// SPDX-License-Identifier: GPL-2.0
/*
 * Data verification functions, i.e. hooks for ->readahead()
 *
 * Copyright 2019 Google LLC
 */

#include "fsverity_private.h"

#include <linux/bio.h>
#include <linux/export.h>

/*
 * Maximum number of data blocks that may be queued for verification at once.
 * 2 matches the interleaved (2x) SHA-256 hashing support checked in
 * fsverity_init_verification_context().
 */
#define FS_VERITY_MAX_PENDING_BLOCKS	2

/* A data block queued for verification */
struct fsverity_pending_block {
	/* Mapped address of the data block (from kmap_local_folio()) */
	const void *data;

	/* Byte position of the data block within the file */
	u64 pos;

	/* Computed hash of the data block, filled in just before verification */
	u8 real_hash[FS_VERITY_MAX_DIGEST_SIZE];
};

struct fsverity_verification_context {
	struct fsverity_info *vi;

	/*
	 * This is the queue of data blocks that are pending verification.  When
	 * the crypto layer supports interleaved hashing, we allow multiple
	 * blocks to be queued up in order to utilize it.  This can improve
	 * performance significantly vs. sequential hashing of each block.
	 */
	int num_pending;
	int max_pending;
	struct fsverity_pending_block
		pending_blocks[FS_VERITY_MAX_PENDING_BLOCKS];
};

static struct workqueue_struct *fsverity_read_workqueue;

/**
 * fsverity_readahead() - kick off readahead on fsverity hashes
 * @vi: fsverity_info for the inode to be read
 * @index: first file data page index that is being read
 * @nr_pages: number of file data pages to be read
 *
 * Start readahead on the fsverity hashes that are needed to verify the file
 * data in the range from @index to @index + @nr_pages (exclusive upper bound).
 *
 * To be called from the file systems' ->read_folio and ->readahead methods to
 * ensure that the hashes are already cached on completion of the file data
 * read if possible.
 */
void fsverity_readahead(struct fsverity_info *vi, pgoff_t index,
			unsigned long nr_pages)
{
	struct inode *inode = vi->inode;
	const struct merkle_tree_params *params = &vi->tree_params;
	/* Inclusive range of leaf-level hash indices covered by the read */
	u64 start_hidx = (u64)index << params->log_blocks_per_page;
	u64 end_hidx =
		(((u64)index + nr_pages) << params->log_blocks_per_page) - 1;
	int level;

	/* The hook is optional; without it there is nothing to kick off. */
	if (!inode->i_sb->s_vop->readahead_merkle_tree)
		return;

	/*
	 * Walk up the tree from the leaves.  At each level, convert the hash
	 * index range into a range of Merkle tree pages and ask the filesystem
	 * to read them ahead.
	 */
	for (level = 0; level < params->num_levels; level++) {
		unsigned long level_start = params->level_start[level];
		unsigned long next_start_hidx = start_hidx >> params->log_arity;
		unsigned long next_end_hidx = end_hidx >> params->log_arity;
		pgoff_t start_idx = (level_start + next_start_hidx) >>
				    params->log_blocks_per_page;
		pgoff_t end_idx = (level_start + next_end_hidx) >>
				  params->log_blocks_per_page;

		inode->i_sb->s_vop->readahead_merkle_tree(
			inode, start_idx, end_idx - start_idx + 1);

		/* The block indices at this level are the hash indices of the next. */
		start_hidx = next_start_hidx;
		end_hidx = next_end_hidx;
	}
}
EXPORT_SYMBOL_GPL(fsverity_readahead);

/*
 * Returns true if the hash block with index @hblock_idx in the tree, located in
 * @hpage, has already been verified.
 */
static bool is_hash_block_verified(struct fsverity_info *vi, struct page *hpage,
				   unsigned long hblock_idx)
{
	unsigned int blocks_per_page;
	unsigned int i;

	/*
	 * When the Merkle tree block size and page size are the same, then the
	 * ->hash_block_verified bitmap isn't allocated, and we use PG_checked
	 * to directly indicate whether the page's block has been verified.
	 *
	 * Using PG_checked also guarantees that we re-verify hash pages that
	 * get evicted and re-instantiated from the backing storage, as new
	 * pages always start out with PG_checked cleared.
	 */
	if (!vi->hash_block_verified)
		return PageChecked(hpage);

	/*
	 * When the Merkle tree block size and page size differ, we use a bitmap
	 * to indicate whether each hash block has been verified.
	 *
	 * However, we still need to ensure that hash pages that get evicted and
	 * re-instantiated from the backing storage are re-verified.  To do
	 * this, we use PG_checked again, but now it doesn't really mean
	 * "checked".  Instead, now it just serves as an indicator for whether
	 * the hash page is newly instantiated or not.  If the page is new, as
	 * indicated by PG_checked=0, we clear the bitmap bits for the page's
	 * blocks since they are untrustworthy, then set PG_checked=1.
	 * Otherwise we return the bitmap bit for the requested block.
	 *
	 * Multiple threads may execute this code concurrently on the same page.
	 * This is safe because we use memory barriers to ensure that if a
	 * thread sees PG_checked=1, then it also sees the associated bitmap
	 * clearing to have occurred.  Also, all writes and their corresponding
	 * reads are atomic, and all writes are safe to repeat in the event that
	 * multiple threads get into the PG_checked=0 section.  (Clearing a
	 * bitmap bit again at worst causes a hash block to be verified
	 * redundantly.  That event should be very rare, so it's not worth using
	 * a lock to avoid.  Setting PG_checked again has no effect.)
	 */
	if (PageChecked(hpage)) {
		/*
		 * A read memory barrier is needed here to give ACQUIRE
		 * semantics to the above PageChecked() test.
		 */
		smp_rmb();
		return test_bit(hblock_idx, vi->hash_block_verified);
	}
	blocks_per_page = vi->tree_params.blocks_per_page;
	hblock_idx = round_down(hblock_idx, blocks_per_page);
	for (i = 0; i < blocks_per_page; i++)
		clear_bit(hblock_idx + i, vi->hash_block_verified);
	/*
	 * A write memory barrier is needed here to give RELEASE semantics to
	 * the below SetPageChecked() operation.
	 */
	smp_wmb();
	SetPageChecked(hpage);
	return false;
}

/*
 * Verify the hash of a single data block against the file's Merkle tree.
 *
 * In principle, we need to verify the entire path to the root node.  However,
 * for efficiency the filesystem may cache the hash blocks.  Therefore we need
 * only ascend the tree until an already-verified hash block is seen, and then
 * verify the path to that block.
 *
 * Return: %true if the data block is valid, else %false.
 */
static bool verify_data_block(struct fsverity_info *vi,
			      const struct fsverity_pending_block *dblock)
{
	struct inode *inode = vi->inode;
	const u64 data_pos = dblock->pos;
	const struct merkle_tree_params *params = &vi->tree_params;
	const unsigned int hsize = params->digest_size;
	int level;
	u8 _want_hash[FS_VERITY_MAX_DIGEST_SIZE];
	const u8 *want_hash;
	u8 real_hash[FS_VERITY_MAX_DIGEST_SIZE];
	/* The hash blocks that are traversed, indexed by level */
	struct {
		/* Page containing the hash block */
		struct page *page;
		/* Mapped address of the hash block (will be within @page) */
		const void *addr;
		/* Index of the hash block in the tree overall */
		unsigned long index;
		/* Byte offset of the wanted hash relative to @addr */
		unsigned int hoffset;
	} hblocks[FS_VERITY_MAX_LEVELS];

	trace_fsverity_verify_data_block(inode, params, data_pos);

	/*
	 * The index of the previous level's block within that level; also the
	 * index of that block's hash within the current level.
	 */
	u64 hidx = data_pos >> params->log_blocksize;

	/*
	 * Up to FS_VERITY_MAX_PENDING_BLOCKS + FS_VERITY_MAX_LEVELS pages may
	 * be mapped at once.
	 */
	static_assert(FS_VERITY_MAX_PENDING_BLOCKS + FS_VERITY_MAX_LEVELS <=
		      KM_MAX_IDX);

	if (unlikely(data_pos >= inode->i_size)) {
		/*
		 * This can happen in the data page spanning EOF when the Merkle
		 * tree block size is less than the page size.  The Merkle tree
		 * doesn't cover data blocks fully past EOF.  But the entire
		 * page spanning EOF can be visible to userspace via a mmap, and
		 * any part past EOF should be all zeroes.  Therefore, we need
		 * to verify that any data blocks fully past EOF are all zeroes.
		 */
		if (memchr_inv(dblock->data, 0, params->block_size)) {
			fsverity_err(inode,
				     "FILE CORRUPTED! Data past EOF is not zeroed");
			return false;
		}
		return true;
	}

	/*
	 * Starting at the leaf level, ascend the tree saving hash blocks along
	 * the way until we find a hash block that has already been verified, or
	 * until we reach the root.
	 */
	for (level = 0; level < params->num_levels; level++) {
		unsigned long next_hidx;
		unsigned long hblock_idx;
		pgoff_t hpage_idx;
		unsigned int hblock_offset_in_page;
		unsigned int hoffset;
		struct page *hpage;
		const void *haddr;

		/*
		 * The index of the block in the current level; also the index
		 * of that block's hash within the next level.
		 */
		next_hidx = hidx >> params->log_arity;

		/* Index of the hash block in the tree overall */
		hblock_idx = params->level_start[level] + next_hidx;

		/* Index of the hash page in the tree overall */
		hpage_idx = hblock_idx >> params->log_blocks_per_page;

		/* Byte offset of the hash block within the page */
		hblock_offset_in_page =
			(hblock_idx << params->log_blocksize) & ~PAGE_MASK;

		/* Byte offset of the hash within the block */
		hoffset = (hidx << params->log_digestsize) &
			  (params->block_size - 1);

		hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode,
								  hpage_idx);
		if (IS_ERR(hpage)) {
			fsverity_err(inode,
				     "Error %ld reading Merkle tree page %lu",
				     PTR_ERR(hpage), hpage_idx);
			goto error;
		}
		haddr = kmap_local_page(hpage) + hblock_offset_in_page;
		if (is_hash_block_verified(vi, hpage, hblock_idx)) {
			/*
			 * An already-verified hash block terminates the ascent:
			 * the hash it contains for the level below is trusted.
			 */
			memcpy(_want_hash, haddr + hoffset, hsize);
			want_hash = _want_hash;
			kunmap_local(haddr);
			put_page(hpage);
			trace_fsverity_merkle_hit(inode, data_pos, hblock_idx,
						  level,
						  hoffset >> params->log_digestsize);
			goto descend;
		}
		hblocks[level].page = hpage;
		hblocks[level].addr = haddr;
		hblocks[level].index = hblock_idx;
		hblocks[level].hoffset = hoffset;
		hidx = next_hidx;
	}

	/* Reached the root without a cache hit; trust the root hash. */
	want_hash = vi->root_hash;
descend:
	/* Descend the tree verifying hash blocks. */
	for (; level > 0; level--) {
		struct page *hpage = hblocks[level - 1].page;
		const void *haddr = hblocks[level - 1].addr;
		unsigned long hblock_idx = hblocks[level - 1].index;
		unsigned int hoffset = hblocks[level - 1].hoffset;

		trace_fsverity_verify_merkle_block(inode, hblock_idx,
						   level,
						   hoffset >> params->log_digestsize);

		fsverity_hash_block(params, haddr, real_hash);
		if (memcmp(want_hash, real_hash, hsize) != 0)
			goto corrupted;
		/*
		 * Mark the hash block as verified.  This must be atomic and
		 * idempotent, as the same hash block might be verified by
		 * multiple threads concurrently.
		 */
		if (vi->hash_block_verified)
			set_bit(hblock_idx, vi->hash_block_verified);
		else
			SetPageChecked(hpage);
		memcpy(_want_hash, haddr + hoffset, hsize);
		want_hash = _want_hash;
		kunmap_local(haddr);
		put_page(hpage);
	}

	/* Finally, verify the hash of the data block. */
	if (memcmp(want_hash, dblock->real_hash, hsize) != 0)
		goto corrupted;
	return true;

corrupted:
	fsverity_err(
		inode,
		"FILE CORRUPTED! pos=%llu, level=%d, want_hash=%s:%*phN, real_hash=%s:%*phN",
		data_pos, level - 1, params->hash_alg->name, hsize, want_hash,
		params->hash_alg->name, hsize,
		level == 0 ? dblock->real_hash : real_hash);
error:
	/* Unwind the saved (still mapped) hash blocks in reverse order. */
	for (; level > 0; level--) {
		kunmap_local(hblocks[level - 1].addr);
		put_page(hblocks[level - 1].page);
	}
	return false;
}

/*
 * Initialize a verification context for @vi.  Two-block interleaved hashing is
 * enabled only for SHA-256 files and only when sha256_finup_2x() is actually
 * optimized on this CPU; otherwise blocks are hashed one at a time.
 */
static void
fsverity_init_verification_context(struct fsverity_verification_context *ctx,
				   struct fsverity_info *vi)
{
	ctx->vi = vi;
	ctx->num_pending = 0;
	if (vi->tree_params.hash_alg->algo_id == HASH_ALGO_SHA256 &&
	    sha256_finup_2x_is_optimized())
		ctx->max_pending = 2;
	else
		ctx->max_pending = 1;
}

/*
 * Unmap and drop all queued data blocks.  Unmapping is done in reverse order
 * of mapping, as required by kmap_local_*().
 */
static void
fsverity_clear_pending_blocks(struct fsverity_verification_context *ctx)
{
	int i;

	for (i = ctx->num_pending - 1; i >= 0; i--) {
		kunmap_local(ctx->pending_blocks[i].data);
		ctx->pending_blocks[i].data = NULL;
	}
	ctx->num_pending = 0;
}

/*
 * Hash all queued data blocks, using interleaved hashing when exactly two are
 * queued, then verify each one against the Merkle tree.  On success the queue
 * is emptied; on failure the queued blocks remain mapped for the caller to
 * clean up via fsverity_clear_pending_blocks().
 *
 * Return: %true if all queued blocks verified successfully, else %false.
 */
static bool
fsverity_verify_pending_blocks(struct fsverity_verification_context *ctx)
{
	struct fsverity_info *vi = ctx->vi;
	const struct merkle_tree_params *params = &vi->tree_params;
	int i;

	if (ctx->num_pending == 2) {
		/* num_pending == 2 implies that the algorithm is SHA-256 */
		sha256_finup_2x(params->hashstate ? &params->hashstate->sha256 :
						    NULL,
				ctx->pending_blocks[0].data,
				ctx->pending_blocks[1].data, params->block_size,
				ctx->pending_blocks[0].real_hash,
				ctx->pending_blocks[1].real_hash);
	} else {
		for (i = 0; i < ctx->num_pending; i++)
			fsverity_hash_block(params, ctx->pending_blocks[i].data,
					    ctx->pending_blocks[i].real_hash);
	}

	for (i = 0; i < ctx->num_pending; i++) {
		if (!verify_data_block(vi, &ctx->pending_blocks[i]))
			return false;
	}
	fsverity_clear_pending_blocks(ctx);
	return true;
}

/*
 * Queue the Merkle-tree-block-aligned region [@offset, @offset + @len) of
 * @data_folio for verification, flushing the queue whenever it fills up.
 * @len and @offset must be nonzero multiples of the Merkle tree block size,
 * and the folio must be locked and not yet uptodate.
 *
 * Return: %true on success, %false on verification failure or misuse.
 */
static bool fsverity_add_data_blocks(struct fsverity_verification_context *ctx,
				     struct folio *data_folio, size_t len,
				     size_t offset)
{
	struct fsverity_info *vi = ctx->vi;
	const struct merkle_tree_params *params = &vi->tree_params;
	const unsigned int block_size = params->block_size;
	/* Byte position of the folio within the file */
	u64 pos = (u64)data_folio->index << PAGE_SHIFT;

	if (WARN_ON_ONCE(len <= 0 || !IS_ALIGNED(len | offset, block_size)))
		return false;
	if (WARN_ON_ONCE(!folio_test_locked(data_folio) ||
			 folio_test_uptodate(data_folio)))
		return false;
	do {
		ctx->pending_blocks[ctx->num_pending].data =
			kmap_local_folio(data_folio, offset);
		ctx->pending_blocks[ctx->num_pending].pos = pos + offset;
		if (++ctx->num_pending == ctx->max_pending &&
		    !fsverity_verify_pending_blocks(ctx))
			return false;
		offset += block_size;
		len -= block_size;
	} while (len);
	return true;
}

/**
 * fsverity_verify_blocks() - verify data in a folio
 * @vi: fsverity_info for the inode to be read
 * @folio: the folio containing the data to verify
 * @len: the length of the data to verify in the folio
 * @offset: the offset of the data to verify in the folio
 *
 * Verify data that has just been read from a verity file.  The data must be
 * located in a pagecache folio that is still locked and not yet uptodate.  The
 * length and offset of the data must be Merkle tree block size aligned.
 *
 * Return: %true if the data is valid, else %false.
 */
bool fsverity_verify_blocks(struct fsverity_info *vi, struct folio *folio,
			    size_t len, size_t offset)
{
	struct fsverity_verification_context ctx;

	fsverity_init_verification_context(&ctx, vi);

	if (fsverity_add_data_blocks(&ctx, folio, len, offset) &&
	    fsverity_verify_pending_blocks(&ctx))
		return true;
	/* On failure, any still-queued blocks must be unmapped here. */
	fsverity_clear_pending_blocks(&ctx);
	return false;
}
EXPORT_SYMBOL_GPL(fsverity_verify_blocks);

#ifdef CONFIG_BLOCK
/**
 * fsverity_verify_bio() - verify a 'read' bio that has just completed
 * @vi: fsverity_info for the inode to be read
 * @bio: the bio to verify
 *
 * Verify the bio's data against the file's Merkle tree.  All bio data segments
 * must be aligned to the file's Merkle tree block size.  If any data fails
 * verification, then bio->bi_status is set to an error status.
 *
 * This is a helper function for use by the ->readahead() method of filesystems
 * that issue bios to read data directly into the page cache.  Filesystems that
 * populate the page cache without issuing bios (e.g. non block-based
 * filesystems) must instead call fsverity_verify_page() directly on each page.
 * All filesystems must also call fsverity_verify_page() on holes.
 */
void fsverity_verify_bio(struct fsverity_info *vi, struct bio *bio)
{
	struct fsverity_verification_context ctx;
	struct folio_iter fi;

	fsverity_init_verification_context(&ctx, vi);

	bio_for_each_folio_all(fi, bio) {
		if (!fsverity_add_data_blocks(&ctx, fi.folio, fi.length,
					      fi.offset))
			goto ioerr;
	}

	if (!fsverity_verify_pending_blocks(&ctx))
		goto ioerr;
	return;

ioerr:
	fsverity_clear_pending_blocks(&ctx);
	bio->bi_status = BLK_STS_IOERR;
}
EXPORT_SYMBOL_GPL(fsverity_verify_bio);
#endif /* CONFIG_BLOCK */

/**
 * fsverity_enqueue_verify_work() - enqueue work on the fs-verity workqueue
 * @work: the work to enqueue
 *
 * Enqueue verification work for asynchronous processing.
 */
void fsverity_enqueue_verify_work(struct work_struct *work)
{
	queue_work(fsverity_read_workqueue, work);
}
EXPORT_SYMBOL_GPL(fsverity_enqueue_verify_work);

void __init fsverity_init_workqueue(void)
{
	/*
	 * Use a high-priority workqueue to prioritize verification work, which
	 * blocks reads from completing, over regular application tasks.
	 *
	 * For performance reasons, don't use an unbound workqueue.  Using an
	 * unbound workqueue for crypto operations causes excessive scheduler
	 * latency on ARM64.
	 */
	fsverity_read_workqueue = alloc_workqueue("fsverity_read_queue",
						  WQ_HIGHPRI | WQ_PERCPU,
						  num_online_cpus());
	if (!fsverity_read_workqueue)
		panic("failed to allocate fsverity_read_queue");
}