// SPDX-License-Identifier: GPL-2.0
/*
 * Data verification functions, i.e. hooks for ->readahead()
 *
 * Copyright 2019 Google LLC
 */

#include "fsverity_private.h"

#include <linux/bio.h>
#include <linux/export.h>

#define FS_VERITY_MAX_PENDING_BLOCKS	2

struct fsverity_pending_block {
	const void *data;
	u64 pos;
	u8 real_hash[FS_VERITY_MAX_DIGEST_SIZE];
};

struct fsverity_verification_context {
	struct inode *inode;
	struct fsverity_info *vi;
	unsigned long max_ra_pages;

	/*
	 * This is the queue of data blocks that are pending verification. When
	 * the crypto layer supports interleaved hashing, we allow multiple
	 * blocks to be queued up in order to utilize it. This can improve
	 * performance significantly vs. sequential hashing of each block.
	 */
	int num_pending;
	int max_pending;
	struct fsverity_pending_block
		pending_blocks[FS_VERITY_MAX_PENDING_BLOCKS];
};

static struct workqueue_struct *fsverity_read_workqueue;

/*
 * Returns true if the hash block with index @hblock_idx in the tree, located
 * in @hpage, has already been verified.
 */
static bool is_hash_block_verified(struct fsverity_info *vi, struct page *hpage,
				   unsigned long hblock_idx)
{
	unsigned int blocks_per_page;
	unsigned int i;

	/*
	 * When the Merkle tree block size and page size are the same, then the
	 * ->hash_block_verified bitmap isn't allocated, and we use PG_checked
	 * to directly indicate whether the page's block has been verified.
	 *
	 * Using PG_checked also guarantees that we re-verify hash pages that
	 * get evicted and re-instantiated from the backing storage, as new
	 * pages always start out with PG_checked cleared.
	 */
	if (!vi->hash_block_verified)
		return PageChecked(hpage);

	/*
	 * When the Merkle tree block size and page size differ, we use a
	 * bitmap to indicate whether each hash block has been verified.
	 *
	 * However, we still need to ensure that hash pages that get evicted
	 * and re-instantiated from the backing storage are re-verified. To do
	 * this, we use PG_checked again, but now it doesn't really mean
	 * "checked". Instead, now it just serves as an indicator for whether
	 * the hash page is newly instantiated or not. If the page is new, as
	 * indicated by PG_checked=0, we clear the bitmap bits for the page's
	 * blocks since they are untrustworthy, then set PG_checked=1.
	 * Otherwise we return the bitmap bit for the requested block.
	 *
	 * Multiple threads may execute this code concurrently on the same
	 * page. This is safe because we use memory barriers to ensure that if
	 * a thread sees PG_checked=1, then it also sees the associated bitmap
	 * clearing to have occurred. Also, all writes and their corresponding
	 * reads are atomic, and all writes are safe to repeat in the event
	 * that multiple threads get into the PG_checked=0 section. (Clearing a
	 * bitmap bit again at worst causes a hash block to be verified
	 * redundantly. That event should be very rare, so it's not worth using
	 * a lock to avoid. Setting PG_checked again has no effect.)
	 */
	if (PageChecked(hpage)) {
		/*
		 * A read memory barrier is needed here to give ACQUIRE
		 * semantics to the above PageChecked() test.
		 */
		smp_rmb();
		return test_bit(hblock_idx, vi->hash_block_verified);
	}
	blocks_per_page = vi->tree_params.blocks_per_page;
	hblock_idx = round_down(hblock_idx, blocks_per_page);
	for (i = 0; i < blocks_per_page; i++)
		clear_bit(hblock_idx + i, vi->hash_block_verified);
	/*
	 * A write memory barrier is needed here to give RELEASE semantics to
	 * the below SetPageChecked() operation.
	 */
	smp_wmb();
	SetPageChecked(hpage);
	return false;
}
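
/*
 * Worked example for is_hash_block_verified() above (illustrative values, not
 * requirements): with 4096-byte pages and 1024-byte Merkle tree blocks,
 * blocks_per_page = 4, so hash page N holds the tree blocks with indices
 * 4*N .. 4*N + 3. A caller that sees PG_checked=0 on that page clears exactly
 * those four bitmap bits before setting PG_checked, ensuring that a freshly
 * instantiated page is never trusted based on stale bitmap state.
 */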

/*
 * Verify the hash of a single data block against the file's Merkle tree.
 *
 * In principle, we need to verify the entire path to the root node. However,
 * for efficiency the filesystem may cache the hash blocks. Therefore we need
 * only ascend the tree until an already-verified hash block is seen, and then
 * verify the path to that block.
 *
 * Return: %true if the data block is valid, else %false.
 */
static bool verify_data_block(struct inode *inode, struct fsverity_info *vi,
			      const struct fsverity_pending_block *dblock,
			      unsigned long max_ra_pages)
{
	const u64 data_pos = dblock->pos;
	const struct merkle_tree_params *params = &vi->tree_params;
	const unsigned int hsize = params->digest_size;
	int level;
	u8 _want_hash[FS_VERITY_MAX_DIGEST_SIZE];
	const u8 *want_hash;
	u8 real_hash[FS_VERITY_MAX_DIGEST_SIZE];
	/* The hash blocks that are traversed, indexed by level */
	struct {
		/* Page containing the hash block */
		struct page *page;
		/* Mapped address of the hash block (will be within @page) */
		const void *addr;
		/* Index of the hash block in the tree overall */
		unsigned long index;
		/* Byte offset of the wanted hash relative to @addr */
		unsigned int hoffset;
	} hblocks[FS_VERITY_MAX_LEVELS];
	/*
	 * The index of the previous level's block within that level; also the
	 * index of that block's hash within the current level.
	 */
	u64 hidx = data_pos >> params->log_blocksize;

	/*
	 * Up to FS_VERITY_MAX_PENDING_BLOCKS + FS_VERITY_MAX_LEVELS pages may
	 * be mapped at once.
	 */
	static_assert(FS_VERITY_MAX_PENDING_BLOCKS + FS_VERITY_MAX_LEVELS <=
		      KM_MAX_IDX);

	if (unlikely(data_pos >= inode->i_size)) {
		/*
		 * This can happen in the data page spanning EOF when the
		 * Merkle tree block size is less than the page size. The
		 * Merkle tree doesn't cover data blocks fully past EOF. But
		 * the entire page spanning EOF can be visible to userspace via
		 * a mmap, and any part past EOF should be all zeroes.
		 * Therefore, we need to verify that any data blocks fully past
		 * EOF are all zeroes.
		 */
		if (memchr_inv(dblock->data, 0, params->block_size)) {
			fsverity_err(inode,
				     "FILE CORRUPTED! Data past EOF is not zeroed");
			return false;
		}
		return true;
	}

	/*
	 * Starting at the leaf level, ascend the tree saving hash blocks along
	 * the way until we find a hash block that has already been verified,
	 * or until we reach the root.
	 */
	for (level = 0; level < params->num_levels; level++) {
		unsigned long next_hidx;
		unsigned long hblock_idx;
		pgoff_t hpage_idx;
		unsigned int hblock_offset_in_page;
		unsigned int hoffset;
		struct page *hpage;
		const void *haddr;

		/*
		 * The index of the block in the current level; also the index
		 * of that block's hash within the next level.
		 */
		next_hidx = hidx >> params->log_arity;

		/* Index of the hash block in the tree overall */
		hblock_idx = params->level_start[level] + next_hidx;

		/* Index of the hash page in the tree overall */
		hpage_idx = hblock_idx >> params->log_blocks_per_page;

		/* Byte offset of the hash block within the page */
		hblock_offset_in_page =
			(hblock_idx << params->log_blocksize) & ~PAGE_MASK;

		/* Byte offset of the hash within the block */
		hoffset = (hidx << params->log_digestsize) &
			  (params->block_size - 1);

		hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode,
				hpage_idx, level == 0 ? min(max_ra_pages,
					params->tree_pages - hpage_idx) : 0);
		if (IS_ERR(hpage)) {
			fsverity_err(inode,
				     "Error %ld reading Merkle tree page %lu",
				     PTR_ERR(hpage), hpage_idx);
			goto error;
		}
		haddr = kmap_local_page(hpage) + hblock_offset_in_page;
		if (is_hash_block_verified(vi, hpage, hblock_idx)) {
			memcpy(_want_hash, haddr + hoffset, hsize);
			want_hash = _want_hash;
			kunmap_local(haddr);
			put_page(hpage);
			goto descend;
		}
		hblocks[level].page = hpage;
		hblocks[level].addr = haddr;
		hblocks[level].index = hblock_idx;
		hblocks[level].hoffset = hoffset;
		hidx = next_hidx;
	}

	want_hash = vi->root_hash;
descend:
	/* Descend the tree verifying hash blocks. */
	for (; level > 0; level--) {
		struct page *hpage = hblocks[level - 1].page;
		const void *haddr = hblocks[level - 1].addr;
		unsigned long hblock_idx = hblocks[level - 1].index;
		unsigned int hoffset = hblocks[level - 1].hoffset;

		fsverity_hash_block(params, haddr, real_hash);
		if (memcmp(want_hash, real_hash, hsize) != 0)
			goto corrupted;
		/*
		 * Mark the hash block as verified. This must be atomic and
		 * idempotent, as the same hash block might be verified by
		 * multiple threads concurrently.
		 */
		if (vi->hash_block_verified)
			set_bit(hblock_idx, vi->hash_block_verified);
		else
			SetPageChecked(hpage);
		memcpy(_want_hash, haddr + hoffset, hsize);
		want_hash = _want_hash;
		kunmap_local(haddr);
		put_page(hpage);
	}

	/* Finally, verify the hash of the data block. */
	if (memcmp(want_hash, dblock->real_hash, hsize) != 0)
		goto corrupted;
	return true;

corrupted:
	fsverity_err(
		inode,
		"FILE CORRUPTED! pos=%llu, level=%d, want_hash=%s:%*phN, real_hash=%s:%*phN",
		data_pos, level - 1, params->hash_alg->name, hsize, want_hash,
		params->hash_alg->name, hsize,
		level == 0 ? dblock->real_hash : real_hash);
error:
	for (; level > 0; level--) {
		kunmap_local(hblocks[level - 1].addr);
		put_page(hblocks[level - 1].page);
	}
	return false;
}
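
/*
 * Worked example for the index arithmetic in verify_data_block() above
 * (illustrative only; the parameter values are assumptions, not requirements):
 * with 4096-byte Merkle tree blocks and SHA-256 (32-byte digests), each hash
 * block holds 4096 / 32 = 128 hashes, so log_arity = 7 and log_digestsize = 5.
 * For data block 1000, hidx starts at 1000. At level 0, next_hidx =
 * 1000 >> 7 = 7 and hoffset = (1000 << 5) & 4095 = 3328, i.e. the wanted hash
 * is 3328 bytes into tree block level_start[0] + 7. At level 1, next_hidx = 0
 * and hoffset = 7 * 32 = 224 within block level_start[1], whose own hash is
 * then checked against the root hash on the descent.
 */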

static void
fsverity_init_verification_context(struct fsverity_verification_context *ctx,
				   struct inode *inode,
				   unsigned long max_ra_pages)
{
	struct fsverity_info *vi = *fsverity_info_addr(inode);

	ctx->inode = inode;
	ctx->vi = vi;
	ctx->max_ra_pages = max_ra_pages;
	ctx->num_pending = 0;
	if (vi->tree_params.hash_alg->algo_id == HASH_ALGO_SHA256 &&
	    sha256_finup_2x_is_optimized())
		ctx->max_pending = 2;
	else
		ctx->max_pending = 1;
}

static void
fsverity_clear_pending_blocks(struct fsverity_verification_context *ctx)
{
	int i;

	for (i = ctx->num_pending - 1; i >= 0; i--) {
		kunmap_local(ctx->pending_blocks[i].data);
		ctx->pending_blocks[i].data = NULL;
	}
	ctx->num_pending = 0;
}

static bool
fsverity_verify_pending_blocks(struct fsverity_verification_context *ctx)
{
	struct fsverity_info *vi = ctx->vi;
	const struct merkle_tree_params *params = &vi->tree_params;
	int i;

	if (ctx->num_pending == 2) {
		/* num_pending == 2 implies that the algorithm is SHA-256 */
		sha256_finup_2x(params->hashstate ?
					&params->hashstate->sha256 : NULL,
				ctx->pending_blocks[0].data,
				ctx->pending_blocks[1].data, params->block_size,
				ctx->pending_blocks[0].real_hash,
				ctx->pending_blocks[1].real_hash);
	} else {
		for (i = 0; i < ctx->num_pending; i++)
			fsverity_hash_block(params, ctx->pending_blocks[i].data,
					    ctx->pending_blocks[i].real_hash);
	}

	for (i = 0; i < ctx->num_pending; i++) {
		if (!verify_data_block(ctx->inode, vi, &ctx->pending_blocks[i],
				       ctx->max_ra_pages))
			return false;
	}
	fsverity_clear_pending_blocks(ctx);
	return true;
}

static bool fsverity_add_data_blocks(struct fsverity_verification_context *ctx,
				     struct folio *data_folio, size_t len,
				     size_t offset)
{
	struct fsverity_info *vi = ctx->vi;
	const struct merkle_tree_params *params = &vi->tree_params;
	const unsigned int block_size = params->block_size;
	u64 pos = (u64)data_folio->index << PAGE_SHIFT;

	if (WARN_ON_ONCE(len <= 0 || !IS_ALIGNED(len | offset, block_size)))
		return false;
	if (WARN_ON_ONCE(!folio_test_locked(data_folio) ||
			 folio_test_uptodate(data_folio)))
		return false;
	do {
		ctx->pending_blocks[ctx->num_pending].data =
			kmap_local_folio(data_folio, offset);
		ctx->pending_blocks[ctx->num_pending].pos = pos + offset;
		if (++ctx->num_pending == ctx->max_pending &&
		    !fsverity_verify_pending_blocks(ctx))
			return false;
		offset += block_size;
		len -= block_size;
	} while (len);
	return true;
}
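
/*
 * Illustrative sketch (not part of this file) of how a filesystem that fills
 * the page cache without bios might call fsverity_verify_blocks() below from
 * its ->read_folio() path. The helper myfs_fill_folio() is hypothetical; the
 * only requirements are those documented below: the folio must still be
 * locked and not yet uptodate, and the length and offset must be aligned to
 * the Merkle tree block size.
 *
 *	err = myfs_fill_folio(inode, folio);	// hypothetical read helper
 *	if (!err && fsverity_active(inode) &&
 *	    !fsverity_verify_blocks(folio, folio_size(folio), 0))
 *		err = -EIO;			// verification failed
 *	if (!err)
 *		folio_mark_uptodate(folio);
 *	folio_unlock(folio);
 */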

/**
 * fsverity_verify_blocks() - verify data in a folio
 * @folio: the folio containing the data to verify
 * @len: the length of the data to verify in the folio
 * @offset: the offset of the data to verify in the folio
 *
 * Verify data that has just been read from a verity file. The data must be
 * located in a pagecache folio that is still locked and not yet uptodate. The
 * length and offset of the data must be Merkle tree block size aligned.
 *
 * Return: %true if the data is valid, else %false.
 */
bool fsverity_verify_blocks(struct folio *folio, size_t len, size_t offset)
{
	struct fsverity_verification_context ctx;

	fsverity_init_verification_context(&ctx, folio->mapping->host, 0);

	if (fsverity_add_data_blocks(&ctx, folio, len, offset) &&
	    fsverity_verify_pending_blocks(&ctx))
		return true;
	fsverity_clear_pending_blocks(&ctx);
	return false;
}
EXPORT_SYMBOL_GPL(fsverity_verify_blocks);

#ifdef CONFIG_BLOCK
/**
 * fsverity_verify_bio() - verify a 'read' bio that has just completed
 * @bio: the bio to verify
 *
 * Verify the bio's data against the file's Merkle tree. All bio data segments
 * must be aligned to the file's Merkle tree block size. If any data fails
 * verification, then bio->bi_status is set to an error status.
 *
 * This is a helper function for use by the ->readahead() method of filesystems
 * that issue bios to read data directly into the page cache. Filesystems that
 * populate the page cache without issuing bios (e.g. non block-based
 * filesystems) must instead call fsverity_verify_page() directly on each page.
 * All filesystems must also call fsverity_verify_page() on holes.
 */
void fsverity_verify_bio(struct bio *bio)
{
	struct inode *inode = bio_first_folio_all(bio)->mapping->host;
	struct fsverity_verification_context ctx;
	struct folio_iter fi;
	unsigned long max_ra_pages = 0;

	if (bio->bi_opf & REQ_RAHEAD) {
		/*
		 * If this bio is for data readahead, then we also do readahead
		 * of the first (largest) level of the Merkle tree. Namely,
		 * when a Merkle tree page is read, we also try to piggy-back
		 * on some additional pages -- up to 1/4 the number of data
		 * pages.
		 *
		 * This improves sequential read performance, as it greatly
		 * reduces the number of I/O requests made to the Merkle tree.
		 */
		max_ra_pages = bio->bi_iter.bi_size >> (PAGE_SHIFT + 2);
	}

	fsverity_init_verification_context(&ctx, inode, max_ra_pages);

	bio_for_each_folio_all(fi, bio) {
		if (!fsverity_add_data_blocks(&ctx, fi.folio, fi.length,
					      fi.offset))
			goto ioerr;
	}

	if (!fsverity_verify_pending_blocks(&ctx))
		goto ioerr;
	return;

ioerr:
	fsverity_clear_pending_blocks(&ctx);
	bio->bi_status = BLK_STS_IOERR;
}
EXPORT_SYMBOL_GPL(fsverity_verify_bio);
#endif /* CONFIG_BLOCK */

/**
 * fsverity_enqueue_verify_work() - enqueue work on the fs-verity workqueue
 * @work: the work to enqueue
 *
 * Enqueue verification work for asynchronous processing.
 */
void fsverity_enqueue_verify_work(struct work_struct *work)
{
	queue_work(fsverity_read_workqueue, work);
}
EXPORT_SYMBOL_GPL(fsverity_enqueue_verify_work);

void __init fsverity_init_workqueue(void)
{
	/*
	 * Use a high-priority workqueue to prioritize verification work, which
	 * blocks reads from completing, over regular application tasks.
	 *
	 * For performance reasons, don't use an unbound workqueue. Using an
	 * unbound workqueue for crypto operations causes excessive scheduler
	 * latency on ARM64.
	 */
	fsverity_read_workqueue = alloc_workqueue("fsverity_read_queue",
						  WQ_HIGHPRI | WQ_PERCPU,
						  num_online_cpus());
	if (!fsverity_read_workqueue)
		panic("failed to allocate fsverity_read_queue");
}
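
/*
 * Illustrative sketch (not part of this file) of the asynchronous pattern that
 * fsverity_enqueue_verify_work() is meant for. The struct and the
 * myfs_-prefixed helpers are hypothetical; a block-based filesystem would keep
 * equivalent state in its own bio completion path:
 *
 *	struct myfs_read_work {
 *		struct work_struct work;
 *		struct bio *bio;
 *	};
 *
 *	static void myfs_verify_work_fn(struct work_struct *work)
 *	{
 *		struct myfs_read_work *rw =
 *			container_of(work, struct myfs_read_work, work);
 *
 *		fsverity_verify_bio(rw->bio);	// sets BLK_STS_IOERR on failure
 *		myfs_complete_read(rw->bio);	// unlock folios, mark uptodate
 *		kfree(rw);
 *	}
 *
 *	// From the bio's ->bi_end_io, instead of completing inline:
 *	//	INIT_WORK(&rw->work, myfs_verify_work_fn);
 *	//	fsverity_enqueue_verify_work(&rw->work);
 */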