// SPDX-License-Identifier: GPL-2.0
/*
 * Data verification functions, i.e. hooks for ->readahead()
 *
 * Copyright 2019 Google LLC
 */

#include "fsverity_private.h"

#include <linux/bio.h>
#include <linux/export.h>

/* The maximum number of data blocks that can be pending verification at once */
#define FS_VERITY_MAX_PENDING_BLOCKS	2

struct fsverity_pending_block {
	/* Mapped address of the data block */
	const void *data;
	/* Byte position of the data block within the file */
	u64 pos;
	/* Computed hash of the data block */
	u8 real_hash[FS_VERITY_MAX_DIGEST_SIZE];
};

struct fsverity_verification_context {
	struct inode *inode;
	struct fsverity_info *vi;
	unsigned long max_ra_pages;

	/*
	 * This is the queue of data blocks that are pending verification. When
	 * the crypto layer supports interleaved hashing, we allow multiple
	 * blocks to be queued up in order to utilize it. This can improve
	 * performance significantly vs. sequential hashing of each block.
	 */
	int num_pending;
	int max_pending;
	struct fsverity_pending_block
		pending_blocks[FS_VERITY_MAX_PENDING_BLOCKS];
};

static struct workqueue_struct *fsverity_read_workqueue;

/*
 * Returns true if the hash block with index @hblock_idx in the tree, located in
 * @hpage, has already been verified.
 */
static bool is_hash_block_verified(struct fsverity_info *vi, struct page *hpage,
				   unsigned long hblock_idx)
{
	unsigned int blocks_per_page;
	unsigned int i;

	/*
	 * When the Merkle tree block size and page size are the same, then the
	 * ->hash_block_verified bitmap isn't allocated, and we use PG_checked
	 * to directly indicate whether the page's block has been verified.
	 *
	 * Using PG_checked also guarantees that we re-verify hash pages that
	 * get evicted and re-instantiated from the backing storage, as new
	 * pages always start out with PG_checked cleared.
	 */
	if (!vi->hash_block_verified)
		return PageChecked(hpage);

	/*
	 * When the Merkle tree block size and page size differ, we use a bitmap
	 * to indicate whether each hash block has been verified.
	 *
	 * However, we still need to ensure that hash pages that get evicted and
	 * re-instantiated from the backing storage are re-verified. To do
	 * this, we use PG_checked again, but now it doesn't really mean
	 * "checked". Instead, now it just serves as an indicator for whether
	 * the hash page is newly instantiated or not. If the page is new, as
	 * indicated by PG_checked=0, we clear the bitmap bits for the page's
	 * blocks since they are untrustworthy, then set PG_checked=1.
	 * Otherwise we return the bitmap bit for the requested block.
	 *
	 * Multiple threads may execute this code concurrently on the same page.
	 * This is safe because we use memory barriers to ensure that if a
	 * thread sees PG_checked=1, then it also sees the associated bitmap
	 * clearing to have occurred. Also, all writes and their corresponding
	 * reads are atomic, and all writes are safe to repeat in the event that
	 * multiple threads get into the PG_checked=0 section. (Clearing a
	 * bitmap bit again at worst causes a hash block to be verified
	 * redundantly. That event should be very rare, so it's not worth using
	 * a lock to avoid. Setting PG_checked again has no effect.)
	 */
	if (PageChecked(hpage)) {
		/*
		 * A read memory barrier is needed here to give ACQUIRE
		 * semantics to the above PageChecked() test.
		 */
		smp_rmb();
		return test_bit(hblock_idx, vi->hash_block_verified);
	}
	blocks_per_page = vi->tree_params.blocks_per_page;
	hblock_idx = round_down(hblock_idx, blocks_per_page);
	for (i = 0; i < blocks_per_page; i++)
		clear_bit(hblock_idx + i, vi->hash_block_verified);
	/*
	 * A write memory barrier is needed here to give RELEASE semantics to
	 * the below SetPageChecked() operation.
	 */
	smp_wmb();
	SetPageChecked(hpage);
	return false;
}

/*
 * Verify the hash of a single data block against the file's Merkle tree.
 *
 * In principle, we need to verify the entire path to the root node. However,
 * for efficiency the filesystem may cache the hash blocks. Therefore we need
 * only ascend the tree until an already-verified hash block is seen, and then
 * verify the path to that block.
 *
 * Return: %true if the data block is valid, else %false.
 */
static bool verify_data_block(struct inode *inode, struct fsverity_info *vi,
			      const struct fsverity_pending_block *dblock,
			      unsigned long max_ra_pages)
{
	const u64 data_pos = dblock->pos;
	const struct merkle_tree_params *params = &vi->tree_params;
	const unsigned int hsize = params->digest_size;
	int level;
	u8 _want_hash[FS_VERITY_MAX_DIGEST_SIZE];
	const u8 *want_hash;
	u8 real_hash[FS_VERITY_MAX_DIGEST_SIZE];
	/* The hash blocks that are traversed, indexed by level */
	struct {
		/* Page containing the hash block */
		struct page *page;
		/* Mapped address of the hash block (will be within @page) */
		const void *addr;
		/* Index of the hash block in the tree overall */
		unsigned long index;
		/* Byte offset of the wanted hash relative to @addr */
		unsigned int hoffset;
	} hblocks[FS_VERITY_MAX_LEVELS];
	/*
	 * The index of the previous level's block within that level; also the
	 * index of that block's hash within the current level.
	 */
	u64 hidx = data_pos >> params->log_blocksize;

	/*
	 * Up to FS_VERITY_MAX_PENDING_BLOCKS + FS_VERITY_MAX_LEVELS pages may
	 * be mapped at once.
	 */
	static_assert(FS_VERITY_MAX_PENDING_BLOCKS + FS_VERITY_MAX_LEVELS <=
		      KM_MAX_IDX);

	if (unlikely(data_pos >= inode->i_size)) {
		/*
		 * This can happen in the data page spanning EOF when the Merkle
		 * tree block size is less than the page size. The Merkle tree
		 * doesn't cover data blocks fully past EOF. But the entire
		 * page spanning EOF can be visible to userspace via a mmap, and
		 * any part past EOF should be all zeroes. Therefore, we need
		 * to verify that any data blocks fully past EOF are all zeroes.
		 */
		if (memchr_inv(dblock->data, 0, params->block_size)) {
			fsverity_err(inode,
				     "FILE CORRUPTED! Data past EOF is not zeroed");
			return false;
		}
		return true;
	}

	/*
	 * Starting at the leaf level, ascend the tree saving hash blocks along
	 * the way until we find a hash block that has already been verified, or
	 * until we reach the root.
	 */
	for (level = 0; level < params->num_levels; level++) {
		unsigned long next_hidx;
		unsigned long hblock_idx;
		pgoff_t hpage_idx;
		unsigned int hblock_offset_in_page;
		unsigned int hoffset;
		struct page *hpage;
		const void *haddr;

		/*
		 * The index of the block in the current level; also the index
		 * of that block's hash within the next level.
		 */
		next_hidx = hidx >> params->log_arity;

		/* Index of the hash block in the tree overall */
		hblock_idx = params->level_start[level] + next_hidx;

		/* Index of the hash page in the tree overall */
		hpage_idx = hblock_idx >> params->log_blocks_per_page;

		/* Byte offset of the hash block within the page */
		hblock_offset_in_page =
			(hblock_idx << params->log_blocksize) & ~PAGE_MASK;

		/* Byte offset of the hash within the block */
		hoffset = (hidx << params->log_digestsize) &
			  (params->block_size - 1);

		hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode,
				hpage_idx, level == 0 ? min(max_ra_pages,
					params->tree_pages - hpage_idx) : 0);
		if (IS_ERR(hpage)) {
			fsverity_err(inode,
				     "Error %ld reading Merkle tree page %lu",
				     PTR_ERR(hpage), hpage_idx);
			goto error;
		}
		haddr = kmap_local_page(hpage) + hblock_offset_in_page;
		if (is_hash_block_verified(vi, hpage, hblock_idx)) {
			memcpy(_want_hash, haddr + hoffset, hsize);
			want_hash = _want_hash;
			kunmap_local(haddr);
			put_page(hpage);
			goto descend;
		}
		hblocks[level].page = hpage;
		hblocks[level].addr = haddr;
		hblocks[level].index = hblock_idx;
		hblocks[level].hoffset = hoffset;
		hidx = next_hidx;
	}

	want_hash = vi->root_hash;
descend:
	/* Descend the tree verifying hash blocks. */
	for (; level > 0; level--) {
		struct page *hpage = hblocks[level - 1].page;
		const void *haddr = hblocks[level - 1].addr;
		unsigned long hblock_idx = hblocks[level - 1].index;
		unsigned int hoffset = hblocks[level - 1].hoffset;

		fsverity_hash_block(params, haddr, real_hash);
		if (memcmp(want_hash, real_hash, hsize) != 0)
			goto corrupted;
		/*
		 * Mark the hash block as verified. This must be atomic and
		 * idempotent, as the same hash block might be verified by
		 * multiple threads concurrently.
		 */
		if (vi->hash_block_verified)
			set_bit(hblock_idx, vi->hash_block_verified);
		else
			SetPageChecked(hpage);
		memcpy(_want_hash, haddr + hoffset, hsize);
		want_hash = _want_hash;
		kunmap_local(haddr);
		put_page(hpage);
	}

	/* Finally, verify the hash of the data block. */
	if (memcmp(want_hash, dblock->real_hash, hsize) != 0)
		goto corrupted;
	return true;

corrupted:
	fsverity_err(
		inode,
		"FILE CORRUPTED! pos=%llu, level=%d, want_hash=%s:%*phN, real_hash=%s:%*phN",
		data_pos, level - 1, params->hash_alg->name, hsize, want_hash,
		params->hash_alg->name, hsize,
		level == 0 ? dblock->real_hash : real_hash);
error:
	for (; level > 0; level--) {
		kunmap_local(hblocks[level - 1].addr);
		put_page(hblocks[level - 1].page);
	}
	return false;
}

static void
fsverity_init_verification_context(struct fsverity_verification_context *ctx,
				   struct inode *inode,
				   unsigned long max_ra_pages)
{
	struct fsverity_info *vi = *fsverity_info_addr(inode);

	ctx->inode = inode;
	ctx->vi = vi;
	ctx->max_ra_pages = max_ra_pages;
	ctx->num_pending = 0;
	if (vi->tree_params.hash_alg->algo_id == HASH_ALGO_SHA256 &&
	    sha256_finup_2x_is_optimized())
		ctx->max_pending = 2;
	else
		ctx->max_pending = 1;
}

static void
fsverity_clear_pending_blocks(struct fsverity_verification_context *ctx)
{
	int i;

	for (i = ctx->num_pending - 1; i >= 0; i--) {
		kunmap_local(ctx->pending_blocks[i].data);
		ctx->pending_blocks[i].data = NULL;
	}
	ctx->num_pending = 0;
}

static bool
fsverity_verify_pending_blocks(struct fsverity_verification_context *ctx)
{
	struct fsverity_info *vi = ctx->vi;
	const struct merkle_tree_params *params = &vi->tree_params;
	int i;

	if (ctx->num_pending == 2) {
		/* num_pending == 2 implies that the algorithm is SHA-256 */
		sha256_finup_2x(params->hashstate ? &params->hashstate->sha256 :
						    NULL,
				ctx->pending_blocks[0].data,
				ctx->pending_blocks[1].data, params->block_size,
				ctx->pending_blocks[0].real_hash,
				ctx->pending_blocks[1].real_hash);
	} else {
		for (i = 0; i < ctx->num_pending; i++)
			fsverity_hash_block(params, ctx->pending_blocks[i].data,
					    ctx->pending_blocks[i].real_hash);
	}

	for (i = 0; i < ctx->num_pending; i++) {
		if (!verify_data_block(ctx->inode, vi, &ctx->pending_blocks[i],
				       ctx->max_ra_pages))
			return false;
	}
	fsverity_clear_pending_blocks(ctx);
	return true;
}

static bool fsverity_add_data_blocks(struct fsverity_verification_context *ctx,
				     struct folio *data_folio, size_t len,
				     size_t offset)
{
	struct fsverity_info *vi = ctx->vi;
	const struct merkle_tree_params *params = &vi->tree_params;
	const unsigned int block_size = params->block_size;
	u64 pos = (u64)data_folio->index << PAGE_SHIFT;

	if (WARN_ON_ONCE(len <= 0 || !IS_ALIGNED(len | offset, block_size)))
		return false;
	if (WARN_ON_ONCE(!folio_test_locked(data_folio) ||
			 folio_test_uptodate(data_folio)))
		return false;
	do {
		ctx->pending_blocks[ctx->num_pending].data =
			kmap_local_folio(data_folio, offset);
		ctx->pending_blocks[ctx->num_pending].pos = pos + offset;
		if (++ctx->num_pending == ctx->max_pending &&
		    !fsverity_verify_pending_blocks(ctx))
			return false;
		offset += block_size;
		len -= block_size;
	} while (len);
	return true;
}

/**
 * fsverity_verify_blocks() - verify data in a folio
 * @folio: the folio containing the data to verify
 * @len: the length of the data to verify in the folio
 * @offset: the offset of the data to verify in the folio
 *
 * Verify data that has just been read from a verity file. The data must be
 * located in a pagecache folio that is still locked and not yet uptodate. The
 * length and offset of the data must be Merkle tree block size aligned.
 *
 * Return: %true if the data is valid, else %false.
 */
bool fsverity_verify_blocks(struct folio *folio, size_t len, size_t offset)
{
	struct fsverity_verification_context ctx;

	fsverity_init_verification_context(&ctx, folio->mapping->host, 0);

	if (fsverity_add_data_blocks(&ctx, folio, len, offset) &&
	    fsverity_verify_pending_blocks(&ctx))
		return true;
	fsverity_clear_pending_blocks(&ctx);
	return false;
}
EXPORT_SYMBOL_GPL(fsverity_verify_blocks);
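
/*
 * Illustrative usage (hypothetical filesystem code, not part of this file):
 * a filesystem that fills the page cache without issuing bios might verify
 * each locked, not-yet-uptodate folio before marking it uptodate, e.g.:
 *
 *	if (fsverity_active(inode) &&
 *	    !fsverity_verify_blocks(folio, folio_size(folio), 0))
 *		return -EIO;
 */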

#ifdef CONFIG_BLOCK
/**
 * fsverity_verify_bio() - verify a 'read' bio that has just completed
 * @bio: the bio to verify
 *
 * Verify the bio's data against the file's Merkle tree. All bio data segments
 * must be aligned to the file's Merkle tree block size. If any data fails
 * verification, then bio->bi_status is set to an error status.
 *
 * This is a helper function for use by the ->readahead() method of filesystems
 * that issue bios to read data directly into the page cache. Filesystems that
 * populate the page cache without issuing bios (e.g. non block-based
 * filesystems) must instead call fsverity_verify_page() directly on each page.
 * All filesystems must also call fsverity_verify_page() on holes.
 */
void fsverity_verify_bio(struct bio *bio)
{
	struct inode *inode = bio_first_folio_all(bio)->mapping->host;
	struct fsverity_verification_context ctx;
	struct folio_iter fi;
	unsigned long max_ra_pages = 0;

	if (bio->bi_opf & REQ_RAHEAD) {
		/*
		 * If this bio is for data readahead, then we also do readahead
		 * of the first (largest) level of the Merkle tree. Namely,
		 * when a Merkle tree page is read, we also try to piggy-back on
		 * some additional pages -- up to 1/4 the number of data pages.
		 *
		 * This improves sequential read performance, as it greatly
		 * reduces the number of I/O requests made to the Merkle tree.
		 */
		max_ra_pages = bio->bi_iter.bi_size >> (PAGE_SHIFT + 2);
	}

	fsverity_init_verification_context(&ctx, inode, max_ra_pages);

	bio_for_each_folio_all(fi, bio) {
		if (!fsverity_add_data_blocks(&ctx, fi.folio, fi.length,
					      fi.offset))
			goto ioerr;
	}

	if (!fsverity_verify_pending_blocks(&ctx))
		goto ioerr;
	return;

ioerr:
	fsverity_clear_pending_blocks(&ctx);
	bio->bi_status = BLK_STS_IOERR;
}
EXPORT_SYMBOL_GPL(fsverity_verify_bio);
#endif /* CONFIG_BLOCK */

/**
 * fsverity_enqueue_verify_work() - enqueue work on the fs-verity workqueue
 * @work: the work to enqueue
 *
 * Enqueue verification work for asynchronous processing.
 */
void fsverity_enqueue_verify_work(struct work_struct *work)
{
	queue_work(fsverity_read_workqueue, work);
}
EXPORT_SYMBOL_GPL(fsverity_enqueue_verify_work);
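
/*
 * Illustrative usage (hypothetical filesystem code, not part of this file):
 * a bio completion handler typically defers verification to this workqueue
 * rather than verifying in interrupt context, e.g.:
 *
 *	INIT_WORK(&io->work, my_verity_work_fn);
 *	fsverity_enqueue_verify_work(&io->work);
 *
 * where my_verity_work_fn() calls fsverity_verify_bio() on the completed bio
 * and then finishes the read.
 */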

void __init fsverity_init_workqueue(void)
{
	/*
	 * Use a high-priority workqueue to prioritize verification work, which
	 * blocks reads from completing, over regular application tasks.
	 *
	 * For performance reasons, don't use an unbound workqueue. Using an
	 * unbound workqueue for crypto operations causes excessive scheduler
	 * latency on ARM64.
	 */
	fsverity_read_workqueue = alloc_workqueue("fsverity_read_queue",
						  WQ_HIGHPRI | WQ_PERCPU,
						  num_online_cpus());
	if (!fsverity_read_workqueue)
		panic("failed to allocate fsverity_read_queue");
}