// SPDX-License-Identifier: GPL-2.0
/*
 * Data verification functions, i.e. hooks for ->readahead()
 *
 * Copyright 2019 Google LLC
 */

#include "fsverity_private.h"

#include <linux/bio.h>
#include <linux/export.h>

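/*
 * Maximum number of data blocks that may be queued for verification at once.
 * This is 2 to match the two-way interleaved SHA-256 hashing provided by
 * sha256_finup_2x(); see fsverity_verify_pending_blocks().
 */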
#define FS_VERITY_MAX_PENDING_BLOCKS 2

struct fsverity_pending_block {
	const void *data;
	u64 pos;
	u8 real_hash[FS_VERITY_MAX_DIGEST_SIZE];
};

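/*
 * State for verifying a batch of data blocks.  Typical flow (see
 * fsverity_verify_blocks() and fsverity_verify_bio() below): initialize the
 * context, queue data blocks with fsverity_add_data_blocks() (which hashes
 * and verifies them once max_pending blocks have accumulated), then flush any
 * remainder with fsverity_verify_pending_blocks().  On failure, the caller
 * drops any still-mapped blocks with fsverity_clear_pending_blocks().
 */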
struct fsverity_verification_context {
	struct inode *inode;
	struct fsverity_info *vi;
	unsigned long max_ra_pages;

	/*
	 * This is the queue of data blocks that are pending verification.  When
	 * the crypto layer supports interleaved hashing, we allow multiple
	 * blocks to be queued up in order to utilize it.  This can improve
	 * performance significantly vs. sequential hashing of each block.
	 */
	int num_pending;
	int max_pending;
	struct fsverity_pending_block
		pending_blocks[FS_VERITY_MAX_PENDING_BLOCKS];
};

static struct workqueue_struct *fsverity_read_workqueue;

/*
 * Returns true if the hash block with index @hblock_idx in the tree, located in
 * @hpage, has already been verified.
 */
static bool is_hash_block_verified(struct fsverity_info *vi, struct page *hpage,
				   unsigned long hblock_idx)
{
	unsigned int blocks_per_page;
	unsigned int i;

	/*
	 * When the Merkle tree block size and page size are the same, then the
	 * ->hash_block_verified bitmap isn't allocated, and we use PG_checked
	 * to directly indicate whether the page's block has been verified.
	 *
	 * Using PG_checked also guarantees that we re-verify hash pages that
	 * get evicted and re-instantiated from the backing storage, as new
	 * pages always start out with PG_checked cleared.
	 */
	if (!vi->hash_block_verified)
		return PageChecked(hpage);

	/*
	 * When the Merkle tree block size and page size differ, we use a bitmap
	 * to indicate whether each hash block has been verified.
	 *
	 * However, we still need to ensure that hash pages that get evicted and
	 * re-instantiated from the backing storage are re-verified.  To do
	 * this, we use PG_checked again, but now it doesn't really mean
	 * "checked".  Instead, now it just serves as an indicator for whether
	 * the hash page is newly instantiated or not.  If the page is new, as
	 * indicated by PG_checked=0, we clear the bitmap bits for the page's
	 * blocks since they are untrustworthy, then set PG_checked=1.
	 * Otherwise we return the bitmap bit for the requested block.
	 *
	 * Multiple threads may execute this code concurrently on the same page.
	 * This is safe because we use memory barriers to ensure that if a
	 * thread sees PG_checked=1, then it also sees the associated bitmap
	 * clearing to have occurred.  Also, all writes and their corresponding
	 * reads are atomic, and all writes are safe to repeat in the event that
	 * multiple threads get into the PG_checked=0 section.  (Clearing a
	 * bitmap bit again at worst causes a hash block to be verified
	 * redundantly.  That event should be very rare, so it's not worth using
	 * a lock to avoid.  Setting PG_checked again has no effect.)
	 */
	if (PageChecked(hpage)) {
		/*
		 * A read memory barrier is needed here to give ACQUIRE
		 * semantics to the above PageChecked() test.
		 */
		smp_rmb();
		return test_bit(hblock_idx, vi->hash_block_verified);
	}
	blocks_per_page = vi->tree_params.blocks_per_page;
	hblock_idx = round_down(hblock_idx, blocks_per_page);
	for (i = 0; i < blocks_per_page; i++)
		clear_bit(hblock_idx + i, vi->hash_block_verified);
	/*
	 * A write memory barrier is needed here to give RELEASE semantics to
	 * the below SetPageChecked() operation.
	 */
	smp_wmb();
	SetPageChecked(hpage);
	return false;
}

/*
 * Verify the hash of a single data block against the file's Merkle tree.
 *
 * In principle, we need to verify the entire path to the root node.  However,
 * for efficiency the filesystem may cache the hash blocks.  Therefore we need
 * only ascend the tree until an already-verified hash block is seen, and then
 * verify the path to that block.
 *
 * Return: %true if the data block is valid, else %false.
 */
static bool verify_data_block(struct inode *inode, struct fsverity_info *vi,
			      const struct fsverity_pending_block *dblock,
			      unsigned long max_ra_pages)
{
	const u64 data_pos = dblock->pos;
	const struct merkle_tree_params *params = &vi->tree_params;
	const unsigned int hsize = params->digest_size;
	int level;
	u8 _want_hash[FS_VERITY_MAX_DIGEST_SIZE];
	const u8 *want_hash;
	u8 real_hash[FS_VERITY_MAX_DIGEST_SIZE];
	/* The hash blocks that are traversed, indexed by level */
	struct {
		/* Page containing the hash block */
		struct page *page;
		/* Mapped address of the hash block (will be within @page) */
		const void *addr;
		/* Index of the hash block in the tree overall */
		unsigned long index;
		/* Byte offset of the wanted hash relative to @addr */
		unsigned int hoffset;
	} hblocks[FS_VERITY_MAX_LEVELS];
	/*
	 * The index of the previous level's block within that level; also the
	 * index of that block's hash within the current level.
	 */
	u64 hidx = data_pos >> params->log_blocksize;

	/*
	 * Up to FS_VERITY_MAX_PENDING_BLOCKS + FS_VERITY_MAX_LEVELS pages may
	 * be mapped at once.
	 */
	static_assert(FS_VERITY_MAX_PENDING_BLOCKS + FS_VERITY_MAX_LEVELS <=
		      KM_MAX_IDX);

	if (unlikely(data_pos >= inode->i_size)) {
		/*
		 * This can happen in the data page spanning EOF when the Merkle
		 * tree block size is less than the page size.  The Merkle tree
		 * doesn't cover data blocks fully past EOF.  But the entire
		 * page spanning EOF can be visible to userspace via a mmap, and
		 * any part past EOF should be all zeroes.  Therefore, we need
		 * to verify that any data blocks fully past EOF are all zeroes.
		 */
		if (memchr_inv(dblock->data, 0, params->block_size)) {
			fsverity_err(inode,
				     "FILE CORRUPTED!  Data past EOF is not zeroed");
			return false;
		}
		return true;
	}

	/*
	 * Starting at the leaf level, ascend the tree saving hash blocks along
	 * the way until we find a hash block that has already been verified, or
	 * until we reach the root.
	 */
	for (level = 0; level < params->num_levels; level++) {
		unsigned long next_hidx;
		unsigned long hblock_idx;
		pgoff_t hpage_idx;
		unsigned int hblock_offset_in_page;
		unsigned int hoffset;
		struct page *hpage;
		const void *haddr;

		/*
		 * The index of the block in the current level; also the index
		 * of that block's hash within the next level.
		 */
		next_hidx = hidx >> params->log_arity;

		/* Index of the hash block in the tree overall */
		hblock_idx = params->level_start[level] + next_hidx;

		/* Index of the hash page in the tree overall */
		hpage_idx = hblock_idx >> params->log_blocks_per_page;

		/* Byte offset of the hash block within the page */
		hblock_offset_in_page =
			(hblock_idx << params->log_blocksize) & ~PAGE_MASK;

		/* Byte offset of the hash within the block */
		hoffset = (hidx << params->log_digestsize) &
			  (params->block_size - 1);
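		/*
		 * Worked example (illustrative, assuming the common case of
		 * SHA-256 with 4096-byte blocks): digest_size = 32 and
		 * arity = 4096 / 32 = 128 hashes per block, so
		 * log_digestsize = 5 and log_arity = 7.  For hidx = 1000 at
		 * level 0: next_hidx = 1000 >> 7 = 7, and
		 * hoffset = (1000 << 5) & 4095 = 3328, i.e. hash number
		 * 1000 % 128 = 104 within hash block 7 of the next level.
		 */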

		hpage = inode->i_sb->s_vop->read_merkle_tree_page(inode,
				hpage_idx, level == 0 ? min(max_ra_pages,
					params->tree_pages - hpage_idx) : 0);
		if (IS_ERR(hpage)) {
			fsverity_err(inode,
				     "Error %ld reading Merkle tree page %lu",
				     PTR_ERR(hpage), hpage_idx);
			goto error;
		}
		haddr = kmap_local_page(hpage) + hblock_offset_in_page;
		if (is_hash_block_verified(vi, hpage, hblock_idx)) {
			memcpy(_want_hash, haddr + hoffset, hsize);
			want_hash = _want_hash;
			kunmap_local(haddr);
			put_page(hpage);
			goto descend;
		}
		hblocks[level].page = hpage;
		hblocks[level].addr = haddr;
		hblocks[level].index = hblock_idx;
		hblocks[level].hoffset = hoffset;
		hidx = next_hidx;
	}

	want_hash = vi->root_hash;
descend:
	/* Descend the tree verifying hash blocks. */
	for (; level > 0; level--) {
		struct page *hpage = hblocks[level - 1].page;
		const void *haddr = hblocks[level - 1].addr;
		unsigned long hblock_idx = hblocks[level - 1].index;
		unsigned int hoffset = hblocks[level - 1].hoffset;

		fsverity_hash_block(params, haddr, real_hash);
		if (memcmp(want_hash, real_hash, hsize) != 0)
			goto corrupted;
		/*
		 * Mark the hash block as verified.  This must be atomic and
		 * idempotent, as the same hash block might be verified by
		 * multiple threads concurrently.
		 */
		if (vi->hash_block_verified)
			set_bit(hblock_idx, vi->hash_block_verified);
		else
			SetPageChecked(hpage);
		memcpy(_want_hash, haddr + hoffset, hsize);
		want_hash = _want_hash;
		kunmap_local(haddr);
		put_page(hpage);
	}

	/* Finally, verify the hash of the data block. */
	if (memcmp(want_hash, dblock->real_hash, hsize) != 0)
		goto corrupted;
	return true;

corrupted:
	fsverity_err(
		inode,
		"FILE CORRUPTED! pos=%llu, level=%d, want_hash=%s:%*phN, real_hash=%s:%*phN",
		data_pos, level - 1, params->hash_alg->name, hsize, want_hash,
		params->hash_alg->name, hsize,
		level == 0 ? dblock->real_hash : real_hash);
error:
	for (; level > 0; level--) {
		kunmap_local(hblocks[level - 1].addr);
		put_page(hblocks[level - 1].page);
	}
	return false;
}

static void
fsverity_init_verification_context(struct fsverity_verification_context *ctx,
				   struct inode *inode,
				   unsigned long max_ra_pages)
{
	struct fsverity_info *vi = *fsverity_info_addr(inode);

	ctx->inode = inode;
	ctx->vi = vi;
	ctx->max_ra_pages = max_ra_pages;
	ctx->num_pending = 0;
	if (vi->tree_params.hash_alg->algo_id == HASH_ALGO_SHA256 &&
	    sha256_finup_2x_is_optimized())
		ctx->max_pending = 2;
	else
		ctx->max_pending = 1;
}

static void
fsverity_clear_pending_blocks(struct fsverity_verification_context *ctx)
{
	int i;

	for (i = ctx->num_pending - 1; i >= 0; i--) {
		kunmap_local(ctx->pending_blocks[i].data);
		ctx->pending_blocks[i].data = NULL;
	}
	ctx->num_pending = 0;
}

static bool
fsverity_verify_pending_blocks(struct fsverity_verification_context *ctx)
{
	struct fsverity_info *vi = ctx->vi;
	const struct merkle_tree_params *params = &vi->tree_params;
	int i;

	if (ctx->num_pending == 2) {
		/* num_pending == 2 implies that the algorithm is SHA-256 */
		sha256_finup_2x(params->hashstate ? &params->hashstate->sha256 :
						    NULL,
				ctx->pending_blocks[0].data,
				ctx->pending_blocks[1].data, params->block_size,
				ctx->pending_blocks[0].real_hash,
				ctx->pending_blocks[1].real_hash);
	} else {
		for (i = 0; i < ctx->num_pending; i++)
			fsverity_hash_block(params, ctx->pending_blocks[i].data,
					    ctx->pending_blocks[i].real_hash);
	}

	for (i = 0; i < ctx->num_pending; i++) {
		if (!verify_data_block(ctx->inode, vi, &ctx->pending_blocks[i],
				       ctx->max_ra_pages))
			return false;
	}
	fsverity_clear_pending_blocks(ctx);
	return true;
}

static bool fsverity_add_data_blocks(struct fsverity_verification_context *ctx,
				     struct folio *data_folio, size_t len,
				     size_t offset)
{
	struct fsverity_info *vi = ctx->vi;
	const struct merkle_tree_params *params = &vi->tree_params;
	const unsigned int block_size = params->block_size;
	u64 pos = (u64)data_folio->index << PAGE_SHIFT;

	if (WARN_ON_ONCE(len <= 0 || !IS_ALIGNED(len | offset, block_size)))
		return false;
	if (WARN_ON_ONCE(!folio_test_locked(data_folio) ||
			 folio_test_uptodate(data_folio)))
		return false;
	do {
		ctx->pending_blocks[ctx->num_pending].data =
			kmap_local_folio(data_folio, offset);
		ctx->pending_blocks[ctx->num_pending].pos = pos + offset;
		if (++ctx->num_pending == ctx->max_pending &&
		    !fsverity_verify_pending_blocks(ctx))
			return false;
		offset += block_size;
		len -= block_size;
	} while (len);
	return true;
}

/**
 * fsverity_verify_blocks() - verify data in a folio
 * @folio: the folio containing the data to verify
 * @len: the length of the data to verify in the folio
 * @offset: the offset of the data to verify in the folio
 *
 * Verify data that has just been read from a verity file.  The data must be
 * located in a pagecache folio that is still locked and not yet uptodate.  The
 * length and offset of the data must be Merkle tree block size aligned.
 *
 * Return: %true if the data is valid, else %false.
 */
bool fsverity_verify_blocks(struct folio *folio, size_t len, size_t offset)
{
	struct fsverity_verification_context ctx;

	fsverity_init_verification_context(&ctx, folio->mapping->host, 0);

	if (fsverity_add_data_blocks(&ctx, folio, len, offset) &&
	    fsverity_verify_pending_blocks(&ctx))
		return true;
	fsverity_clear_pending_blocks(&ctx);
	return false;
}
EXPORT_SYMBOL_GPL(fsverity_verify_blocks);
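/*
 * Illustrative use (a sketch, not taken from any particular filesystem): a
 * filesystem that reads data into the page cache without bios might verify a
 * just-read, still-locked folio with something like:
 *
 *	if (fsverity_active(inode) &&
 *	    !fsverity_verify_blocks(folio, folio_size(folio), 0))
 *		return -EIO;	// treat the folio as not uptodate
 */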

#ifdef CONFIG_BLOCK
/**
 * fsverity_verify_bio() - verify a 'read' bio that has just completed
 * @bio: the bio to verify
 *
 * Verify the bio's data against the file's Merkle tree.  All bio data segments
 * must be aligned to the file's Merkle tree block size.  If any data fails
 * verification, then bio->bi_status is set to an error status.
 *
 * This is a helper function for use by the ->readahead() method of filesystems
 * that issue bios to read data directly into the page cache.  Filesystems that
 * populate the page cache without issuing bios (e.g. non block-based
 * filesystems) must instead call fsverity_verify_page() directly on each page.
 * All filesystems must also call fsverity_verify_page() on holes.
 */
void fsverity_verify_bio(struct bio *bio)
{
	struct inode *inode = bio_first_folio_all(bio)->mapping->host;
	struct fsverity_verification_context ctx;
	struct folio_iter fi;
	unsigned long max_ra_pages = 0;

	if (bio->bi_opf & REQ_RAHEAD) {
		/*
		 * If this bio is for data readahead, then we also do readahead
		 * of the first (largest) level of the Merkle tree.  Namely,
		 * when a Merkle tree page is read, we also try to piggy-back on
		 * some additional pages -- up to 1/4 the number of data pages.
		 *
		 * This improves sequential read performance, as it greatly
		 * reduces the number of I/O requests made to the Merkle tree.
		 */
		max_ra_pages = bio->bi_iter.bi_size >> (PAGE_SHIFT + 2);
	}
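	/*
	 * For example (assuming 4 KiB pages), a 1 MiB readahead bio has
	 * bi_size = 1048576, so max_ra_pages = 1048576 >> (12 + 2) = 64,
	 * i.e. 1/4 of the bio's 256 data pages.
	 */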

	fsverity_init_verification_context(&ctx, inode, max_ra_pages);

	bio_for_each_folio_all(fi, bio) {
		if (!fsverity_add_data_blocks(&ctx, fi.folio, fi.length,
					      fi.offset))
			goto ioerr;
	}

	if (!fsverity_verify_pending_blocks(&ctx))
		goto ioerr;
	return;

ioerr:
	fsverity_clear_pending_blocks(&ctx);
	bio->bi_status = BLK_STS_IOERR;
}
EXPORT_SYMBOL_GPL(fsverity_verify_bio);
#endif /* CONFIG_BLOCK */

/**
 * fsverity_enqueue_verify_work() - enqueue work on the fs-verity workqueue
 * @work: the work to enqueue
 *
 * Enqueue verification work for asynchronous processing.
 */
void fsverity_enqueue_verify_work(struct work_struct *work)
{
	queue_work(fsverity_read_workqueue, work);
}
EXPORT_SYMBOL_GPL(fsverity_enqueue_verify_work);
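/*
 * Illustrative use (a sketch, not taken from any particular filesystem):
 * since verification hashes data, it should not run in the bio completion
 * (often interrupt) context.  A filesystem typically embeds a work_struct in
 * its per-bio context and defers to this workqueue, roughly:
 *
 *	// in the bio's end_io handler (hypothetical names):
 *	INIT_WORK(&pbio_ctx->work, my_verity_work);
 *	fsverity_enqueue_verify_work(&pbio_ctx->work);
 *
 *	// my_verity_work() then calls fsverity_verify_bio(bio) and
 *	// unlocks/completes the folios.
 */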

void __init fsverity_init_workqueue(void)
{
	/*
	 * Use a high-priority workqueue to prioritize verification work, which
	 * blocks reads from completing, over regular application tasks.
	 *
	 * For performance reasons, don't use an unbound workqueue.  Using an
	 * unbound workqueue for crypto operations causes excessive scheduler
	 * latency on ARM64.
	 */
	fsverity_read_workqueue = alloc_workqueue("fsverity_read_queue",
						  WQ_HIGHPRI | WQ_PERCPU,
						  num_online_cpus());
	if (!fsverity_read_workqueue)
		panic("failed to allocate fsverity_read_queue");
}
468