xref: /linux/fs/verity/enable.c (revision 69050f8d6d075dc01af7a5f2f550a8067510366f)
1 // SPDX-License-Identifier: GPL-2.0
2 /*
3  * Ioctl to enable verity on a file
4  *
5  * Copyright 2019 Google LLC
6  */
7 
8 #include "fsverity_private.h"
9 
10 #include <linux/export.h>
11 #include <linux/mount.h>
12 #include <linux/sched/signal.h>
13 #include <linux/uaccess.h>
14 
15 struct block_buffer {
16 	u32 filled;
17 	bool is_root_hash;
18 	u8 *data;
19 };
20 
21 /* Hash a block, writing the result to the next level's pending block buffer. */
22 static int hash_one_block(const struct merkle_tree_params *params,
23 			  struct block_buffer *cur)
24 {
25 	struct block_buffer *next = cur + 1;
26 
27 	/*
28 	 * Safety check to prevent a buffer overflow in case of a filesystem bug
29 	 * that allows the file size to change despite deny_write_access(), or a
30 	 * bug in the Merkle tree logic itself
31 	 */
32 	if (WARN_ON_ONCE(next->is_root_hash && next->filled != 0))
33 		return -EINVAL;
34 
35 	/* Zero-pad the block if it's shorter than the block size. */
36 	memset(&cur->data[cur->filled], 0, params->block_size - cur->filled);
37 
38 	fsverity_hash_block(params, cur->data, &next->data[next->filled]);
39 	next->filled += params->digest_size;
40 	cur->filled = 0;
41 	return 0;
42 }
43 
44 static int write_merkle_tree_block(struct file *file, const u8 *buf,
45 				   unsigned long index,
46 				   const struct merkle_tree_params *params)
47 {
48 	struct inode *inode = file_inode(file);
49 	u64 pos = (u64)index << params->log_blocksize;
50 	int err;
51 
52 	err = inode->i_sb->s_vop->write_merkle_tree_block(file, buf, pos,
53 							  params->block_size);
54 	if (err)
55 		fsverity_err(inode, "Error %d writing Merkle tree block %lu",
56 			     err, index);
57 	return err;
58 }
59 
60 /*
61  * Build the Merkle tree for the given file using the given parameters, and
62  * return the root hash in @root_hash.
63  *
64  * The tree is written to a filesystem-specific location as determined by the
65  * ->write_merkle_tree_block() method.  However, the blocks that comprise the
66  * tree are the same for all filesystems.
67  */
68 static int build_merkle_tree(struct file *filp,
69 			     const struct merkle_tree_params *params,
70 			     u8 *root_hash)
71 {
72 	struct inode *inode = file_inode(filp);
73 	const u64 data_size = inode->i_size;
74 	const int num_levels = params->num_levels;
75 	struct block_buffer _buffers[1 + FS_VERITY_MAX_LEVELS + 1] = {};
76 	struct block_buffer *buffers = &_buffers[1];
77 	unsigned long level_offset[FS_VERITY_MAX_LEVELS];
78 	int level;
79 	u64 offset;
80 	int err;
81 
82 	if (data_size == 0) {
83 		/* Empty file is a special case; root hash is all 0's */
84 		memset(root_hash, 0, params->digest_size);
85 		return 0;
86 	}
87 
88 	/*
89 	 * Allocate the block buffers.  Buffer "-1" is for data blocks.
90 	 * Buffers 0 <= level < num_levels are for the actual tree levels.
91 	 * Buffer 'num_levels' is for the root hash.
92 	 */
93 	for (level = -1; level < num_levels; level++) {
94 		buffers[level].data = kzalloc(params->block_size, GFP_KERNEL);
95 		if (!buffers[level].data) {
96 			err = -ENOMEM;
97 			goto out;
98 		}
99 	}
100 	buffers[num_levels].data = root_hash;
101 	buffers[num_levels].is_root_hash = true;
102 
103 	BUILD_BUG_ON(sizeof(level_offset) != sizeof(params->level_start));
104 	memcpy(level_offset, params->level_start, sizeof(level_offset));
105 
106 	/* Hash each data block, also hashing the tree blocks as they fill up */
107 	for (offset = 0; offset < data_size; offset += params->block_size) {
108 		ssize_t bytes_read;
109 		loff_t pos = offset;
110 
111 		buffers[-1].filled = min_t(u64, params->block_size,
112 					   data_size - offset);
113 		bytes_read = __kernel_read(filp, buffers[-1].data,
114 					   buffers[-1].filled, &pos);
115 		if (bytes_read < 0) {
116 			err = bytes_read;
117 			fsverity_err(inode, "Error %d reading file data", err);
118 			goto out;
119 		}
120 		if (bytes_read != buffers[-1].filled) {
121 			err = -EINVAL;
122 			fsverity_err(inode, "Short read of file data");
123 			goto out;
124 		}
125 		err = hash_one_block(params, &buffers[-1]);
126 		if (err)
127 			goto out;
128 		for (level = 0; level < num_levels; level++) {
129 			if (buffers[level].filled + params->digest_size <=
130 			    params->block_size) {
131 				/* Next block at @level isn't full yet */
132 				break;
133 			}
134 			/* Next block at @level is full */
135 
136 			err = hash_one_block(params, &buffers[level]);
137 			if (err)
138 				goto out;
139 			err = write_merkle_tree_block(filp,
140 						      buffers[level].data,
141 						      level_offset[level],
142 						      params);
143 			if (err)
144 				goto out;
145 			level_offset[level]++;
146 		}
147 		if (fatal_signal_pending(current)) {
148 			err = -EINTR;
149 			goto out;
150 		}
151 		cond_resched();
152 	}
153 	/* Finish all nonempty pending tree blocks. */
154 	for (level = 0; level < num_levels; level++) {
155 		if (buffers[level].filled != 0) {
156 			err = hash_one_block(params, &buffers[level]);
157 			if (err)
158 				goto out;
159 			err = write_merkle_tree_block(filp,
160 						      buffers[level].data,
161 						      level_offset[level],
162 						      params);
163 			if (err)
164 				goto out;
165 		}
166 	}
167 	/* The root hash was filled by the last call to hash_one_block(). */
168 	if (WARN_ON_ONCE(buffers[num_levels].filled != params->digest_size)) {
169 		err = -EINVAL;
170 		goto out;
171 	}
172 	err = 0;
173 out:
174 	for (level = -1; level < num_levels; level++)
175 		kfree(buffers[level].data);
176 	return err;
177 }
178 
179 static int enable_verity(struct file *filp,
180 			 const struct fsverity_enable_arg *arg)
181 {
182 	struct inode *inode = file_inode(filp);
183 	const struct fsverity_operations *vops = inode->i_sb->s_vop;
184 	struct merkle_tree_params params = { };
185 	struct fsverity_descriptor *desc;
186 	size_t desc_size = struct_size(desc, signature, arg->sig_size);
187 	struct fsverity_info *vi;
188 	int err;
189 
190 	/* Start initializing the fsverity_descriptor */
191 	desc = kzalloc(desc_size, GFP_KERNEL);
192 	if (!desc)
193 		return -ENOMEM;
194 	desc->version = 1;
195 	desc->hash_algorithm = arg->hash_algorithm;
196 	desc->log_blocksize = ilog2(arg->block_size);
197 
198 	/* Get the salt if the user provided one */
199 	if (arg->salt_size &&
200 	    copy_from_user(desc->salt, u64_to_user_ptr(arg->salt_ptr),
201 			   arg->salt_size)) {
202 		err = -EFAULT;
203 		goto out;
204 	}
205 	desc->salt_size = arg->salt_size;
206 
207 	/* Get the builtin signature if the user provided one */
208 	if (arg->sig_size &&
209 	    copy_from_user(desc->signature, u64_to_user_ptr(arg->sig_ptr),
210 			   arg->sig_size)) {
211 		err = -EFAULT;
212 		goto out;
213 	}
214 	desc->sig_size = cpu_to_le32(arg->sig_size);
215 
216 	desc->data_size = cpu_to_le64(inode->i_size);
217 
218 	/* Prepare the Merkle tree parameters */
219 	err = fsverity_init_merkle_tree_params(&params, inode,
220 					       arg->hash_algorithm,
221 					       desc->log_blocksize,
222 					       desc->salt, desc->salt_size);
223 	if (err)
224 		goto out;
225 
226 	trace_fsverity_enable(inode, &params);
227 
228 	/*
229 	 * Start enabling verity on this file, serialized by the inode lock.
230 	 * Fail if verity is already enabled or is already being enabled.
231 	 */
232 	inode_lock(inode);
233 	if (IS_VERITY(inode))
234 		err = -EEXIST;
235 	else
236 		err = vops->begin_enable_verity(filp);
237 	inode_unlock(inode);
238 	if (err)
239 		goto out;
240 
241 	/*
242 	 * Build the Merkle tree.  Don't hold the inode lock during this, since
243 	 * on huge files this may take a very long time and we don't want to
244 	 * force unrelated syscalls like chown() to block forever.  We don't
245 	 * need the inode lock here because deny_write_access() already prevents
246 	 * the file from being written to or truncated, and we still serialize
247 	 * ->begin_enable_verity() and ->end_enable_verity() using the inode
248 	 * lock and only allow one process to be here at a time on a given file.
249 	 */
250 	BUILD_BUG_ON(sizeof(desc->root_hash) < FS_VERITY_MAX_DIGEST_SIZE);
251 	err = build_merkle_tree(filp, &params, desc->root_hash);
252 	if (err) {
253 		fsverity_err(inode, "Error %d building Merkle tree", err);
254 		goto rollback;
255 	}
256 
257 	/*
258 	 * Create the fsverity_info.  Don't bother trying to save work by
259 	 * reusing the merkle_tree_params from above.  Instead, just create the
260 	 * fsverity_info from the fsverity_descriptor as if it were just loaded
261 	 * from disk.  This is simpler, and it serves as an extra check that the
262 	 * metadata we're writing is valid before actually enabling verity.
263 	 */
264 	vi = fsverity_create_info(inode, desc);
265 	if (IS_ERR(vi)) {
266 		err = PTR_ERR(vi);
267 		goto rollback;
268 	}
269 
270 	trace_fsverity_tree_done(inode, vi, &params);
271 
272 	/*
273 	 * Add the fsverity_info into the hash table before finishing the
274 	 * initialization so that we don't have to undo the enabling when memory
275 	 * allocation for the hash table fails.  This is safe because looking up
276 	 * the fsverity_info always first checks the S_VERITY flag on the inode,
277 	 * which will only be set at the very end of the ->end_enable_verity
278 	 * method.
279 	 */
280 	err = fsverity_set_info(vi);
281 	if (err) {
282 		fsverity_free_info(vi);
283 		goto rollback;
284 	}
285 
286 	/*
287 	 * Tell the filesystem to finish enabling verity on the file.
288 	 * Serialized with ->begin_enable_verity() by the inode lock.  The file
289 	 * system needs to set the S_VERITY flag on the inode at the very end of
290 	 * the method, at which point the fsverity information can be accessed
291 	 * by other threads.
292 	 */
293 	inode_lock(inode);
294 	err = vops->end_enable_verity(filp, desc, desc_size, params.tree_size);
295 	inode_unlock(inode);
296 	if (err) {
297 		fsverity_err(inode, "%ps() failed with err %d",
298 			     vops->end_enable_verity, err);
299 		fsverity_remove_info(vi);
300 	} else if (WARN_ON_ONCE(!IS_VERITY(inode))) {
301 		fsverity_remove_info(vi);
302 		err = -EINVAL;
303 	}
304 out:
305 	kfree(params.hashstate);
306 	kfree(desc);
307 	return err;
308 
309 rollback:
310 	inode_lock(inode);
311 	(void)vops->end_enable_verity(filp, NULL, 0, params.tree_size);
312 	inode_unlock(inode);
313 	goto out;
314 }
315 
316 /**
317  * fsverity_ioctl_enable() - enable verity on a file
318  * @filp: file to enable verity on
319  * @uarg: user pointer to fsverity_enable_arg
320  *
321  * Enable fs-verity on a file.  See the "FS_IOC_ENABLE_VERITY" section of
322  * Documentation/filesystems/fsverity.rst for the documentation.
323  *
324  * Return: 0 on success, -errno on failure
325  */
326 int fsverity_ioctl_enable(struct file *filp, const void __user *uarg)
327 {
328 	struct inode *inode = file_inode(filp);
329 	struct fsverity_enable_arg arg;
330 	int err;
331 
332 	if (copy_from_user(&arg, uarg, sizeof(arg)))
333 		return -EFAULT;
334 
335 	if (arg.version != 1)
336 		return -EINVAL;
337 
338 	if (arg.__reserved1 ||
339 	    memchr_inv(arg.__reserved2, 0, sizeof(arg.__reserved2)))
340 		return -EINVAL;
341 
342 	if (!is_power_of_2(arg.block_size))
343 		return -EINVAL;
344 
345 	if (arg.salt_size > sizeof_field(struct fsverity_descriptor, salt))
346 		return -EMSGSIZE;
347 
348 	if (arg.sig_size > FS_VERITY_MAX_SIGNATURE_SIZE)
349 		return -EMSGSIZE;
350 
351 	/*
352 	 * Require a regular file with write access.  But the actual fd must
353 	 * still be readonly so that we can lock out all writers.  This is
354 	 * needed to guarantee that no writable fds exist to the file once it
355 	 * has verity enabled, and to stabilize the data being hashed.
356 	 */
357 
358 	err = file_permission(filp, MAY_WRITE);
359 	if (err)
360 		return err;
361 	/*
362 	 * __kernel_read() is used while building the Merkle tree.  So, we can't
363 	 * allow file descriptors that were opened for ioctl access only, using
364 	 * the special nonstandard access mode 3.  O_RDONLY only, please!
365 	 */
366 	if (!(filp->f_mode & FMODE_READ))
367 		return -EBADF;
368 
369 	if (IS_APPEND(inode))
370 		return -EPERM;
371 
372 	if (S_ISDIR(inode->i_mode))
373 		return -EISDIR;
374 
375 	if (!S_ISREG(inode->i_mode))
376 		return -EINVAL;
377 
378 	err = mnt_want_write_file(filp);
379 	if (err) /* -EROFS */
380 		return err;
381 
382 	err = deny_write_access(filp);
383 	if (err) /* -ETXTBSY */
384 		goto out_drop_write;
385 
386 	err = enable_verity(filp, &arg);
387 
388 	/*
389 	 * We no longer drop the inode's pagecache after enabling verity.  This
390 	 * used to be done to try to avoid a race condition where pages could be
391 	 * evicted after being used in the Merkle tree construction, then
392 	 * re-instantiated by a concurrent read.  Such pages are unverified, and
393 	 * the backing storage could have filled them with different content, so
394 	 * they shouldn't be used to fulfill reads once verity is enabled.
395 	 *
396 	 * But, dropping the pagecache has a big performance impact, and it
397 	 * doesn't fully solve the race condition anyway.  So for those reasons,
398 	 * and also because this race condition isn't very important relatively
399 	 * speaking (especially for small-ish files, where the chance of a page
400 	 * being used, evicted, *and* re-instantiated all while enabling verity
401 	 * is quite small), we no longer drop the inode's pagecache.
402 	 */
403 
404 	/*
405 	 * allow_write_access() is needed to pair with deny_write_access().
406 	 * Regardless, the filesystem won't allow writing to verity files.
407 	 */
408 	allow_write_access(filp);
409 out_drop_write:
410 	mnt_drop_write_file(filp);
411 	return err;
412 }
413 EXPORT_SYMBOL_GPL(fsverity_ioctl_enable);
414