// SPDX-License-Identifier: GPL-2.0
/*
 * fs/ext4/verity.c: fs-verity support for ext4
 *
 * Copyright 2019 Google LLC
 */

/*
 * Implementation of fsverity_operations for ext4.
 *
 * ext4 stores the verity metadata (Merkle tree and fsverity_descriptor) past
 * the end of the file, starting at the first 64K boundary beyond i_size.  This
 * approach works because (a) verity files are readonly, and (b) pages fully
 * beyond i_size aren't visible to userspace but can be read/written internally
 * by ext4 with only some relatively small changes to ext4.  This approach
 * avoids having to depend on the EA_INODE feature and on rearchitecting
 * ext4's xattr support to support paging multi-gigabyte xattrs into memory, and
 * to support encrypting xattrs.  Note that the verity metadata *must* be
 * encrypted when the file is, since it contains hashes of the plaintext data.
 *
 * Using a 64K boundary rather than a 4K one keeps things ready for
 * architectures with 64K pages, and it doesn't necessarily waste space on-disk
 * since there can be a hole between i_size and the start of the Merkle tree.
 *
 * Resulting layout, as implemented by the functions in this file:
 *
 *   [ file data ... i_size ] [ hole up to 64K multiple ] [ Merkle tree ]
 *   [ pad to fs block ] [ descriptor ] [ pad ] [ __le32 descriptor size ]
 *
 * where the 4-byte descriptor size occupies the last 4 bytes of the last
 * allocated filesystem block.
 */

#include <linux/quotaops.h>

#include "ext4.h"
#include "ext4_extents.h"
#include "ext4_jbd2.h"

/*
 * Return the byte offset at which the verity metadata region starts: i_size
 * rounded up to a multiple of 65536 (64K).  The Merkle tree is placed at this
 * offset; every Merkle tree helper below adds this value to the tree-relative
 * position or page index it is given.
 */
static inline loff_t ext4_verity_metadata_pos(const struct inode *inode)
{
	return round_up(inode->i_size, 65536);
}

/*
 * Read some verity metadata from the inode.  __vfs_read() can't be used because
 * we need to read beyond i_size.
 *
 * Copies @count bytes starting at file offset @pos from the inode's page cache
 * into @buf.  Each iteration reads the folio that contains @pos, copies as
 * many bytes as memcpy_from_file_folio() reports, and advances; the loop ends
 * once @count reaches zero.
 *
 * Return: 0 on success, or the negative errno carried by the error pointer
 * that read_mapping_folio() returned.
 */
static int pagecache_read(struct inode *inode, void *buf, size_t count,
			  loff_t pos)
{
	while (count) {
		struct folio *folio;
		size_t n;

		folio = read_mapping_folio(inode->i_mapping, pos >> PAGE_SHIFT,
					   NULL);
		if (IS_ERR(folio))
			return PTR_ERR(folio);

		/* n is the number of bytes actually copied from this folio. */
		n = memcpy_from_file_folio(buf, folio, pos, count);
		/* Drop the folio reference obtained by the read above. */
		folio_put(folio);

		buf += n;
		pos += n;
		count -= n;
	}
	return 0;
}

/*
 * Write some verity metadata to the inode for FS_IOC_ENABLE_VERITY.
 * kernel_write() can't be used because the file descriptor is readonly.
 *
 * Copies @count bytes from @buf into the inode's page cache at file offset
 * @pos by calling the mapping's ->write_begin() and ->write_end() directly
 * (with a NULL first argument).  The data is written in chunks that never
 * cross a PAGE_SIZE boundary.
 *
 * Return: 0 on success; -EFBIG if the write would extend past the
 * superblock's s_maxbytes; the error returned by ->write_begin() or
 * ->write_end(); or -EIO if ->write_end() accepted fewer bytes than were
 * copied.
 */
static int pagecache_write(struct inode *inode, const void *buf, size_t count,
			   loff_t pos)
{
	struct address_space *mapping = inode->i_mapping;
	const struct address_space_operations *aops = mapping->a_ops;

	if (pos + count > inode->i_sb->s_maxbytes)
		return -EFBIG;

	while (count) {
		/* Limit each chunk to the remainder of the current page. */
		size_t n = min_t(size_t, count,
				 PAGE_SIZE - offset_in_page(pos));
		struct folio *folio;
		void *fsdata = NULL;
		int res;

		res = aops->write_begin(NULL, mapping, pos, n, &folio, &fsdata);
		if (res)
			return res;

		memcpy_to_folio(folio, offset_in_folio(folio, pos), buf, n);

		res = aops->write_end(NULL, mapping, pos, n, n, folio, fsdata);
		if (res < 0)
			return res;
		/* A short write of metadata is treated as an I/O error. */
		if (res != n)
			return -EIO;

		buf += n;
		pos += n;
		count -= n;
	}
	return 0;
}

/*
 * ->begin_enable_verity(): prepare the inode for having verity metadata
 * written past i_size.
 *
 * Steps, in order: reject DAX inodes and inodes that already have a verity
 * build in progress; attach the jbd inode and initialize quotas; convert
 * inline data; require the extents format; truncate away blocks past EOF;
 * then, in one journal handle, add the inode to the orphan list and, if that
 * succeeded, set EXT4_STATE_VERITY_IN_PROGRESS.
 *
 * Return: 0 on success; -EINVAL for a DAX inode; -EBUSY if a verity build is
 * already in progress; -EOPNOTSUPP if the file is not extent-based; otherwise
 * the error returned by whichever helper failed.
 */
static int ext4_begin_enable_verity(struct file *filp)
{
	struct inode *inode = file_inode(filp);
	const int credits = 2; /* superblock and inode for ext4_orphan_add() */
	handle_t *handle;
	int err;

	if (IS_DAX(inode) || ext4_test_inode_flag(inode, EXT4_INODE_DAX))
		return -EINVAL;

	if (ext4_verity_in_progress(inode))
		return -EBUSY;

	/*
	 * Since the file was opened readonly, we have to initialize the jbd
	 * inode and quotas here and not rely on ->open() doing it.  This must
	 * be done before evicting the inline data.
	 */

	err = ext4_inode_attach_jinode(inode);
	if (err)
		return err;

	err = dquot_initialize(inode);
	if (err)
		return err;

	err = ext4_convert_inline_data(inode);
	if (err)
		return err;

	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
		ext4_warning_inode(inode,
				   "verity is only allowed on extent-based files");
		return -EOPNOTSUPP;
	}

	/*
	 * ext4 uses the last allocated block to find the verity descriptor, so
	 * we must remove any other blocks past EOF which might confuse things.
	 */
	err = ext4_truncate(inode);
	if (err)
		return err;

	handle = ext4_journal_start(inode, EXT4_HT_INODE, credits);
	if (IS_ERR(handle))
		return PTR_ERR(handle);

	/*
	 * The in-progress state is only set once the inode is on the orphan
	 * list.  NOTE(review): presumably the orphan entry is what lets a
	 * crash mid-build get the partial metadata truncated -- confirm.
	 */
	err = ext4_orphan_add(handle, inode);
	if (err == 0)
		ext4_set_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);

	/* The handle is stopped on both the success and the failure path. */
	ext4_journal_stop(handle);
	return err;
}

/*
 * ext4 stores the verity descriptor beginning on the next filesystem block
 * boundary after the Merkle tree.  Then, the descriptor size is stored in the
 * last 4 bytes of the last allocated filesystem block --- which is either the
 * block in which the descriptor ends, or the next block after that if there
 * weren't at least 4 bytes remaining.
 *
 * We can't simply store the descriptor in an xattr because it *must* be
 * encrypted when ext4 encryption is used, but ext4 encryption doesn't encrypt
 * xattrs.  Also, if the descriptor includes a large signature blob it may be
 * too large to store in an xattr without the EA_INODE feature.
 *
 * Positions computed below:
 *   desc_pos      - metadata start + @merkle_tree_size, rounded up to the
 *                   filesystem block size
 *   desc_end      - desc_pos + @desc_size
 *   desc_size_pos - the last 4 bytes of the block that has room for the
 *                   4-byte size field after desc_end
 *
 * Return: 0 on success, or the error from pagecache_write().
 */
static int ext4_write_verity_descriptor(struct inode *inode, const void *desc,
					size_t desc_size, u64 merkle_tree_size)
{
	const u64 desc_pos = round_up(ext4_verity_metadata_pos(inode) +
				      merkle_tree_size, i_blocksize(inode));
	const u64 desc_end = desc_pos + desc_size;
	/* The size is stored on disk as a little-endian 32-bit value. */
	const __le32 desc_size_disk = cpu_to_le32(desc_size);
	const u64 desc_size_pos = round_up(desc_end + sizeof(desc_size_disk),
					   i_blocksize(inode)) -
				  sizeof(desc_size_disk);
	int err;

	err = pagecache_write(inode, desc, desc_size, desc_pos);
	if (err)
		return err;

	return pagecache_write(inode, &desc_size_disk, sizeof(desc_size_disk),
			       desc_size_pos);
}

/*
 * ->end_enable_verity(): finish, or abort, enabling verity on the file.
 *
 * On the success path this writes the descriptor after the Merkle tree,
 * writes back and waits on all of the inode's dirty pages, and then, in a
 * single journal handle, removes the inode from the orphan list and sets
 * EXT4_INODE_VERITY.  EXT4_STATE_VERITY_IN_PROGRESS is cleared last.
 *
 * If @desc is NULL, or any step fails, the cleanup path runs instead: page
 * cache beyond i_size is dropped, the file is truncated, the inode is removed
 * from the orphan list and the in-progress state is cleared.
 *
 * Return: 0 on success, and also 0 when called with @desc == NULL (only
 * cleanup is performed in that case); otherwise the error from the step that
 * failed.  Return values of the cleanup calls themselves are not propagated.
 */
static int ext4_end_enable_verity(struct file *filp, const void *desc,
				  size_t desc_size, u64 merkle_tree_size)
{
	struct inode *inode = file_inode(filp);
	const int credits = 2; /* superblock and inode for ext4_orphan_del() */
	handle_t *handle;
	struct ext4_iloc iloc;
	int err = 0;

	/*
	 * If an error already occurred (which fs/verity/ signals by passing
	 * desc == NULL), then only clean-up is needed.
	 */
	if (desc == NULL)
		goto cleanup;

	/* Append the verity descriptor. */
	err = ext4_write_verity_descriptor(inode, desc, desc_size,
					   merkle_tree_size);
	if (err)
		goto cleanup;

	/*
	 * Write all pages (both data and verity metadata).  Note that this must
	 * happen before clearing EXT4_STATE_VERITY_IN_PROGRESS; otherwise pages
	 * beyond i_size won't be written properly.  For crash consistency, this
	 * also must happen before the verity inode flag gets persisted.
	 */
	err = filemap_write_and_wait(inode->i_mapping);
	if (err)
		goto cleanup;

	/*
	 * Finally, set the verity inode flag and remove the inode from the
	 * orphan list (in a single transaction).
	 */

	handle = ext4_journal_start(inode, EXT4_HT_INODE, credits);
	if (IS_ERR(handle)) {
		err = PTR_ERR(handle);
		goto cleanup;
	}

	ext4_fc_mark_ineligible(inode->i_sb, EXT4_FC_REASON_VERITY, handle);

	err = ext4_orphan_del(handle, inode);
	if (err)
		goto stop_and_cleanup;

	err = ext4_reserve_inode_write(handle, inode, &iloc);
	if (err)
		goto stop_and_cleanup;

	ext4_set_inode_flag(inode, EXT4_INODE_VERITY);
	ext4_set_inode_flags(inode, false);
	err = ext4_mark_iloc_dirty(handle, inode, &iloc);
	if (err)
		goto stop_and_cleanup;

	ext4_journal_stop(handle);

	ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
	return 0;

stop_and_cleanup:
	/* Reached only after the handle was started, so it must be stopped. */
	ext4_journal_stop(handle);
cleanup:
	/*
	 * Verity failed to be enabled, so clean up by truncating any verity
	 * metadata that was written beyond i_size (both from cache and from
	 * disk), removing the inode from the orphan list (if it wasn't done
	 * already), and clearing EXT4_STATE_VERITY_IN_PROGRESS.
	 */
	truncate_inode_pages(inode->i_mapping, inode->i_size);
	ext4_truncate(inode);
	ext4_orphan_del(NULL, inode);
	ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS);
	return err;
}

/*
 * Locate the verity descriptor of a verity file.
 *
 * This is the read-side counterpart of ext4_write_verity_descriptor(): it
 * finds the end of the last extent, reads the 4-byte little-endian descriptor
 * size stored just before that point, and derives the block-aligned
 * descriptor start from it.
 *
 * On success, the descriptor size is stored in *@desc_size_ret and its file
 * offset in *@desc_pos_ret; neither is written on failure.
 *
 * Return: 0 on success; -EFSCORRUPTED (after logging through
 * EXT4_ERROR_INODE) if the file does not use extents, has no extents, or the
 * stored size/position fails the sanity checks; otherwise the error from
 * ext4_find_extent() or pagecache_read().
 */
static int ext4_get_verity_descriptor_location(struct inode *inode,
					       size_t *desc_size_ret,
					       u64 *desc_pos_ret)
{
	struct ext4_ext_path *path;
	struct ext4_extent *last_extent;
	u32 end_lblk;
	u64 desc_size_pos;
	__le32 desc_size_disk;
	u32 desc_size;
	u64 desc_pos;
	int err;

	/*
	 * Descriptor size is in last 4 bytes of last allocated block.
	 * See ext4_write_verity_descriptor().
	 */

	if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) {
		EXT4_ERROR_INODE(inode, "verity file doesn't use extents");
		return -EFSCORRUPTED;
	}

	/*
	 * Look up the highest possible logical block; the extent found at the
	 * leaf level of the returned path is used as the file's last extent.
	 */
	path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0);
	if (IS_ERR(path))
		return PTR_ERR(path);

	last_extent = path[path->p_depth].p_ext;
	if (!last_extent) {
		EXT4_ERROR_INODE(inode, "verity file has no extents");
		ext4_free_ext_path(path);
		return -EFSCORRUPTED;
	}

	/* First logical block past the last extent, converted to bytes. */
	end_lblk = le32_to_cpu(last_extent->ee_block) +
		   ext4_ext_get_actual_len(last_extent);
	desc_size_pos = EXT4_LBLK_TO_B(inode, end_lblk);
	/* last_extent points into path, so it is not used past this point. */
	ext4_free_ext_path(path);

	/* Guard the subtraction below against underflow. */
	if (desc_size_pos < sizeof(desc_size_disk))
		goto bad;
	desc_size_pos -= sizeof(desc_size_disk);

	err = pagecache_read(inode, &desc_size_disk, sizeof(desc_size_disk),
			     desc_size_pos);
	if (err)
		return err;
	desc_size = le32_to_cpu(desc_size_disk);

	/*
	 * The descriptor is stored just before the desc_size_disk, but starting
	 * on a filesystem block boundary.
	 */

	/*
	 * The INT_MAX bound keeps the size representable in the int returned
	 * by ext4_get_verity_descriptor(); the second test guards the
	 * subtraction below against underflow.
	 */
	if (desc_size > INT_MAX || desc_size > desc_size_pos)
		goto bad;

	desc_pos = round_down(desc_size_pos - desc_size, i_blocksize(inode));
	/* The descriptor can never start before the metadata region. */
	if (desc_pos < ext4_verity_metadata_pos(inode))
		goto bad;

	*desc_size_ret = desc_size;
	*desc_pos_ret = desc_pos;
	return 0;

bad:
	EXT4_ERROR_INODE(inode, "verity file corrupted; can't find descriptor");
	return -EFSCORRUPTED;
}

/*
 * ->get_verity_descriptor(): return the size of the verity descriptor and,
 * if a buffer is supplied, its contents.
 *
 * When @buf_size is 0 only the size is looked up and @buf is not touched.
 * Otherwise the descriptor is read into @buf, provided it fits.
 *
 * Return: the descriptor size in bytes on success; -ERANGE if @buf_size is
 * nonzero but smaller than the descriptor; otherwise the error from
 * ext4_get_verity_descriptor_location() or pagecache_read().
 */
static int ext4_get_verity_descriptor(struct inode *inode, void *buf,
				      size_t buf_size)
{
	size_t desc_size = 0;
	u64 desc_pos = 0;
	int err;

	err = ext4_get_verity_descriptor_location(inode, &desc_size, &desc_pos);
	if (err)
		return err;

	if (buf_size) {
		if (desc_size > buf_size)
			return -ERANGE;
		err = pagecache_read(inode, buf, desc_size, desc_pos);
		if (err)
			return err;
	}
	return desc_size;
}

/*
 * ->read_merkle_tree_page(): translate the Merkle-tree-relative page @index
 * into a page index within the file by adding the page offset of the verity
 * metadata region, then hand off to generic_read_merkle_tree_page().
 */
static struct page *ext4_read_merkle_tree_page(struct inode *inode,
					       pgoff_t index)
{
	index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT;
	return generic_read_merkle_tree_page(inode, index);
}

/*
 * ->readahead_merkle_tree(): same index translation as
 * ext4_read_merkle_tree_page(), then hand @nr_pages starting at the
 * translated index to generic_readahead_merkle_tree().
 */
static void ext4_readahead_merkle_tree(struct inode *inode, pgoff_t index,
				       unsigned long nr_pages)
{
	index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT;
	generic_readahead_merkle_tree(inode, index, nr_pages);
}

/*
 * ->write_merkle_tree_block(): write @size bytes of Merkle tree data from
 * @buf.  @pos is relative to the start of the tree; it is converted to a file
 * offset by adding the start of the verity metadata region.
 *
 * Return: 0 on success, or the error from pagecache_write().
 */
static int ext4_write_merkle_tree_block(struct file *file, const void *buf,
					u64 pos, unsigned int size)
{
	pos += ext4_verity_metadata_pos(file_inode(file));

	return pagecache_write(file_inode(file), buf, size, pos);
}

/* fs-verity operations table for ext4; wires up the callbacks defined above. */
const struct fsverity_operations ext4_verityops = {
	.begin_enable_verity	= ext4_begin_enable_verity,
	.end_enable_verity	= ext4_end_enable_verity,
	.get_verity_descriptor	= ext4_get_verity_descriptor,
	.read_merkle_tree_page	= ext4_read_merkle_tree_page,
	.readahead_merkle_tree	= ext4_readahead_merkle_tree,
	.write_merkle_tree_block = ext4_write_merkle_tree_block,
};