1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * fs/ext4/verity.c: fs-verity support for ext4 4 * 5 * Copyright 2019 Google LLC 6 */ 7 8 /* 9 * Implementation of fsverity_operations for ext4. 10 * 11 * ext4 stores the verity metadata (Merkle tree and fsverity_descriptor) past 12 * the end of the file, starting at the first 64K boundary beyond i_size. This 13 * approach works because (a) verity files are readonly, and (b) pages fully 14 * beyond i_size aren't visible to userspace but can be read/written internally 15 * by ext4 with only some relatively small changes to ext4. This approach 16 * avoids having to depend on the EA_INODE feature and on rearchitecturing 17 * ext4's xattr support to support paging multi-gigabyte xattrs into memory, and 18 * to support encrypting xattrs. Note that the verity metadata *must* be 19 * encrypted when the file is, since it contains hashes of the plaintext data. 20 * 21 * Using a 64K boundary rather than a 4K one keeps things ready for 22 * architectures with 64K pages, and it doesn't necessarily waste space on-disk 23 * since there can be a hole between i_size and the start of the Merkle tree. 24 */ 25 26 #include <linux/quotaops.h> 27 28 #include "ext4.h" 29 #include "ext4_extents.h" 30 #include "ext4_jbd2.h" 31 32 static inline loff_t ext4_verity_metadata_pos(const struct inode *inode) 33 { 34 return round_up(inode->i_size, 65536); 35 } 36 37 /* 38 * Read some verity metadata from the inode. __vfs_read() can't be used because 39 * we need to read beyond i_size. 40 */ 41 static int pagecache_read(struct inode *inode, void *buf, size_t count, 42 loff_t pos) 43 { 44 while (count) { 45 size_t n = min_t(size_t, count, 46 PAGE_SIZE - offset_in_page(pos)); 47 struct page *page; 48 49 page = read_mapping_page(inode->i_mapping, pos >> PAGE_SHIFT, 50 NULL); 51 if (IS_ERR(page)) 52 return PTR_ERR(page); 53 54 memcpy_from_page(buf, page, offset_in_page(pos), n); 55 56 put_page(page); 57 58 buf += n; 59 pos += n; 60 count -= n; 61 } 62 return 0; 63 } 64 65 /* 66 * Write some verity metadata to the inode for FS_IOC_ENABLE_VERITY. 67 * kernel_write() can't be used because the file descriptor is readonly. 68 */ 69 static int pagecache_write(struct inode *inode, const void *buf, size_t count, 70 loff_t pos) 71 { 72 struct address_space *mapping = inode->i_mapping; 73 const struct address_space_operations *aops = mapping->a_ops; 74 75 if (pos + count > inode->i_sb->s_maxbytes) 76 return -EFBIG; 77 78 while (count) { 79 size_t n = min_t(size_t, count, 80 PAGE_SIZE - offset_in_page(pos)); 81 struct page *page; 82 void *fsdata; 83 int res; 84 85 res = aops->write_begin(NULL, mapping, pos, n, &page, &fsdata); 86 if (res) 87 return res; 88 89 memcpy_to_page(page, offset_in_page(pos), buf, n); 90 91 res = aops->write_end(NULL, mapping, pos, n, n, page, fsdata); 92 if (res < 0) 93 return res; 94 if (res != n) 95 return -EIO; 96 97 buf += n; 98 pos += n; 99 count -= n; 100 } 101 return 0; 102 } 103 104 static int ext4_begin_enable_verity(struct file *filp) 105 { 106 struct inode *inode = file_inode(filp); 107 const int credits = 2; /* superblock and inode for ext4_orphan_add() */ 108 handle_t *handle; 109 int err; 110 111 if (IS_DAX(inode) || ext4_test_inode_flag(inode, EXT4_INODE_DAX)) 112 return -EINVAL; 113 114 if (ext4_verity_in_progress(inode)) 115 return -EBUSY; 116 117 /* 118 * Since the file was opened readonly, we have to initialize the jbd 119 * inode and quotas here and not rely on ->open() doing it. This must 120 * be done before evicting the inline data. 121 */ 122 123 err = ext4_inode_attach_jinode(inode); 124 if (err) 125 return err; 126 127 err = dquot_initialize(inode); 128 if (err) 129 return err; 130 131 err = ext4_convert_inline_data(inode); 132 if (err) 133 return err; 134 135 if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 136 ext4_warning_inode(inode, 137 "verity is only allowed on extent-based files"); 138 return -EOPNOTSUPP; 139 } 140 141 /* 142 * ext4 uses the last allocated block to find the verity descriptor, so 143 * we must remove any other blocks past EOF which might confuse things. 144 */ 145 err = ext4_truncate(inode); 146 if (err) 147 return err; 148 149 handle = ext4_journal_start(inode, EXT4_HT_INODE, credits); 150 if (IS_ERR(handle)) 151 return PTR_ERR(handle); 152 153 err = ext4_orphan_add(handle, inode); 154 if (err == 0) 155 ext4_set_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS); 156 157 ext4_journal_stop(handle); 158 return err; 159 } 160 161 /* 162 * ext4 stores the verity descriptor beginning on the next filesystem block 163 * boundary after the Merkle tree. Then, the descriptor size is stored in the 164 * last 4 bytes of the last allocated filesystem block --- which is either the 165 * block in which the descriptor ends, or the next block after that if there 166 * weren't at least 4 bytes remaining. 167 * 168 * We can't simply store the descriptor in an xattr because it *must* be 169 * encrypted when ext4 encryption is used, but ext4 encryption doesn't encrypt 170 * xattrs. Also, if the descriptor includes a large signature blob it may be 171 * too large to store in an xattr without the EA_INODE feature. 172 */ 173 static int ext4_write_verity_descriptor(struct inode *inode, const void *desc, 174 size_t desc_size, u64 merkle_tree_size) 175 { 176 const u64 desc_pos = round_up(ext4_verity_metadata_pos(inode) + 177 merkle_tree_size, i_blocksize(inode)); 178 const u64 desc_end = desc_pos + desc_size; 179 const __le32 desc_size_disk = cpu_to_le32(desc_size); 180 const u64 desc_size_pos = round_up(desc_end + sizeof(desc_size_disk), 181 i_blocksize(inode)) - 182 sizeof(desc_size_disk); 183 int err; 184 185 err = pagecache_write(inode, desc, desc_size, desc_pos); 186 if (err) 187 return err; 188 189 return pagecache_write(inode, &desc_size_disk, sizeof(desc_size_disk), 190 desc_size_pos); 191 } 192 193 static int ext4_end_enable_verity(struct file *filp, const void *desc, 194 size_t desc_size, u64 merkle_tree_size) 195 { 196 struct inode *inode = file_inode(filp); 197 const int credits = 2; /* superblock and inode for ext4_orphan_del() */ 198 handle_t *handle; 199 struct ext4_iloc iloc; 200 int err = 0; 201 202 /* 203 * If an error already occurred (which fs/verity/ signals by passing 204 * desc == NULL), then only clean-up is needed. 205 */ 206 if (desc == NULL) 207 goto cleanup; 208 209 /* Append the verity descriptor. */ 210 err = ext4_write_verity_descriptor(inode, desc, desc_size, 211 merkle_tree_size); 212 if (err) 213 goto cleanup; 214 215 /* 216 * Write all pages (both data and verity metadata). Note that this must 217 * happen before clearing EXT4_STATE_VERITY_IN_PROGRESS; otherwise pages 218 * beyond i_size won't be written properly. For crash consistency, this 219 * also must happen before the verity inode flag gets persisted. 220 */ 221 err = filemap_write_and_wait(inode->i_mapping); 222 if (err) 223 goto cleanup; 224 225 /* 226 * Finally, set the verity inode flag and remove the inode from the 227 * orphan list (in a single transaction). 228 */ 229 230 handle = ext4_journal_start(inode, EXT4_HT_INODE, credits); 231 if (IS_ERR(handle)) { 232 err = PTR_ERR(handle); 233 goto cleanup; 234 } 235 236 err = ext4_orphan_del(handle, inode); 237 if (err) 238 goto stop_and_cleanup; 239 240 err = ext4_reserve_inode_write(handle, inode, &iloc); 241 if (err) 242 goto stop_and_cleanup; 243 244 ext4_set_inode_flag(inode, EXT4_INODE_VERITY); 245 ext4_set_inode_flags(inode, false); 246 err = ext4_mark_iloc_dirty(handle, inode, &iloc); 247 if (err) 248 goto stop_and_cleanup; 249 250 ext4_journal_stop(handle); 251 252 ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS); 253 return 0; 254 255 stop_and_cleanup: 256 ext4_journal_stop(handle); 257 cleanup: 258 /* 259 * Verity failed to be enabled, so clean up by truncating any verity 260 * metadata that was written beyond i_size (both from cache and from 261 * disk), removing the inode from the orphan list (if it wasn't done 262 * already), and clearing EXT4_STATE_VERITY_IN_PROGRESS. 263 */ 264 truncate_inode_pages(inode->i_mapping, inode->i_size); 265 ext4_truncate(inode); 266 ext4_orphan_del(NULL, inode); 267 ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS); 268 return err; 269 } 270 271 static int ext4_get_verity_descriptor_location(struct inode *inode, 272 size_t *desc_size_ret, 273 u64 *desc_pos_ret) 274 { 275 struct ext4_ext_path *path; 276 struct ext4_extent *last_extent; 277 u32 end_lblk; 278 u64 desc_size_pos; 279 __le32 desc_size_disk; 280 u32 desc_size; 281 u64 desc_pos; 282 int err; 283 284 /* 285 * Descriptor size is in last 4 bytes of last allocated block. 286 * See ext4_write_verity_descriptor(). 287 */ 288 289 if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 290 EXT4_ERROR_INODE(inode, "verity file doesn't use extents"); 291 return -EFSCORRUPTED; 292 } 293 294 path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0); 295 if (IS_ERR(path)) 296 return PTR_ERR(path); 297 298 last_extent = path[path->p_depth].p_ext; 299 if (!last_extent) { 300 EXT4_ERROR_INODE(inode, "verity file has no extents"); 301 ext4_ext_drop_refs(path); 302 kfree(path); 303 return -EFSCORRUPTED; 304 } 305 306 end_lblk = le32_to_cpu(last_extent->ee_block) + 307 ext4_ext_get_actual_len(last_extent); 308 desc_size_pos = (u64)end_lblk << inode->i_blkbits; 309 ext4_ext_drop_refs(path); 310 kfree(path); 311 312 if (desc_size_pos < sizeof(desc_size_disk)) 313 goto bad; 314 desc_size_pos -= sizeof(desc_size_disk); 315 316 err = pagecache_read(inode, &desc_size_disk, sizeof(desc_size_disk), 317 desc_size_pos); 318 if (err) 319 return err; 320 desc_size = le32_to_cpu(desc_size_disk); 321 322 /* 323 * The descriptor is stored just before the desc_size_disk, but starting 324 * on a filesystem block boundary. 325 */ 326 327 if (desc_size > INT_MAX || desc_size > desc_size_pos) 328 goto bad; 329 330 desc_pos = round_down(desc_size_pos - desc_size, i_blocksize(inode)); 331 if (desc_pos < ext4_verity_metadata_pos(inode)) 332 goto bad; 333 334 *desc_size_ret = desc_size; 335 *desc_pos_ret = desc_pos; 336 return 0; 337 338 bad: 339 EXT4_ERROR_INODE(inode, "verity file corrupted; can't find descriptor"); 340 return -EFSCORRUPTED; 341 } 342 343 static int ext4_get_verity_descriptor(struct inode *inode, void *buf, 344 size_t buf_size) 345 { 346 size_t desc_size = 0; 347 u64 desc_pos = 0; 348 int err; 349 350 err = ext4_get_verity_descriptor_location(inode, &desc_size, &desc_pos); 351 if (err) 352 return err; 353 354 if (buf_size) { 355 if (desc_size > buf_size) 356 return -ERANGE; 357 err = pagecache_read(inode, buf, desc_size, desc_pos); 358 if (err) 359 return err; 360 } 361 return desc_size; 362 } 363 364 static struct page *ext4_read_merkle_tree_page(struct inode *inode, 365 pgoff_t index, 366 unsigned long num_ra_pages) 367 { 368 DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index); 369 struct page *page; 370 371 index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT; 372 373 page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED); 374 if (!page || !PageUptodate(page)) { 375 if (page) 376 put_page(page); 377 else if (num_ra_pages > 1) 378 page_cache_ra_unbounded(&ractl, num_ra_pages, 0); 379 page = read_mapping_page(inode->i_mapping, index, NULL); 380 } 381 return page; 382 } 383 384 static int ext4_write_merkle_tree_block(struct inode *inode, const void *buf, 385 u64 index, int log_blocksize) 386 { 387 loff_t pos = ext4_verity_metadata_pos(inode) + (index << log_blocksize); 388 389 return pagecache_write(inode, buf, 1 << log_blocksize, pos); 390 } 391 392 const struct fsverity_operations ext4_verityops = { 393 .begin_enable_verity = ext4_begin_enable_verity, 394 .end_enable_verity = ext4_end_enable_verity, 395 .get_verity_descriptor = ext4_get_verity_descriptor, 396 .read_merkle_tree_page = ext4_read_merkle_tree_page, 397 .write_merkle_tree_block = ext4_write_merkle_tree_block, 398 }; 399