1 // SPDX-License-Identifier: GPL-2.0 2 /* 3 * fs/ext4/verity.c: fs-verity support for ext4 4 * 5 * Copyright 2019 Google LLC 6 */ 7 8 /* 9 * Implementation of fsverity_operations for ext4. 10 * 11 * ext4 stores the verity metadata (Merkle tree and fsverity_descriptor) past 12 * the end of the file, starting at the first 64K boundary beyond i_size. This 13 * approach works because (a) verity files are readonly, and (b) pages fully 14 * beyond i_size aren't visible to userspace but can be read/written internally 15 * by ext4 with only some relatively small changes to ext4. This approach 16 * avoids having to depend on the EA_INODE feature and on rearchitecturing 17 * ext4's xattr support to support paging multi-gigabyte xattrs into memory, and 18 * to support encrypting xattrs. Note that the verity metadata *must* be 19 * encrypted when the file is, since it contains hashes of the plaintext data. 20 * 21 * Using a 64K boundary rather than a 4K one keeps things ready for 22 * architectures with 64K pages, and it doesn't necessarily waste space on-disk 23 * since there can be a hole between i_size and the start of the Merkle tree. 24 */ 25 26 #include <linux/quotaops.h> 27 28 #include "ext4.h" 29 #include "ext4_extents.h" 30 #include "ext4_jbd2.h" 31 32 static inline loff_t ext4_verity_metadata_pos(const struct inode *inode) 33 { 34 return round_up(inode->i_size, 65536); 35 } 36 37 /* 38 * Read some verity metadata from the inode. __vfs_read() can't be used because 39 * we need to read beyond i_size. 40 */ 41 static int pagecache_read(struct inode *inode, void *buf, size_t count, 42 loff_t pos) 43 { 44 while (count) { 45 size_t n = min_t(size_t, count, 46 PAGE_SIZE - offset_in_page(pos)); 47 struct page *page; 48 49 page = read_mapping_page(inode->i_mapping, pos >> PAGE_SHIFT, 50 NULL); 51 if (IS_ERR(page)) 52 return PTR_ERR(page); 53 54 memcpy_from_page(buf, page, offset_in_page(pos), n); 55 56 put_page(page); 57 58 buf += n; 59 pos += n; 60 count -= n; 61 } 62 return 0; 63 } 64 65 /* 66 * Write some verity metadata to the inode for FS_IOC_ENABLE_VERITY. 67 * kernel_write() can't be used because the file descriptor is readonly. 68 */ 69 static int pagecache_write(struct inode *inode, const void *buf, size_t count, 70 loff_t pos) 71 { 72 if (pos + count > inode->i_sb->s_maxbytes) 73 return -EFBIG; 74 75 while (count) { 76 size_t n = min_t(size_t, count, 77 PAGE_SIZE - offset_in_page(pos)); 78 struct page *page; 79 void *fsdata; 80 int res; 81 82 res = pagecache_write_begin(NULL, inode->i_mapping, pos, n, 0, 83 &page, &fsdata); 84 if (res) 85 return res; 86 87 memcpy_to_page(page, offset_in_page(pos), buf, n); 88 89 res = pagecache_write_end(NULL, inode->i_mapping, pos, n, n, 90 page, fsdata); 91 if (res < 0) 92 return res; 93 if (res != n) 94 return -EIO; 95 96 buf += n; 97 pos += n; 98 count -= n; 99 } 100 return 0; 101 } 102 103 static int ext4_begin_enable_verity(struct file *filp) 104 { 105 struct inode *inode = file_inode(filp); 106 const int credits = 2; /* superblock and inode for ext4_orphan_add() */ 107 handle_t *handle; 108 int err; 109 110 if (IS_DAX(inode) || ext4_test_inode_flag(inode, EXT4_INODE_DAX)) 111 return -EINVAL; 112 113 if (ext4_verity_in_progress(inode)) 114 return -EBUSY; 115 116 /* 117 * Since the file was opened readonly, we have to initialize the jbd 118 * inode and quotas here and not rely on ->open() doing it. This must 119 * be done before evicting the inline data. 120 */ 121 122 err = ext4_inode_attach_jinode(inode); 123 if (err) 124 return err; 125 126 err = dquot_initialize(inode); 127 if (err) 128 return err; 129 130 err = ext4_convert_inline_data(inode); 131 if (err) 132 return err; 133 134 if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 135 ext4_warning_inode(inode, 136 "verity is only allowed on extent-based files"); 137 return -EOPNOTSUPP; 138 } 139 140 /* 141 * ext4 uses the last allocated block to find the verity descriptor, so 142 * we must remove any other blocks past EOF which might confuse things. 143 */ 144 err = ext4_truncate(inode); 145 if (err) 146 return err; 147 148 handle = ext4_journal_start(inode, EXT4_HT_INODE, credits); 149 if (IS_ERR(handle)) 150 return PTR_ERR(handle); 151 152 err = ext4_orphan_add(handle, inode); 153 if (err == 0) 154 ext4_set_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS); 155 156 ext4_journal_stop(handle); 157 return err; 158 } 159 160 /* 161 * ext4 stores the verity descriptor beginning on the next filesystem block 162 * boundary after the Merkle tree. Then, the descriptor size is stored in the 163 * last 4 bytes of the last allocated filesystem block --- which is either the 164 * block in which the descriptor ends, or the next block after that if there 165 * weren't at least 4 bytes remaining. 166 * 167 * We can't simply store the descriptor in an xattr because it *must* be 168 * encrypted when ext4 encryption is used, but ext4 encryption doesn't encrypt 169 * xattrs. Also, if the descriptor includes a large signature blob it may be 170 * too large to store in an xattr without the EA_INODE feature. 171 */ 172 static int ext4_write_verity_descriptor(struct inode *inode, const void *desc, 173 size_t desc_size, u64 merkle_tree_size) 174 { 175 const u64 desc_pos = round_up(ext4_verity_metadata_pos(inode) + 176 merkle_tree_size, i_blocksize(inode)); 177 const u64 desc_end = desc_pos + desc_size; 178 const __le32 desc_size_disk = cpu_to_le32(desc_size); 179 const u64 desc_size_pos = round_up(desc_end + sizeof(desc_size_disk), 180 i_blocksize(inode)) - 181 sizeof(desc_size_disk); 182 int err; 183 184 err = pagecache_write(inode, desc, desc_size, desc_pos); 185 if (err) 186 return err; 187 188 return pagecache_write(inode, &desc_size_disk, sizeof(desc_size_disk), 189 desc_size_pos); 190 } 191 192 static int ext4_end_enable_verity(struct file *filp, const void *desc, 193 size_t desc_size, u64 merkle_tree_size) 194 { 195 struct inode *inode = file_inode(filp); 196 const int credits = 2; /* superblock and inode for ext4_orphan_del() */ 197 handle_t *handle; 198 struct ext4_iloc iloc; 199 int err = 0; 200 201 /* 202 * If an error already occurred (which fs/verity/ signals by passing 203 * desc == NULL), then only clean-up is needed. 204 */ 205 if (desc == NULL) 206 goto cleanup; 207 208 /* Append the verity descriptor. */ 209 err = ext4_write_verity_descriptor(inode, desc, desc_size, 210 merkle_tree_size); 211 if (err) 212 goto cleanup; 213 214 /* 215 * Write all pages (both data and verity metadata). Note that this must 216 * happen before clearing EXT4_STATE_VERITY_IN_PROGRESS; otherwise pages 217 * beyond i_size won't be written properly. For crash consistency, this 218 * also must happen before the verity inode flag gets persisted. 219 */ 220 err = filemap_write_and_wait(inode->i_mapping); 221 if (err) 222 goto cleanup; 223 224 /* 225 * Finally, set the verity inode flag and remove the inode from the 226 * orphan list (in a single transaction). 227 */ 228 229 handle = ext4_journal_start(inode, EXT4_HT_INODE, credits); 230 if (IS_ERR(handle)) { 231 err = PTR_ERR(handle); 232 goto cleanup; 233 } 234 235 err = ext4_orphan_del(handle, inode); 236 if (err) 237 goto stop_and_cleanup; 238 239 err = ext4_reserve_inode_write(handle, inode, &iloc); 240 if (err) 241 goto stop_and_cleanup; 242 243 ext4_set_inode_flag(inode, EXT4_INODE_VERITY); 244 ext4_set_inode_flags(inode, false); 245 err = ext4_mark_iloc_dirty(handle, inode, &iloc); 246 if (err) 247 goto stop_and_cleanup; 248 249 ext4_journal_stop(handle); 250 251 ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS); 252 return 0; 253 254 stop_and_cleanup: 255 ext4_journal_stop(handle); 256 cleanup: 257 /* 258 * Verity failed to be enabled, so clean up by truncating any verity 259 * metadata that was written beyond i_size (both from cache and from 260 * disk), removing the inode from the orphan list (if it wasn't done 261 * already), and clearing EXT4_STATE_VERITY_IN_PROGRESS. 262 */ 263 truncate_inode_pages(inode->i_mapping, inode->i_size); 264 ext4_truncate(inode); 265 ext4_orphan_del(NULL, inode); 266 ext4_clear_inode_state(inode, EXT4_STATE_VERITY_IN_PROGRESS); 267 return err; 268 } 269 270 static int ext4_get_verity_descriptor_location(struct inode *inode, 271 size_t *desc_size_ret, 272 u64 *desc_pos_ret) 273 { 274 struct ext4_ext_path *path; 275 struct ext4_extent *last_extent; 276 u32 end_lblk; 277 u64 desc_size_pos; 278 __le32 desc_size_disk; 279 u32 desc_size; 280 u64 desc_pos; 281 int err; 282 283 /* 284 * Descriptor size is in last 4 bytes of last allocated block. 285 * See ext4_write_verity_descriptor(). 286 */ 287 288 if (!ext4_test_inode_flag(inode, EXT4_INODE_EXTENTS)) { 289 EXT4_ERROR_INODE(inode, "verity file doesn't use extents"); 290 return -EFSCORRUPTED; 291 } 292 293 path = ext4_find_extent(inode, EXT_MAX_BLOCKS - 1, NULL, 0); 294 if (IS_ERR(path)) 295 return PTR_ERR(path); 296 297 last_extent = path[path->p_depth].p_ext; 298 if (!last_extent) { 299 EXT4_ERROR_INODE(inode, "verity file has no extents"); 300 ext4_ext_drop_refs(path); 301 kfree(path); 302 return -EFSCORRUPTED; 303 } 304 305 end_lblk = le32_to_cpu(last_extent->ee_block) + 306 ext4_ext_get_actual_len(last_extent); 307 desc_size_pos = (u64)end_lblk << inode->i_blkbits; 308 ext4_ext_drop_refs(path); 309 kfree(path); 310 311 if (desc_size_pos < sizeof(desc_size_disk)) 312 goto bad; 313 desc_size_pos -= sizeof(desc_size_disk); 314 315 err = pagecache_read(inode, &desc_size_disk, sizeof(desc_size_disk), 316 desc_size_pos); 317 if (err) 318 return err; 319 desc_size = le32_to_cpu(desc_size_disk); 320 321 /* 322 * The descriptor is stored just before the desc_size_disk, but starting 323 * on a filesystem block boundary. 324 */ 325 326 if (desc_size > INT_MAX || desc_size > desc_size_pos) 327 goto bad; 328 329 desc_pos = round_down(desc_size_pos - desc_size, i_blocksize(inode)); 330 if (desc_pos < ext4_verity_metadata_pos(inode)) 331 goto bad; 332 333 *desc_size_ret = desc_size; 334 *desc_pos_ret = desc_pos; 335 return 0; 336 337 bad: 338 EXT4_ERROR_INODE(inode, "verity file corrupted; can't find descriptor"); 339 return -EFSCORRUPTED; 340 } 341 342 static int ext4_get_verity_descriptor(struct inode *inode, void *buf, 343 size_t buf_size) 344 { 345 size_t desc_size = 0; 346 u64 desc_pos = 0; 347 int err; 348 349 err = ext4_get_verity_descriptor_location(inode, &desc_size, &desc_pos); 350 if (err) 351 return err; 352 353 if (buf_size) { 354 if (desc_size > buf_size) 355 return -ERANGE; 356 err = pagecache_read(inode, buf, desc_size, desc_pos); 357 if (err) 358 return err; 359 } 360 return desc_size; 361 } 362 363 static struct page *ext4_read_merkle_tree_page(struct inode *inode, 364 pgoff_t index, 365 unsigned long num_ra_pages) 366 { 367 DEFINE_READAHEAD(ractl, NULL, NULL, inode->i_mapping, index); 368 struct page *page; 369 370 index += ext4_verity_metadata_pos(inode) >> PAGE_SHIFT; 371 372 page = find_get_page_flags(inode->i_mapping, index, FGP_ACCESSED); 373 if (!page || !PageUptodate(page)) { 374 if (page) 375 put_page(page); 376 else if (num_ra_pages > 1) 377 page_cache_ra_unbounded(&ractl, num_ra_pages, 0); 378 page = read_mapping_page(inode->i_mapping, index, NULL); 379 } 380 return page; 381 } 382 383 static int ext4_write_merkle_tree_block(struct inode *inode, const void *buf, 384 u64 index, int log_blocksize) 385 { 386 loff_t pos = ext4_verity_metadata_pos(inode) + (index << log_blocksize); 387 388 return pagecache_write(inode, buf, 1 << log_blocksize, pos); 389 } 390 391 const struct fsverity_operations ext4_verityops = { 392 .begin_enable_verity = ext4_begin_enable_verity, 393 .end_enable_verity = ext4_end_enable_verity, 394 .get_verity_descriptor = ext4_get_verity_descriptor, 395 .read_merkle_tree_page = ext4_read_merkle_tree_page, 396 .write_merkle_tree_block = ext4_write_merkle_tree_block, 397 }; 398