1 // SPDX-License-Identifier: LGPL-2.1 2 /* 3 * Copyright (c) 2012 Taobao. 4 * Written by Tao Ma <boyu.mt@taobao.com> 5 */ 6 7 #include <linux/iomap.h> 8 #include <linux/fiemap.h> 9 #include <linux/namei.h> 10 #include <linux/iversion.h> 11 #include <linux/sched/mm.h> 12 13 #include "ext4_jbd2.h" 14 #include "ext4.h" 15 #include "xattr.h" 16 #include "truncate.h" 17 18 #define EXT4_XATTR_SYSTEM_DATA "data" 19 #define EXT4_MIN_INLINE_DATA_SIZE ((sizeof(__le32) * EXT4_N_BLOCKS)) 20 #define EXT4_INLINE_DOTDOT_OFFSET 2 21 #define EXT4_INLINE_DOTDOT_SIZE 4 22 23 24 static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping, 25 struct inode *inode, 26 void **fsdata); 27 28 static int ext4_get_inline_size(struct inode *inode) 29 { 30 if (EXT4_I(inode)->i_inline_off) 31 return EXT4_I(inode)->i_inline_size; 32 33 return 0; 34 } 35 36 static int get_max_inline_xattr_value_size(struct inode *inode, 37 struct ext4_iloc *iloc) 38 { 39 struct ext4_xattr_ibody_header *header; 40 struct ext4_xattr_entry *entry; 41 struct ext4_inode *raw_inode; 42 void *end; 43 int free, min_offs; 44 45 if (!EXT4_INODE_HAS_XATTR_SPACE(inode)) 46 return 0; 47 48 min_offs = EXT4_SB(inode->i_sb)->s_inode_size - 49 EXT4_GOOD_OLD_INODE_SIZE - 50 EXT4_I(inode)->i_extra_isize - 51 sizeof(struct ext4_xattr_ibody_header); 52 53 /* 54 * We need to subtract another sizeof(__u32) since an in-inode xattr 55 * needs an empty 4 bytes to indicate the gap between the xattr entry 56 * and the name/value pair. 57 */ 58 if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR)) 59 return EXT4_XATTR_SIZE(min_offs - 60 EXT4_XATTR_LEN(strlen(EXT4_XATTR_SYSTEM_DATA)) - 61 EXT4_XATTR_ROUND - sizeof(__u32)); 62 63 raw_inode = ext4_raw_inode(iloc); 64 header = IHDR(inode, raw_inode); 65 entry = IFIRST(header); 66 end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; 67 68 /* Compute min_offs. 
*/ 69 while (!IS_LAST_ENTRY(entry)) { 70 void *next = EXT4_XATTR_NEXT(entry); 71 72 if (next >= end) { 73 EXT4_ERROR_INODE(inode, 74 "corrupt xattr in inline inode"); 75 return 0; 76 } 77 if (!entry->e_value_inum && entry->e_value_size) { 78 size_t offs = le16_to_cpu(entry->e_value_offs); 79 if (offs < min_offs) 80 min_offs = offs; 81 } 82 entry = next; 83 } 84 free = min_offs - 85 ((void *)entry - (void *)IFIRST(header)) - sizeof(__u32); 86 87 if (EXT4_I(inode)->i_inline_off) { 88 entry = (struct ext4_xattr_entry *) 89 ((void *)raw_inode + EXT4_I(inode)->i_inline_off); 90 91 free += EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size)); 92 goto out; 93 } 94 95 free -= EXT4_XATTR_LEN(strlen(EXT4_XATTR_SYSTEM_DATA)); 96 97 if (free > EXT4_XATTR_ROUND) 98 free = EXT4_XATTR_SIZE(free - EXT4_XATTR_ROUND); 99 else 100 free = 0; 101 102 out: 103 return free; 104 } 105 106 /* 107 * Get the maximum size we now can store in an inode. 108 * If we can't find the space for a xattr entry, don't use the space 109 * of the extents since we have no space to indicate the inline data. 
*/
int ext4_get_max_inline_size(struct inode *inode)
{
	int error, max_inline_size;
	struct ext4_iloc iloc;

	/* Without extra inode space there is nothing to store inline data in. */
	if (EXT4_I(inode)->i_extra_isize == 0)
		return 0;

	error = ext4_get_inode_loc(inode, &iloc);
	if (error) {
		/* The failure is reported, but the caller only sees "0 bytes". */
		ext4_error_inode_err(inode, __func__, __LINE__, 0, -error,
				     "can't get inode location %lu",
				     inode->i_ino);
		return 0;
	}

	down_read(&EXT4_I(inode)->xattr_sem);
	max_inline_size = get_max_inline_xattr_value_size(inode, &iloc);
	up_read(&EXT4_I(inode)->xattr_sem);

	brelse(iloc.bh);

	/* No room for the xattr value: do not offer the i_block bytes either. */
	if (!max_inline_size)
		return 0;

	return max_inline_size + EXT4_MIN_INLINE_DATA_SIZE;
}

/*
 * Look up the inline-data xattr entry in the inode body and cache its
 * offset (i_inline_off) and the resulting inline size (i_inline_size).
 *
 * This function does not take xattr_sem, which is OK because it is
 * currently only used in a code path coming from ext4_iget, before
 * the new inode has been unlocked.  (ext4_write_inline_data_end() in this
 * file also calls it, with the xattr write lock already held.)
 *
 * Returns 0 on success, including when no entry exists or the inode has no
 * extra space, -EFSCORRUPTED if the entry refers to an external xattr inode,
 * or the error from ext4_get_inode_loc() / ext4_xattr_ibody_find().
 */
int ext4_find_inline_data_nolock(struct inode *inode)
{
	struct ext4_xattr_ibody_find is = {
		.s = { .not_found = -ENODATA, },
	};
	struct ext4_xattr_info i = {
		.name_index = EXT4_XATTR_INDEX_SYSTEM,
		.name = EXT4_XATTR_SYSTEM_DATA,
	};
	int error;

	if (EXT4_I(inode)->i_extra_isize == 0)
		return 0;

	error = ext4_get_inode_loc(inode, &is.iloc);
	if (error)
		return error;

	error = ext4_xattr_ibody_find(inode, &i, &is);
	if (error)
		goto out;

	if (!is.s.not_found) {
		/* Inline data must live in the inode body itself. */
		if (is.s.here->e_value_inum) {
			EXT4_ERROR_INODE(inode, "inline data xattr refers "
					 "to an external xattr inode");
			error = -EFSCORRUPTED;
			goto out;
		}
		/* Offset of the entry relative to the start of the raw inode. */
		EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here -
					(void *)ext4_raw_inode(&is.iloc));
		EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE +
				le32_to_cpu(is.s.here->e_value_size);
	}
out:
	brelse(is.iloc.bh);
	return error;
}

/*
 * Copy up to @len bytes of inline data into @buffer.
 *
 * The first EXT4_MIN_INLINE_DATA_SIZE bytes come from i_block of the raw
 * inode; anything beyond that comes from the value of the inline-data xattr
 * entry at i_inline_off, limited to that entry's e_value_size.
 *
 * @len must not exceed i_inline_size (enforced with BUG_ON).
 * Returns the number of bytes actually copied.
 */
static int ext4_read_inline_data(struct inode *inode, void *buffer,
				 unsigned int len,
				 struct ext4_iloc *iloc)
{
	struct ext4_xattr_entry *entry;
	struct ext4_xattr_ibody_header *header;
	int cp_len = 0;
	struct ext4_inode *raw_inode;

	if (!len)
		return 0;

	BUG_ON(len > EXT4_I(inode)->i_inline_size);

	/* Part 1: the bytes stored in i_block. */
	cp_len = min_t(unsigned int, len, EXT4_MIN_INLINE_DATA_SIZE);

	raw_inode = ext4_raw_inode(iloc);
	memcpy(buffer, (void *)(raw_inode->i_block), cp_len);

	len -= cp_len;
	buffer += cp_len;

	if (!len)
		goto out;

	/* Part 2: the remainder stored as the xattr value. */
	header = IHDR(inode, raw_inode);
	entry = (struct ext4_xattr_entry *)((void *)raw_inode +
					    EXT4_I(inode)->i_inline_off);
	len = min_t(unsigned int, len,
		    (unsigned int)le32_to_cpu(entry->e_value_size));

	memcpy(buffer,
	       (void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs), len);
	cp_len += len;

out:
	return cp_len;
}

/*
 * Write @len bytes to the inline data of @inode, starting at inline offset
 * @pos.  The source bytes are taken from @buffer + @pos, i.e. @buffer is
 * indexed by the same offset as the inline data.
 *
 * Bytes below EXT4_MIN_INLINE_DATA_SIZE go into i_block of the raw inode,
 * the rest into the value of the inline-data xattr entry.  The inline-data
 * entry must already exist and @pos + @len must fit in i_inline_size (both
 * enforced with BUG_ON).  Nothing is written when the filesystem is in an
 * emergency state.
 *
 * NOTE(review): the previous comment here described a 'create' flag; this
 * function has no such parameter.
 */
static void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc,
				   void *buffer, loff_t pos, unsigned int len)
{
	struct ext4_xattr_entry *entry;
	struct ext4_xattr_ibody_header *header;
	struct ext4_inode *raw_inode;
	int cp_len = 0;

	if (unlikely(ext4_emergency_state(inode->i_sb)))
		return;

	BUG_ON(!EXT4_I(inode)->i_inline_off);
	BUG_ON(pos + len > EXT4_I(inode)->i_inline_size);

	raw_inode = ext4_raw_inode(iloc);
	buffer += pos;

	if (pos < EXT4_MIN_INLINE_DATA_SIZE) {
		/* Copy only the part of the range that lies inside i_block. */
		cp_len = pos + len > EXT4_MIN_INLINE_DATA_SIZE ?
			 EXT4_MIN_INLINE_DATA_SIZE - pos : len;
		memcpy((void *)raw_inode->i_block + pos, buffer, cp_len);

		len -= cp_len;
		buffer += cp_len;
		pos += cp_len;
	}

	if (!len)
		return;

	/* From here on @pos is an offset into the xattr value. */
	pos -= EXT4_MIN_INLINE_DATA_SIZE;
	header = IHDR(inode, raw_inode);
	entry = (struct ext4_xattr_entry *)((void *)raw_inode +
					    EXT4_I(inode)->i_inline_off);

	memcpy((void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs) + pos,
	       buffer, len);
}

/*
 * Create the inline-data xattr entry for @inode so that @len bytes of inline
 * data can be stored.
 *
 * The xattr value only has to hold what does not fit in i_block, so its
 * length is @len - EXT4_MIN_INLINE_DATA_SIZE (or 0 for small @len).  On
 * success i_block is zeroed, i_inline_off / i_inline_size are set, the
 * EXTENTS inode flag is cleared and the INLINE_DATA flag is set.
 *
 * Returns 0 on success, -EFSCORRUPTED if the entry unexpectedly exists, or
 * another negative error.  On -ENOSPC from ext4_xattr_ibody_set() the
 * EXT4_STATE_MAY_INLINE_DATA state is cleared.
 */
static int ext4_create_inline_data(handle_t *handle,
				   struct inode *inode, unsigned len)
{
	int error;
	void *value = NULL;
	struct ext4_xattr_ibody_find is = {
		.s = { .not_found = -ENODATA, },
	};
	struct ext4_xattr_info i = {
		.name_index = EXT4_XATTR_INDEX_SYSTEM,
		.name = EXT4_XATTR_SYSTEM_DATA,
	};

	error = ext4_get_inode_loc(inode, &is.iloc);
	if (error)
		return error;

	BUFFER_TRACE(is.iloc.bh, "get_write_access");
	error = ext4_journal_get_write_access(handle, inode->i_sb, is.iloc.bh,
					      EXT4_JTR_NONE);
	if (error)
		goto out;

	if (len > EXT4_MIN_INLINE_DATA_SIZE) {
		value = EXT4_ZERO_XATTR_VALUE;
		len -= EXT4_MIN_INLINE_DATA_SIZE;
	} else {
		/* Everything fits in i_block: create an empty value. */
		value = "";
		len = 0;
	}

	/* Insert the xattr entry. */
	i.value = value;
	i.value_len = len;

	error = ext4_xattr_ibody_find(inode, &i, &is);
	if (error)
		goto out;

	if (!is.s.not_found) {
		EXT4_ERROR_INODE(inode, "unexpected inline data xattr");
		error = -EFSCORRUPTED;
		goto out;
	}

	error = ext4_xattr_ibody_set(handle, inode, &i, &is);
	if (error) {
		if (error == -ENOSPC)
			ext4_clear_inode_state(inode,
					       EXT4_STATE_MAY_INLINE_DATA);
		goto out;
	}

	memset((void *)ext4_raw_inode(&is.iloc)->i_block,
		0, EXT4_MIN_INLINE_DATA_SIZE);

	EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here -
				      (void *)ext4_raw_inode(&is.iloc));
	EXT4_I(inode)->i_inline_size = len + EXT4_MIN_INLINE_DATA_SIZE;
	ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
	ext4_set_inode_flag(inode, EXT4_INODE_INLINE_DATA);
	/* Extra reference so the brelse() at "out" stays balanced. */
	get_bh(is.iloc.bh);
	error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);

out:
	brelse(is.iloc.bh);
	return error;
}

/*
 * Grow the existing inline-data xattr value so that @len bytes of inline
 * data fit.
 *
 * Nothing is done (and 0 is returned) when @len already fits in
 * i_inline_size.  Otherwise the current value is read into a zeroed buffer
 * of the new length and written back with that length, after which
 * i_inline_off / i_inline_size are refreshed from the entry.
 *
 * Returns 0 on success, -EFSCORRUPTED if the entry is missing, -ENOMEM, or
 * another negative error.
 */
static int ext4_update_inline_data(handle_t *handle, struct inode *inode,
				   unsigned int len)
{
	int error;
	void *value = NULL;
	struct ext4_xattr_ibody_find is = {
		.s = { .not_found = -ENODATA, },
	};
	struct ext4_xattr_info i = {
		.name_index = EXT4_XATTR_INDEX_SYSTEM,
		.name = EXT4_XATTR_SYSTEM_DATA,
	};

	/* If the old space is ok, write the data directly. */
	if (len <= EXT4_I(inode)->i_inline_size)
		return 0;

	error = ext4_get_inode_loc(inode, &is.iloc);
	if (error)
		return error;

	error = ext4_xattr_ibody_find(inode, &i, &is);
	if (error)
		goto out;

	if (is.s.not_found) {
		EXT4_ERROR_INODE(inode, "missing inline data xattr");
		error = -EFSCORRUPTED;
		goto out;
	}

	/* Only the part beyond i_block is kept in the xattr value. */
	len -= EXT4_MIN_INLINE_DATA_SIZE;
	value = kzalloc(len, GFP_NOFS);
	if (!value) {
		error = -ENOMEM;
		goto out;
	}

	/* Preserve the current value; the tail of the buffer stays zeroed. */
	error = ext4_xattr_ibody_get(inode, i.name_index, i.name,
				     value, len);
	if (error < 0)
		goto out;

	BUFFER_TRACE(is.iloc.bh, "get_write_access");
	error = ext4_journal_get_write_access(handle, inode->i_sb, is.iloc.bh,
					      EXT4_JTR_NONE);
	if (error)
		goto out;

	/* Update the xattr entry. */
	i.value = value;
	i.value_len = len;

	error = ext4_xattr_ibody_set(handle, inode, &i, &is);
	if (error)
		goto out;

	EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here -
				      (void *)ext4_raw_inode(&is.iloc));
	EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE +
				le32_to_cpu(is.s.here->e_value_size);
	ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
	/* Extra reference so the brelse() at "out" stays balanced. */
	get_bh(is.iloc.bh);
	error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);

out:
	kfree(value);
	brelse(is.iloc.bh);
	return error;
}

/*
 * Make sure @inode can hold @len bytes of inline data, creating or growing
 * the inline-data xattr entry under the xattr write lock.
 *
 * Returns -ENOSPC when the inode may not use inline data or @len exceeds
 * ext4_get_max_inline_size(); otherwise the result of
 * ext4_update_inline_data() or ext4_create_inline_data().
 */
static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
				    loff_t len)
{
	int ret, size, no_expand;
	struct ext4_inode_info *ei = EXT4_I(inode);

	if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
		return -ENOSPC;

	size = ext4_get_max_inline_size(inode);
	if (size < len)
		return -ENOSPC;

	ext4_write_lock_xattr(inode, &no_expand);

	if (ei->i_inline_off)
		ret = ext4_update_inline_data(handle, inode, len);
	else
		ret = ext4_create_inline_data(handle, inode, len);

	ext4_write_unlock_xattr(inode, &no_expand);
	return ret;
}

/*
 * Drop the inline data of @inode.  No xattr locking is done here, as the
 * "_nolock" suffix indicates.
 *
 * The inline-data xattr entry is set with a NULL value of length 0
 * (presumably removing it -- the semantics of ext4_xattr_ibody_set() are not
 * visible in this file), i_block is zeroed both on disk and in the in-memory
 * i_data, and the INLINE_DATA inode flag is cleared.  When the filesystem
 * has the extents feature, directories, regular files and symlinks get the
 * EXTENTS flag and a freshly initialised extent tree.
 *
 * Returns 0 on success or when there is no inline data; -ENODATA is mapped
 * to 0; other negative errors are returned as is.
 */
static int ext4_destroy_inline_data_nolock(handle_t *handle,
					   struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_xattr_ibody_find is = {
		.s = { .not_found = 0, },
	};
	struct ext4_xattr_info i = {
		.name_index = EXT4_XATTR_INDEX_SYSTEM,
		.name = EXT4_XATTR_SYSTEM_DATA,
		.value = NULL,
		.value_len = 0,
	};
	int error;

	if (!ei->i_inline_off)
		return 0;

	error = ext4_get_inode_loc(inode, &is.iloc);
	if (error)
		return error;

	error = ext4_xattr_ibody_find(inode, &i, &is);
	if (error)
		goto out;

	BUFFER_TRACE(is.iloc.bh, "get_write_access");
	error = ext4_journal_get_write_access(handle, inode->i_sb, is.iloc.bh,
					      EXT4_JTR_NONE);
	if (error)
		goto out;

	error = ext4_xattr_ibody_set(handle, inode, &i, &is);
	if (error)
		goto out;

	/* Wipe the i_block part of the inline data, on disk and in memory. */
	memset((void *)ext4_raw_inode(&is.iloc)->i_block,
		0, EXT4_MIN_INLINE_DATA_SIZE);
	memset(ei->i_data, 0, EXT4_MIN_INLINE_DATA_SIZE);

	if (ext4_has_feature_extents(inode->i_sb)) {
		if (S_ISDIR(inode->i_mode) ||
		    S_ISREG(inode->i_mode) || S_ISLNK(inode->i_mode)) {
			ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
			ext4_ext_tree_init(handle, inode);
		}
	}
	ext4_clear_inode_flag(inode, EXT4_INODE_INLINE_DATA);

	/* Extra reference so the brelse() at "out" stays balanced. */
	get_bh(is.iloc.bh);
	error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);

	/* The in-memory inline state is reset even if marking dirty failed. */
	EXT4_I(inode)->i_inline_off = 0;
	EXT4_I(inode)->i_inline_size = 0;
	ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
out:
	brelse(is.iloc.bh);
	if (error == -ENODATA)
		error = 0;
	return error;
}

/*
 * Fill the locked folio at index 0 with the inline data of @inode, zero the
 * rest of the folio and mark it uptodate.
 *
 * Returns the number of bytes read by ext4_read_inline_data(), 0 (with a
 * warning, folio left untouched) if the inode has no inline-data entry, or
 * the error from ext4_get_inode_loc().
 */
static int ext4_read_inline_folio(struct inode *inode, struct folio *folio)
{
	void *kaddr;
	int ret = 0;
	size_t len;
	struct ext4_iloc iloc;

	BUG_ON(!folio_test_locked(folio));
	BUG_ON(!ext4_has_inline_data(inode));
	BUG_ON(folio->index);

	if (!EXT4_I(inode)->i_inline_off) {
		ext4_warning(inode->i_sb, "inode %lu doesn't have inline data.",
			     inode->i_ino);
		goto out;
	}

	ret = ext4_get_inode_loc(inode, &iloc);
	if (ret)
		goto out;

	/* Never expose more than i_size bytes of the inline area. */
	len = min_t(size_t, ext4_get_inline_size(inode), i_size_read(inode));
	BUG_ON(len > PAGE_SIZE);
	kaddr = kmap_local_folio(folio, 0);
	ret = ext4_read_inline_data(inode, kaddr, len, &iloc);
	kaddr = folio_zero_tail(folio, len, kaddr + len);
	kunmap_local(kaddr);
	folio_mark_uptodate(folio);
	brelse(iloc.bh);

out:
	return ret;
}

/*
 * Read @folio for an inode with inline data.
 *
 * Returns -EAGAIN, without unlocking the folio, if the inode turns out not
 * to have inline data.  Otherwise the folio is unlocked and the return value
 * is 0 on success or the negative error from ext4_read_inline_folio().
 */
int ext4_readpage_inline(struct inode *inode, struct folio *folio)
{
	int ret = 0;

	down_read(&EXT4_I(inode)->xattr_sem);
	if (!ext4_has_inline_data(inode)) {
		up_read(&EXT4_I(inode)->xattr_sem);
		return -EAGAIN;
	}

	/*
	 * Current inline data can only exist in the 1st page,
	 * So for all the other pages, just set them uptodate.
	 */
	if (!folio->index)
		ret = ext4_read_inline_folio(inode, folio);
	else if (!folio_test_uptodate(folio)) {
		folio_zero_segment(folio, 0, folio_size(folio));
		folio_mark_uptodate(folio);
	}

	up_read(&EXT4_I(inode)->xattr_sem);

	folio_unlock(folio);
	/* A positive byte count from the read is reported as plain success. */
	return ret >= 0 ? 0 : ret;
}

/*
 * Convert the inline data of @inode into a regular block: read the inline
 * data into the folio at index 0, destroy the inline data and prepare the
 * folio's buffers over the old inline range.
 *
 * A journal handle is started and stopped inside this function.  On -ENOSPC
 * the whole sequence is retried while ext4_should_retry_alloc() allows it.
 * Returns 0 on success (also when the inode has no inline data any more) or
 * a negative error.
 */
static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
					      struct inode *inode)
{
	int ret, needed_blocks, no_expand;
	handle_t *handle = NULL;
	int retries = 0, sem_held = 0;
	struct folio *folio = NULL;
	unsigned from, to;
	struct ext4_iloc iloc;

	if (!ext4_has_inline_data(inode)) {
		/*
		 * clear the flag so that no new write
		 * will trap here again.
		 */
		ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
		return 0;
	}

	needed_blocks = ext4_chunk_trans_extent(inode, 1);

	ret = ext4_get_inode_loc(inode, &iloc);
	if (ret)
		return ret;

retry:
	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		handle = NULL;
		goto out;
	}

	/* We cannot recurse into the filesystem as the transaction is already
	 * started */
	folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN | FGP_NOFS,
			mapping_gfp_mask(mapping));
	if (IS_ERR(folio)) {
		ret = PTR_ERR(folio);
		goto out_nofolio;
	}

	ext4_write_lock_xattr(inode, &no_expand);
	sem_held = 1;
	/* If some one has already done this for us, just exit. */
	if (!ext4_has_inline_data(inode)) {
		ret = 0;
		goto out;
	}

	/* The buffers must cover the whole former inline range. */
	from = 0;
	to = ext4_get_inline_size(inode);
	if (!folio_test_uptodate(folio)) {
		ret = ext4_read_inline_folio(inode, folio);
		if (ret < 0)
			goto out;
	}

	ext4_fc_track_inode(handle, inode);
	ret = ext4_destroy_inline_data_nolock(handle, inode);
	if (ret)
		goto out;

	if (ext4_should_dioread_nolock(inode)) {
		ret = ext4_block_write_begin(handle, folio, from, to,
					     ext4_get_block_unwritten);
	} else
		ret = ext4_block_write_begin(handle, folio, from, to,
					     ext4_get_block);
	clear_buffer_new(folio_buffers(folio));

	if (!ret && ext4_should_journal_data(inode)) {
		ret = ext4_walk_page_buffers(handle, inode,
					     folio_buffers(folio), from, to,
					     NULL, do_journal_get_write_access);
	}

	if (ret) {
		/*
		 * Undo: release the folio, the xattr lock and the handle
		 * before truncating, and clear the local state so that the
		 * cleanup labels below (or a retry) do not release them again.
		 */
		folio_unlock(folio);
		folio_put(folio);
		folio = NULL;
		ext4_orphan_add(handle, inode);
		ext4_write_unlock_xattr(inode, &no_expand);
		sem_held = 0;
		ext4_journal_stop(handle);
		handle = NULL;
		ext4_truncate_failed_write(inode);
		/*
		 * If truncate failed early the inode might
		 * still be on the orphan list; we need to
		 * make sure the inode is removed from the
		 * orphan list in that case.
		 */
		if (inode->i_nlink)
			ext4_orphan_del(NULL, inode);
	}

	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
		goto retry;

	if (folio)
		block_commit_write(folio, from, to);
out:
	if (folio) {
		folio_unlock(folio);
		folio_put(folio);
	}
out_nofolio:
	if (sem_held)
		ext4_write_unlock_xattr(inode, &no_expand);
	if (handle)
		ext4_journal_stop(handle);
	brelse(iloc.bh);
	return ret;
}

/*
 * Prepare the write for the inline data.
 * If the data can be written into the inode, we just read
 * the page and make it uptodate, and start the journal.
 * Otherwise read the page, makes it dirty so that it can be
 * handle in writepages(the i_disksize update is left to the
 * normal ext4_da_write_end).
 *
 * Returns 1 with *@foliop set to the locked folio and the journal handle
 * still running when the write can go into the inode.  When the data does
 * not fit (-ENOSPC from ext4_prepare_inline_data()), the handle is stopped
 * and the result of the extent conversion is returned: the delayed
 * allocation variant if @da is true, the regular one otherwise.  Returns 0
 * if someone else has already converted the inode, or a negative error.
 */
int ext4_generic_write_inline_data(struct address_space *mapping,
					  struct inode *inode,
					  loff_t pos, unsigned len,
					  struct folio **foliop,
					  void **fsdata, bool da)
{
	int ret;
	handle_t *handle;
	struct folio *folio;
	struct ext4_iloc iloc;
	int retries = 0;

	ret = ext4_get_inode_loc(inode, &iloc);
	if (ret)
		return ret;

retry_journal:
	handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		goto out_release_bh;
	}

	ret = ext4_prepare_inline_data(handle, inode, pos + len);
	if (ret && ret != -ENOSPC)
		goto out_stop_journal;

	if (ret == -ENOSPC) {
		/* Does not fit inline: fall back to converting to extents. */
		ext4_journal_stop(handle);
		if (!da) {
			brelse(iloc.bh);
			/* Retry inside */
			return ext4_convert_inline_data_to_extent(mapping, inode);
		}

		ret = ext4_da_convert_inline_data_to_extent(mapping, inode, fsdata);
		if (ret == -ENOSPC &&
		    ext4_should_retry_alloc(inode->i_sb, &retries))
			goto retry_journal;
		goto out_release_bh;
	}

	folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN | FGP_NOFS,
					mapping_gfp_mask(mapping));
	if (IS_ERR(folio)) {
		ret = PTR_ERR(folio);
		goto out_stop_journal;
	}

	down_read(&EXT4_I(inode)->xattr_sem);
	/* Someone else had converted it to extent */
	if (!ext4_has_inline_data(inode)) {
		ret = 0;
		goto out_release_folio;
	}

	if (!folio_test_uptodate(folio)) {
		ret = ext4_read_inline_folio(inode, folio);
		if (ret < 0)
			goto out_release_folio;
	}

	ret = ext4_journal_get_write_access(handle, inode->i_sb, iloc.bh, EXT4_JTR_NONE);
	if (ret)
		goto out_release_folio;
	/* Success: hand the locked folio to the caller, handle stays open. */
	*foliop = folio;
	up_read(&EXT4_I(inode)->xattr_sem);
	brelse(iloc.bh);
	return 1;

out_release_folio:
	up_read(&EXT4_I(inode)->xattr_sem);
	folio_unlock(folio);
	folio_put(folio);
out_stop_journal:
	ext4_journal_stop(handle);
out_release_bh:
	brelse(iloc.bh);
	return ret;
}

/*
 * Try to write data in the inode.
 * If the inode has inline data, check whether the new write can be
 * in the inode also. If not, create the page and the handle, move the data
 * to the page, make it uptodate and let the later code create an extent
 * for it.
764 */ 765 int ext4_try_to_write_inline_data(struct address_space *mapping, 766 struct inode *inode, 767 loff_t pos, unsigned len, 768 struct folio **foliop) 769 { 770 if (pos + len > ext4_get_max_inline_size(inode)) 771 return ext4_convert_inline_data_to_extent(mapping, inode); 772 return ext4_generic_write_inline_data(mapping, inode, pos, len, 773 foliop, NULL, false); 774 } 775 776 int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, 777 unsigned copied, struct folio *folio) 778 { 779 handle_t *handle = ext4_journal_current_handle(); 780 int no_expand; 781 void *kaddr; 782 struct ext4_iloc iloc; 783 int ret = 0, ret2; 784 785 if (unlikely(copied < len) && !folio_test_uptodate(folio)) 786 copied = 0; 787 788 if (likely(copied)) { 789 ret = ext4_get_inode_loc(inode, &iloc); 790 if (ret) { 791 folio_unlock(folio); 792 folio_put(folio); 793 ext4_std_error(inode->i_sb, ret); 794 goto out; 795 } 796 ext4_write_lock_xattr(inode, &no_expand); 797 BUG_ON(!ext4_has_inline_data(inode)); 798 799 /* 800 * ei->i_inline_off may have changed since 801 * ext4_write_begin() called 802 * ext4_try_to_write_inline_data() 803 */ 804 (void) ext4_find_inline_data_nolock(inode); 805 806 kaddr = kmap_local_folio(folio, 0); 807 ext4_write_inline_data(inode, &iloc, kaddr, pos, copied); 808 kunmap_local(kaddr); 809 folio_mark_uptodate(folio); 810 /* clear dirty flag so that writepages wouldn't work for us. */ 811 folio_clear_dirty(folio); 812 813 ext4_write_unlock_xattr(inode, &no_expand); 814 brelse(iloc.bh); 815 816 /* 817 * It's important to update i_size while still holding folio 818 * lock: page writeout could otherwise come in and zero 819 * beyond i_size. 820 */ 821 ext4_update_inode_size(inode, pos + copied); 822 } 823 folio_unlock(folio); 824 folio_put(folio); 825 826 /* 827 * Don't mark the inode dirty under folio lock. First, it unnecessarily 828 * makes the holding time of folio lock longer. 
Second, it forces lock 829 * ordering of folio lock and transaction start for journaling 830 * filesystems. 831 */ 832 if (likely(copied)) 833 mark_inode_dirty(inode); 834 out: 835 /* 836 * If we didn't copy as much data as expected, we need to trim back 837 * size of xattr containing inline data. 838 */ 839 if (pos + len > inode->i_size && ext4_can_truncate(inode)) 840 ext4_orphan_add(handle, inode); 841 842 ret2 = ext4_journal_stop(handle); 843 if (!ret) 844 ret = ret2; 845 if (pos + len > inode->i_size) { 846 ext4_truncate_failed_write(inode); 847 /* 848 * If truncate failed early the inode might still be 849 * on the orphan list; we need to make sure the inode 850 * is removed from the orphan list in that case. 851 */ 852 if (inode->i_nlink) 853 ext4_orphan_del(NULL, inode); 854 } 855 return ret ? ret : copied; 856 } 857 858 /* 859 * Try to make the page cache and handle ready for the inline data case. 860 * We can call this function in 2 cases: 861 * 1. The inode is created and the first write exceeds inline size. We can 862 * clear the inode state safely. 863 * 2. The inode has inline data, then we need to read the data, make it 864 * update and dirty so that ext4_da_writepages can handle it. We don't 865 * need to start the journal since the file's metadata isn't changed now. 
866 */ 867 static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping, 868 struct inode *inode, 869 void **fsdata) 870 { 871 int ret = 0, inline_size; 872 struct folio *folio; 873 874 folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN, 875 mapping_gfp_mask(mapping)); 876 if (IS_ERR(folio)) 877 return PTR_ERR(folio); 878 879 down_read(&EXT4_I(inode)->xattr_sem); 880 if (!ext4_has_inline_data(inode)) { 881 ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 882 goto out; 883 } 884 885 inline_size = ext4_get_inline_size(inode); 886 887 if (!folio_test_uptodate(folio)) { 888 ret = ext4_read_inline_folio(inode, folio); 889 if (ret < 0) 890 goto out; 891 } 892 893 ret = ext4_block_write_begin(NULL, folio, 0, inline_size, 894 ext4_da_get_block_prep); 895 if (ret) { 896 up_read(&EXT4_I(inode)->xattr_sem); 897 folio_unlock(folio); 898 folio_put(folio); 899 ext4_truncate_failed_write(inode); 900 return ret; 901 } 902 903 clear_buffer_new(folio_buffers(folio)); 904 folio_mark_dirty(folio); 905 folio_mark_uptodate(folio); 906 ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 907 *fsdata = (void *)CONVERT_INLINE_DATA; 908 909 out: 910 up_read(&EXT4_I(inode)->xattr_sem); 911 if (folio) { 912 folio_unlock(folio); 913 folio_put(folio); 914 } 915 return ret; 916 } 917 918 #ifdef INLINE_DIR_DEBUG 919 void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh, 920 void *inline_start, int inline_size) 921 { 922 int offset; 923 unsigned short de_len; 924 struct ext4_dir_entry_2 *de = inline_start; 925 void *dlimit = inline_start + inline_size; 926 927 trace_printk("inode %lu\n", dir->i_ino); 928 offset = 0; 929 while ((void *)de < dlimit) { 930 de_len = ext4_rec_len_from_disk(de->rec_len, inline_size); 931 trace_printk("de: off %u rlen %u name %.*s nlen %u ino %u\n", 932 offset, de_len, de->name_len, de->name, 933 de->name_len, le32_to_cpu(de->inode)); 934 if (ext4_check_dir_entry(dir, NULL, de, bh, 935 inline_start, inline_size, 
offset)) 936 BUG(); 937 938 offset += de_len; 939 de = (struct ext4_dir_entry_2 *) ((char *) de + de_len); 940 } 941 } 942 #else 943 #define ext4_show_inline_dir(dir, bh, inline_start, inline_size) 944 #endif 945 946 /* 947 * Add a new entry into a inline dir. 948 * It will return -ENOSPC if no space is available, and -EIO 949 * and -EEXIST if directory entry already exists. 950 */ 951 static int ext4_add_dirent_to_inline(handle_t *handle, 952 struct ext4_filename *fname, 953 struct inode *dir, 954 struct inode *inode, 955 struct ext4_iloc *iloc, 956 void *inline_start, int inline_size) 957 { 958 int err; 959 struct ext4_dir_entry_2 *de; 960 961 err = ext4_find_dest_de(dir, iloc->bh, inline_start, 962 inline_size, fname, &de); 963 if (err) 964 return err; 965 966 BUFFER_TRACE(iloc->bh, "get_write_access"); 967 err = ext4_journal_get_write_access(handle, dir->i_sb, iloc->bh, 968 EXT4_JTR_NONE); 969 if (err) 970 return err; 971 ext4_insert_dentry(dir, inode, de, inline_size, fname); 972 973 ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size); 974 975 /* 976 * XXX shouldn't update any times until successful 977 * completion of syscall, but too many callers depend 978 * on this. 979 * 980 * XXX similarly, too many callers depend on 981 * ext4_new_inode() setting the times, but error 982 * recovery deletes the inode, so the worst that can 983 * happen is that the times are slightly out of date 984 * and/or different from the directory change time. 
985 */ 986 inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); 987 ext4_update_dx_flag(dir); 988 inode_inc_iversion(dir); 989 return 1; 990 } 991 992 static void *ext4_get_inline_xattr_pos(struct inode *inode, 993 struct ext4_iloc *iloc) 994 { 995 struct ext4_xattr_entry *entry; 996 struct ext4_xattr_ibody_header *header; 997 998 BUG_ON(!EXT4_I(inode)->i_inline_off); 999 1000 header = IHDR(inode, ext4_raw_inode(iloc)); 1001 entry = (struct ext4_xattr_entry *)((void *)ext4_raw_inode(iloc) + 1002 EXT4_I(inode)->i_inline_off); 1003 1004 return (void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs); 1005 } 1006 1007 /* Set the final de to cover the whole block. */ 1008 void ext4_update_final_de(void *de_buf, int old_size, int new_size) 1009 { 1010 struct ext4_dir_entry_2 *de, *prev_de; 1011 void *limit; 1012 int de_len; 1013 1014 de = de_buf; 1015 if (old_size) { 1016 limit = de_buf + old_size; 1017 do { 1018 prev_de = de; 1019 de_len = ext4_rec_len_from_disk(de->rec_len, old_size); 1020 de_buf += de_len; 1021 de = de_buf; 1022 } while (de_buf < limit); 1023 1024 prev_de->rec_len = ext4_rec_len_to_disk(de_len + new_size - 1025 old_size, new_size); 1026 } else { 1027 /* this is just created, so create an empty entry. 
*/ 1028 de->inode = 0; 1029 de->rec_len = ext4_rec_len_to_disk(new_size, new_size); 1030 } 1031 } 1032 1033 static int ext4_update_inline_dir(handle_t *handle, struct inode *dir, 1034 struct ext4_iloc *iloc) 1035 { 1036 int ret; 1037 int old_size = EXT4_I(dir)->i_inline_size - EXT4_MIN_INLINE_DATA_SIZE; 1038 int new_size = get_max_inline_xattr_value_size(dir, iloc); 1039 1040 if (new_size - old_size <= ext4_dir_rec_len(1, NULL)) 1041 return -ENOSPC; 1042 1043 ret = ext4_update_inline_data(handle, dir, 1044 new_size + EXT4_MIN_INLINE_DATA_SIZE); 1045 if (ret) 1046 return ret; 1047 1048 ext4_update_final_de(ext4_get_inline_xattr_pos(dir, iloc), old_size, 1049 EXT4_I(dir)->i_inline_size - 1050 EXT4_MIN_INLINE_DATA_SIZE); 1051 dir->i_size = EXT4_I(dir)->i_disksize = EXT4_I(dir)->i_inline_size; 1052 return 0; 1053 } 1054 1055 static void ext4_restore_inline_data(handle_t *handle, struct inode *inode, 1056 struct ext4_iloc *iloc, 1057 void *buf, int inline_size) 1058 { 1059 int ret; 1060 1061 ret = ext4_create_inline_data(handle, inode, inline_size); 1062 if (ret) { 1063 ext4_msg(inode->i_sb, KERN_EMERG, 1064 "error restoring inline_data for inode -- potential data loss! 
(inode %lu, error %d)", 1065 inode->i_ino, ret); 1066 return; 1067 } 1068 ext4_write_inline_data(inode, iloc, buf, 0, inline_size); 1069 ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 1070 } 1071 1072 static int ext4_convert_inline_data_nolock(handle_t *handle, 1073 struct inode *inode, 1074 struct ext4_iloc *iloc) 1075 { 1076 int error; 1077 void *buf = NULL; 1078 struct buffer_head *data_bh = NULL; 1079 struct ext4_map_blocks map; 1080 int inline_size; 1081 1082 inline_size = ext4_get_inline_size(inode); 1083 buf = kmalloc(inline_size, GFP_NOFS); 1084 if (!buf) { 1085 error = -ENOMEM; 1086 goto out; 1087 } 1088 1089 error = ext4_read_inline_data(inode, buf, inline_size, iloc); 1090 if (error < 0) 1091 goto out; 1092 1093 /* 1094 * Make sure the inline directory entries pass checks before we try to 1095 * convert them, so that we avoid touching stuff that needs fsck. 1096 */ 1097 if (S_ISDIR(inode->i_mode)) { 1098 error = ext4_check_all_de(inode, iloc->bh, 1099 buf + EXT4_INLINE_DOTDOT_SIZE, 1100 inline_size - EXT4_INLINE_DOTDOT_SIZE); 1101 if (error) 1102 goto out; 1103 } 1104 1105 error = ext4_destroy_inline_data_nolock(handle, inode); 1106 if (error) 1107 goto out; 1108 1109 map.m_lblk = 0; 1110 map.m_len = 1; 1111 map.m_flags = 0; 1112 error = ext4_map_blocks(handle, inode, &map, EXT4_GET_BLOCKS_CREATE); 1113 if (error < 0) 1114 goto out_restore; 1115 if (!(map.m_flags & EXT4_MAP_MAPPED)) { 1116 error = -EIO; 1117 goto out_restore; 1118 } 1119 1120 data_bh = sb_getblk(inode->i_sb, map.m_pblk); 1121 if (!data_bh) { 1122 error = -ENOMEM; 1123 goto out_restore; 1124 } 1125 1126 lock_buffer(data_bh); 1127 error = ext4_journal_get_create_access(handle, inode->i_sb, data_bh, 1128 EXT4_JTR_NONE); 1129 if (error) { 1130 unlock_buffer(data_bh); 1131 error = -EIO; 1132 goto out_restore; 1133 } 1134 memset(data_bh->b_data, 0, inode->i_sb->s_blocksize); 1135 1136 if (!S_ISDIR(inode->i_mode)) { 1137 memcpy(data_bh->b_data, buf, inline_size); 1138 
set_buffer_uptodate(data_bh); 1139 unlock_buffer(data_bh); 1140 error = ext4_handle_dirty_metadata(handle, 1141 inode, data_bh); 1142 } else { 1143 unlock_buffer(data_bh); 1144 inode->i_size = inode->i_sb->s_blocksize; 1145 i_size_write(inode, inode->i_sb->s_blocksize); 1146 EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; 1147 1148 error = ext4_init_dirblock(handle, inode, data_bh, 1149 le32_to_cpu(((struct ext4_dir_entry_2 *)buf)->inode), 1150 buf + EXT4_INLINE_DOTDOT_SIZE, 1151 inline_size - EXT4_INLINE_DOTDOT_SIZE); 1152 if (!error) 1153 error = ext4_mark_inode_dirty(handle, inode); 1154 } 1155 1156 out_restore: 1157 if (error) 1158 ext4_restore_inline_data(handle, inode, iloc, buf, inline_size); 1159 1160 out: 1161 brelse(data_bh); 1162 kfree(buf); 1163 return error; 1164 } 1165 1166 /* 1167 * Try to add the new entry to the inline data. 1168 * If succeeds, return 0. If not, extended the inline dir and copied data to 1169 * the new created block. 1170 */ 1171 int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, 1172 struct inode *dir, struct inode *inode) 1173 { 1174 int ret, ret2, inline_size, no_expand; 1175 void *inline_start; 1176 struct ext4_iloc iloc; 1177 1178 ret = ext4_get_inode_loc(dir, &iloc); 1179 if (ret) 1180 return ret; 1181 1182 ext4_write_lock_xattr(dir, &no_expand); 1183 if (!ext4_has_inline_data(dir)) 1184 goto out; 1185 1186 inline_start = (void *)ext4_raw_inode(&iloc)->i_block + 1187 EXT4_INLINE_DOTDOT_SIZE; 1188 inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE; 1189 1190 ret = ext4_add_dirent_to_inline(handle, fname, dir, inode, &iloc, 1191 inline_start, inline_size); 1192 if (ret != -ENOSPC) 1193 goto out; 1194 1195 /* check whether it can be inserted to inline xattr space. 
*/ 1196 inline_size = EXT4_I(dir)->i_inline_size - 1197 EXT4_MIN_INLINE_DATA_SIZE; 1198 if (!inline_size) { 1199 /* Try to use the xattr space.*/ 1200 ret = ext4_update_inline_dir(handle, dir, &iloc); 1201 if (ret && ret != -ENOSPC) 1202 goto out; 1203 1204 inline_size = EXT4_I(dir)->i_inline_size - 1205 EXT4_MIN_INLINE_DATA_SIZE; 1206 } 1207 1208 if (inline_size) { 1209 inline_start = ext4_get_inline_xattr_pos(dir, &iloc); 1210 1211 ret = ext4_add_dirent_to_inline(handle, fname, dir, 1212 inode, &iloc, inline_start, 1213 inline_size); 1214 1215 if (ret != -ENOSPC) 1216 goto out; 1217 } 1218 1219 /* 1220 * The inline space is filled up, so create a new block for it. 1221 * As the extent tree will be created, we have to save the inline 1222 * dir first. 1223 */ 1224 ret = ext4_convert_inline_data_nolock(handle, dir, &iloc); 1225 1226 out: 1227 ext4_write_unlock_xattr(dir, &no_expand); 1228 ret2 = ext4_mark_inode_dirty(handle, dir); 1229 if (unlikely(ret2 && !ret)) 1230 ret = ret2; 1231 brelse(iloc.bh); 1232 return ret; 1233 } 1234 1235 /* 1236 * This function fills a red-black tree with information from an 1237 * inlined dir. It returns the number directory entries loaded 1238 * into the tree. If there is an error it is returned in err. 
1239 */ 1240 int ext4_inlinedir_to_tree(struct file *dir_file, 1241 struct inode *dir, ext4_lblk_t block, 1242 struct dx_hash_info *hinfo, 1243 __u32 start_hash, __u32 start_minor_hash, 1244 int *has_inline_data) 1245 { 1246 int err = 0, count = 0; 1247 unsigned int parent_ino; 1248 int pos; 1249 struct ext4_dir_entry_2 *de; 1250 struct inode *inode = file_inode(dir_file); 1251 int ret, inline_size = 0; 1252 struct ext4_iloc iloc; 1253 void *dir_buf = NULL; 1254 struct ext4_dir_entry_2 fake; 1255 struct fscrypt_str tmp_str; 1256 1257 ret = ext4_get_inode_loc(inode, &iloc); 1258 if (ret) 1259 return ret; 1260 1261 down_read(&EXT4_I(inode)->xattr_sem); 1262 if (!ext4_has_inline_data(inode)) { 1263 up_read(&EXT4_I(inode)->xattr_sem); 1264 *has_inline_data = 0; 1265 goto out; 1266 } 1267 1268 inline_size = ext4_get_inline_size(inode); 1269 dir_buf = kmalloc(inline_size, GFP_NOFS); 1270 if (!dir_buf) { 1271 ret = -ENOMEM; 1272 up_read(&EXT4_I(inode)->xattr_sem); 1273 goto out; 1274 } 1275 1276 ret = ext4_read_inline_data(inode, dir_buf, inline_size, &iloc); 1277 up_read(&EXT4_I(inode)->xattr_sem); 1278 if (ret < 0) 1279 goto out; 1280 1281 pos = 0; 1282 parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode); 1283 while (pos < inline_size) { 1284 /* 1285 * As inlined dir doesn't store any information about '.' and 1286 * only the inode number of '..' is stored, we have to handle 1287 * them differently. 
1288 */ 1289 if (pos == 0) { 1290 fake.inode = cpu_to_le32(inode->i_ino); 1291 fake.name_len = 1; 1292 memcpy(fake.name, ".", 2); 1293 fake.rec_len = ext4_rec_len_to_disk( 1294 ext4_dir_rec_len(fake.name_len, NULL), 1295 inline_size); 1296 ext4_set_de_type(inode->i_sb, &fake, S_IFDIR); 1297 de = &fake; 1298 pos = EXT4_INLINE_DOTDOT_OFFSET; 1299 } else if (pos == EXT4_INLINE_DOTDOT_OFFSET) { 1300 fake.inode = cpu_to_le32(parent_ino); 1301 fake.name_len = 2; 1302 memcpy(fake.name, "..", 3); 1303 fake.rec_len = ext4_rec_len_to_disk( 1304 ext4_dir_rec_len(fake.name_len, NULL), 1305 inline_size); 1306 ext4_set_de_type(inode->i_sb, &fake, S_IFDIR); 1307 de = &fake; 1308 pos = EXT4_INLINE_DOTDOT_SIZE; 1309 } else { 1310 de = (struct ext4_dir_entry_2 *)(dir_buf + pos); 1311 pos += ext4_rec_len_from_disk(de->rec_len, inline_size); 1312 if (ext4_check_dir_entry(inode, dir_file, de, 1313 iloc.bh, dir_buf, 1314 inline_size, pos)) { 1315 ret = count; 1316 goto out; 1317 } 1318 } 1319 1320 if (ext4_hash_in_dirent(dir)) { 1321 hinfo->hash = EXT4_DIRENT_HASH(de); 1322 hinfo->minor_hash = EXT4_DIRENT_MINOR_HASH(de); 1323 } else { 1324 err = ext4fs_dirhash(dir, de->name, de->name_len, hinfo); 1325 if (err) { 1326 ret = err; 1327 goto out; 1328 } 1329 } 1330 if ((hinfo->hash < start_hash) || 1331 ((hinfo->hash == start_hash) && 1332 (hinfo->minor_hash < start_minor_hash))) 1333 continue; 1334 if (de->inode == 0) 1335 continue; 1336 tmp_str.name = de->name; 1337 tmp_str.len = de->name_len; 1338 err = ext4_htree_store_dirent(dir_file, hinfo->hash, 1339 hinfo->minor_hash, de, &tmp_str); 1340 if (err) { 1341 ret = err; 1342 goto out; 1343 } 1344 count++; 1345 } 1346 ret = count; 1347 out: 1348 kfree(dir_buf); 1349 brelse(iloc.bh); 1350 return ret; 1351 } 1352 1353 /* 1354 * So this function is called when the volume is mkfsed with 1355 * dir_index disabled. 
In order to keep f_pos persistent 1356 * after we convert from an inlined dir to a blocked based, 1357 * we just pretend that we are a normal dir and return the 1358 * offset as if '.' and '..' really take place. 1359 * 1360 */ 1361 int ext4_read_inline_dir(struct file *file, 1362 struct dir_context *ctx, 1363 int *has_inline_data) 1364 { 1365 unsigned int offset, parent_ino; 1366 int i; 1367 struct ext4_dir_entry_2 *de; 1368 struct super_block *sb; 1369 struct inode *inode = file_inode(file); 1370 int ret, inline_size = 0; 1371 struct ext4_iloc iloc; 1372 void *dir_buf = NULL; 1373 int dotdot_offset, dotdot_size, extra_offset, extra_size; 1374 struct dir_private_info *info = file->private_data; 1375 1376 ret = ext4_get_inode_loc(inode, &iloc); 1377 if (ret) 1378 return ret; 1379 1380 down_read(&EXT4_I(inode)->xattr_sem); 1381 if (!ext4_has_inline_data(inode)) { 1382 up_read(&EXT4_I(inode)->xattr_sem); 1383 *has_inline_data = 0; 1384 goto out; 1385 } 1386 1387 inline_size = ext4_get_inline_size(inode); 1388 dir_buf = kmalloc(inline_size, GFP_NOFS); 1389 if (!dir_buf) { 1390 ret = -ENOMEM; 1391 up_read(&EXT4_I(inode)->xattr_sem); 1392 goto out; 1393 } 1394 1395 ret = ext4_read_inline_data(inode, dir_buf, inline_size, &iloc); 1396 up_read(&EXT4_I(inode)->xattr_sem); 1397 if (ret < 0) 1398 goto out; 1399 1400 ret = 0; 1401 sb = inode->i_sb; 1402 parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode); 1403 offset = ctx->pos; 1404 1405 /* 1406 * dotdot_offset and dotdot_size is the real offset and 1407 * size for ".." and "." if the dir is block based while 1408 * the real size for them are only EXT4_INLINE_DOTDOT_SIZE. 1409 * So we will use extra_offset and extra_size to indicate them 1410 * during the inline dir iteration. 
1411 */ 1412 dotdot_offset = ext4_dir_rec_len(1, NULL); 1413 dotdot_size = dotdot_offset + ext4_dir_rec_len(2, NULL); 1414 extra_offset = dotdot_size - EXT4_INLINE_DOTDOT_SIZE; 1415 extra_size = extra_offset + inline_size; 1416 1417 /* 1418 * If the cookie has changed since the last call to 1419 * readdir(2), then we might be pointing to an invalid 1420 * dirent right now. Scan from the start of the inline 1421 * dir to make sure. 1422 */ 1423 if (!inode_eq_iversion(inode, info->cookie)) { 1424 for (i = 0; i < extra_size && i < offset;) { 1425 /* 1426 * "." is with offset 0 and 1427 * ".." is dotdot_offset. 1428 */ 1429 if (!i) { 1430 i = dotdot_offset; 1431 continue; 1432 } else if (i == dotdot_offset) { 1433 i = dotdot_size; 1434 continue; 1435 } 1436 /* for other entry, the real offset in 1437 * the buf has to be tuned accordingly. 1438 */ 1439 de = (struct ext4_dir_entry_2 *) 1440 (dir_buf + i - extra_offset); 1441 /* It's too expensive to do a full 1442 * dirent test each time round this 1443 * loop, but we do have to test at 1444 * least that it is non-zero. A 1445 * failure will be detected in the 1446 * dirent test below. 
*/ 1447 if (ext4_rec_len_from_disk(de->rec_len, extra_size) 1448 < ext4_dir_rec_len(1, NULL)) 1449 break; 1450 i += ext4_rec_len_from_disk(de->rec_len, 1451 extra_size); 1452 } 1453 offset = i; 1454 ctx->pos = offset; 1455 info->cookie = inode_query_iversion(inode); 1456 } 1457 1458 while (ctx->pos < extra_size) { 1459 if (ctx->pos == 0) { 1460 if (!dir_emit(ctx, ".", 1, inode->i_ino, DT_DIR)) 1461 goto out; 1462 ctx->pos = dotdot_offset; 1463 continue; 1464 } 1465 1466 if (ctx->pos == dotdot_offset) { 1467 if (!dir_emit(ctx, "..", 2, parent_ino, DT_DIR)) 1468 goto out; 1469 ctx->pos = dotdot_size; 1470 continue; 1471 } 1472 1473 de = (struct ext4_dir_entry_2 *) 1474 (dir_buf + ctx->pos - extra_offset); 1475 if (ext4_check_dir_entry(inode, file, de, iloc.bh, dir_buf, 1476 extra_size, ctx->pos)) 1477 goto out; 1478 if (le32_to_cpu(de->inode)) { 1479 if (!dir_emit(ctx, de->name, de->name_len, 1480 le32_to_cpu(de->inode), 1481 get_dtype(sb, de->file_type))) 1482 goto out; 1483 } 1484 ctx->pos += ext4_rec_len_from_disk(de->rec_len, extra_size); 1485 } 1486 out: 1487 kfree(dir_buf); 1488 brelse(iloc.bh); 1489 return ret; 1490 } 1491 1492 void *ext4_read_inline_link(struct inode *inode) 1493 { 1494 struct ext4_iloc iloc; 1495 int ret, inline_size; 1496 void *link; 1497 1498 ret = ext4_get_inode_loc(inode, &iloc); 1499 if (ret) 1500 return ERR_PTR(ret); 1501 1502 ret = -ENOMEM; 1503 inline_size = ext4_get_inline_size(inode); 1504 link = kmalloc(inline_size + 1, GFP_NOFS); 1505 if (!link) 1506 goto out; 1507 1508 ret = ext4_read_inline_data(inode, link, inline_size, &iloc); 1509 if (ret < 0) { 1510 kfree(link); 1511 goto out; 1512 } 1513 nd_terminate_link(link, inode->i_size, ret); 1514 out: 1515 if (ret < 0) 1516 link = ERR_PTR(ret); 1517 brelse(iloc.bh); 1518 return link; 1519 } 1520 1521 struct buffer_head *ext4_get_first_inline_block(struct inode *inode, 1522 struct ext4_dir_entry_2 **parent_de, 1523 int *retval) 1524 { 1525 struct ext4_iloc iloc; 1526 1527 *retval = 
ext4_get_inode_loc(inode, &iloc); 1528 if (*retval) 1529 return NULL; 1530 1531 *parent_de = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block; 1532 1533 return iloc.bh; 1534 } 1535 1536 /* 1537 * Try to create the inline data for the new dir. 1538 * If it succeeds, return 0, otherwise return the error. 1539 * In case of ENOSPC, the caller should create the normal disk layout dir. 1540 */ 1541 int ext4_try_create_inline_dir(handle_t *handle, struct inode *parent, 1542 struct inode *inode) 1543 { 1544 int ret, inline_size = EXT4_MIN_INLINE_DATA_SIZE; 1545 struct ext4_iloc iloc; 1546 struct ext4_dir_entry_2 *de; 1547 1548 ret = ext4_get_inode_loc(inode, &iloc); 1549 if (ret) 1550 return ret; 1551 1552 ret = ext4_prepare_inline_data(handle, inode, inline_size); 1553 if (ret) 1554 goto out; 1555 1556 /* 1557 * For inline dir, we only save the inode information for the ".." 1558 * and create a fake dentry to cover the left space. 1559 */ 1560 de = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block; 1561 de->inode = cpu_to_le32(parent->i_ino); 1562 de = (struct ext4_dir_entry_2 *)((void *)de + EXT4_INLINE_DOTDOT_SIZE); 1563 de->inode = 0; 1564 de->rec_len = ext4_rec_len_to_disk( 1565 inline_size - EXT4_INLINE_DOTDOT_SIZE, 1566 inline_size); 1567 set_nlink(inode, 2); 1568 inode->i_size = EXT4_I(inode)->i_disksize = inline_size; 1569 out: 1570 brelse(iloc.bh); 1571 return ret; 1572 } 1573 1574 struct buffer_head *ext4_find_inline_entry(struct inode *dir, 1575 struct ext4_filename *fname, 1576 struct ext4_dir_entry_2 **res_dir, 1577 int *has_inline_data) 1578 { 1579 struct ext4_xattr_ibody_find is = { 1580 .s = { .not_found = -ENODATA, }, 1581 }; 1582 struct ext4_xattr_info i = { 1583 .name_index = EXT4_XATTR_INDEX_SYSTEM, 1584 .name = EXT4_XATTR_SYSTEM_DATA, 1585 }; 1586 int ret; 1587 void *inline_start; 1588 int inline_size; 1589 1590 ret = ext4_get_inode_loc(dir, &is.iloc); 1591 if (ret) 1592 return ERR_PTR(ret); 1593 1594 
down_read(&EXT4_I(dir)->xattr_sem); 1595 1596 ret = ext4_xattr_ibody_find(dir, &i, &is); 1597 if (ret) 1598 goto out; 1599 1600 if (!ext4_has_inline_data(dir)) { 1601 *has_inline_data = 0; 1602 goto out; 1603 } 1604 1605 inline_start = (void *)ext4_raw_inode(&is.iloc)->i_block + 1606 EXT4_INLINE_DOTDOT_SIZE; 1607 inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE; 1608 ret = ext4_search_dir(is.iloc.bh, inline_start, inline_size, 1609 dir, fname, 0, res_dir); 1610 if (ret == 1) 1611 goto out_find; 1612 if (ret < 0) 1613 goto out; 1614 1615 if (ext4_get_inline_size(dir) == EXT4_MIN_INLINE_DATA_SIZE) 1616 goto out; 1617 1618 inline_start = ext4_get_inline_xattr_pos(dir, &is.iloc); 1619 inline_size = ext4_get_inline_size(dir) - EXT4_MIN_INLINE_DATA_SIZE; 1620 1621 ret = ext4_search_dir(is.iloc.bh, inline_start, inline_size, 1622 dir, fname, 0, res_dir); 1623 if (ret == 1) 1624 goto out_find; 1625 1626 out: 1627 brelse(is.iloc.bh); 1628 if (ret < 0) 1629 is.iloc.bh = ERR_PTR(ret); 1630 else 1631 is.iloc.bh = NULL; 1632 out_find: 1633 up_read(&EXT4_I(dir)->xattr_sem); 1634 return is.iloc.bh; 1635 } 1636 1637 int ext4_delete_inline_entry(handle_t *handle, 1638 struct inode *dir, 1639 struct ext4_dir_entry_2 *de_del, 1640 struct buffer_head *bh, 1641 int *has_inline_data) 1642 { 1643 int err, inline_size, no_expand; 1644 struct ext4_iloc iloc; 1645 void *inline_start; 1646 1647 err = ext4_get_inode_loc(dir, &iloc); 1648 if (err) 1649 return err; 1650 1651 ext4_write_lock_xattr(dir, &no_expand); 1652 if (!ext4_has_inline_data(dir)) { 1653 *has_inline_data = 0; 1654 goto out; 1655 } 1656 1657 if ((void *)de_del - ((void *)ext4_raw_inode(&iloc)->i_block) < 1658 EXT4_MIN_INLINE_DATA_SIZE) { 1659 inline_start = (void *)ext4_raw_inode(&iloc)->i_block + 1660 EXT4_INLINE_DOTDOT_SIZE; 1661 inline_size = EXT4_MIN_INLINE_DATA_SIZE - 1662 EXT4_INLINE_DOTDOT_SIZE; 1663 } else { 1664 inline_start = ext4_get_inline_xattr_pos(dir, &iloc); 1665 inline_size = 
ext4_get_inline_size(dir) - 1666 EXT4_MIN_INLINE_DATA_SIZE; 1667 } 1668 1669 BUFFER_TRACE(bh, "get_write_access"); 1670 err = ext4_journal_get_write_access(handle, dir->i_sb, bh, 1671 EXT4_JTR_NONE); 1672 if (err) 1673 goto out; 1674 1675 err = ext4_generic_delete_entry(dir, de_del, bh, 1676 inline_start, inline_size, 0); 1677 if (err) 1678 goto out; 1679 1680 ext4_show_inline_dir(dir, iloc.bh, inline_start, inline_size); 1681 out: 1682 ext4_write_unlock_xattr(dir, &no_expand); 1683 if (likely(err == 0)) 1684 err = ext4_mark_inode_dirty(handle, dir); 1685 brelse(iloc.bh); 1686 if (err != -ENOENT) 1687 ext4_std_error(dir->i_sb, err); 1688 return err; 1689 } 1690 1691 /* 1692 * Get the inline dentry at offset. 1693 */ 1694 static inline struct ext4_dir_entry_2 * 1695 ext4_get_inline_entry(struct inode *inode, 1696 struct ext4_iloc *iloc, 1697 unsigned int offset, 1698 void **inline_start, 1699 int *inline_size) 1700 { 1701 void *inline_pos; 1702 1703 BUG_ON(offset > ext4_get_inline_size(inode)); 1704 1705 if (offset < EXT4_MIN_INLINE_DATA_SIZE) { 1706 inline_pos = (void *)ext4_raw_inode(iloc)->i_block; 1707 *inline_size = EXT4_MIN_INLINE_DATA_SIZE; 1708 } else { 1709 inline_pos = ext4_get_inline_xattr_pos(inode, iloc); 1710 offset -= EXT4_MIN_INLINE_DATA_SIZE; 1711 *inline_size = ext4_get_inline_size(inode) - 1712 EXT4_MIN_INLINE_DATA_SIZE; 1713 } 1714 1715 if (inline_start) 1716 *inline_start = inline_pos; 1717 return (struct ext4_dir_entry_2 *)(inline_pos + offset); 1718 } 1719 1720 bool empty_inline_dir(struct inode *dir, int *has_inline_data) 1721 { 1722 int err, inline_size; 1723 struct ext4_iloc iloc; 1724 size_t inline_len; 1725 void *inline_pos; 1726 unsigned int offset; 1727 struct ext4_dir_entry_2 *de; 1728 bool ret = false; 1729 1730 err = ext4_get_inode_loc(dir, &iloc); 1731 if (err) { 1732 EXT4_ERROR_INODE_ERR(dir, -err, 1733 "error %d getting inode %lu block", 1734 err, dir->i_ino); 1735 return false; 1736 } 1737 1738 down_read(&EXT4_I(dir)->xattr_sem); 
1739 if (!ext4_has_inline_data(dir)) { 1740 *has_inline_data = 0; 1741 ret = true; 1742 goto out; 1743 } 1744 1745 de = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block; 1746 if (!le32_to_cpu(de->inode)) { 1747 ext4_warning(dir->i_sb, 1748 "bad inline directory (dir #%lu) - no `..'", 1749 dir->i_ino); 1750 goto out; 1751 } 1752 1753 inline_len = ext4_get_inline_size(dir); 1754 offset = EXT4_INLINE_DOTDOT_SIZE; 1755 while (offset < inline_len) { 1756 de = ext4_get_inline_entry(dir, &iloc, offset, 1757 &inline_pos, &inline_size); 1758 if (ext4_check_dir_entry(dir, NULL, de, 1759 iloc.bh, inline_pos, 1760 inline_size, offset)) { 1761 ext4_warning(dir->i_sb, 1762 "bad inline directory (dir #%lu) - " 1763 "inode %u, rec_len %u, name_len %d" 1764 "inline size %d", 1765 dir->i_ino, le32_to_cpu(de->inode), 1766 le16_to_cpu(de->rec_len), de->name_len, 1767 inline_size); 1768 goto out; 1769 } 1770 if (le32_to_cpu(de->inode)) { 1771 goto out; 1772 } 1773 offset += ext4_rec_len_from_disk(de->rec_len, inline_size); 1774 } 1775 1776 ret = true; 1777 out: 1778 up_read(&EXT4_I(dir)->xattr_sem); 1779 brelse(iloc.bh); 1780 return ret; 1781 } 1782 1783 int ext4_destroy_inline_data(handle_t *handle, struct inode *inode) 1784 { 1785 int ret, no_expand; 1786 1787 ext4_write_lock_xattr(inode, &no_expand); 1788 ret = ext4_destroy_inline_data_nolock(handle, inode); 1789 ext4_write_unlock_xattr(inode, &no_expand); 1790 1791 return ret; 1792 } 1793 1794 int ext4_inline_data_iomap(struct inode *inode, struct iomap *iomap) 1795 { 1796 __u64 addr; 1797 int error = -EAGAIN; 1798 struct ext4_iloc iloc; 1799 1800 down_read(&EXT4_I(inode)->xattr_sem); 1801 if (!ext4_has_inline_data(inode)) 1802 goto out; 1803 1804 error = ext4_get_inode_loc(inode, &iloc); 1805 if (error) 1806 goto out; 1807 1808 addr = (__u64)iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits; 1809 addr += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data; 1810 addr += offsetof(struct ext4_inode, i_block); 1811 1812 
brelse(iloc.bh); 1813 1814 iomap->addr = addr; 1815 iomap->offset = 0; 1816 iomap->length = min_t(loff_t, ext4_get_inline_size(inode), 1817 i_size_read(inode)); 1818 iomap->type = IOMAP_INLINE; 1819 iomap->flags = 0; 1820 1821 out: 1822 up_read(&EXT4_I(inode)->xattr_sem); 1823 return error; 1824 } 1825 1826 int ext4_inline_data_truncate(struct inode *inode, int *has_inline) 1827 { 1828 handle_t *handle; 1829 int inline_size, value_len, needed_blocks, no_expand, err = 0; 1830 size_t i_size; 1831 void *value = NULL; 1832 struct ext4_xattr_ibody_find is = { 1833 .s = { .not_found = -ENODATA, }, 1834 }; 1835 struct ext4_xattr_info i = { 1836 .name_index = EXT4_XATTR_INDEX_SYSTEM, 1837 .name = EXT4_XATTR_SYSTEM_DATA, 1838 }; 1839 1840 1841 needed_blocks = ext4_chunk_trans_extent(inode, 1); 1842 handle = ext4_journal_start(inode, EXT4_HT_INODE, needed_blocks); 1843 if (IS_ERR(handle)) 1844 return PTR_ERR(handle); 1845 1846 ext4_write_lock_xattr(inode, &no_expand); 1847 if (!ext4_has_inline_data(inode)) { 1848 ext4_write_unlock_xattr(inode, &no_expand); 1849 *has_inline = 0; 1850 ext4_journal_stop(handle); 1851 return 0; 1852 } 1853 1854 if ((err = ext4_orphan_add(handle, inode)) != 0) 1855 goto out; 1856 1857 if ((err = ext4_get_inode_loc(inode, &is.iloc)) != 0) 1858 goto out; 1859 1860 down_write(&EXT4_I(inode)->i_data_sem); 1861 i_size = inode->i_size; 1862 inline_size = ext4_get_inline_size(inode); 1863 EXT4_I(inode)->i_disksize = i_size; 1864 1865 if (i_size < inline_size) { 1866 /* 1867 * if there's inline data to truncate and this file was 1868 * converted to extents after that inline data was written, 1869 * the extent status cache must be cleared to avoid leaving 1870 * behind stale delayed allocated extent entries 1871 */ 1872 if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) 1873 ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); 1874 1875 /* Clear the content in the xattr space. 
*/ 1876 if (inline_size > EXT4_MIN_INLINE_DATA_SIZE) { 1877 if ((err = ext4_xattr_ibody_find(inode, &i, &is)) != 0) 1878 goto out_error; 1879 1880 if (is.s.not_found) { 1881 EXT4_ERROR_INODE(inode, 1882 "missing inline data xattr"); 1883 err = -EFSCORRUPTED; 1884 goto out_error; 1885 } 1886 1887 value_len = le32_to_cpu(is.s.here->e_value_size); 1888 value = kmalloc(value_len, GFP_NOFS); 1889 if (!value) { 1890 err = -ENOMEM; 1891 goto out_error; 1892 } 1893 1894 err = ext4_xattr_ibody_get(inode, i.name_index, 1895 i.name, value, value_len); 1896 if (err <= 0) 1897 goto out_error; 1898 1899 i.value = value; 1900 i.value_len = i_size > EXT4_MIN_INLINE_DATA_SIZE ? 1901 i_size - EXT4_MIN_INLINE_DATA_SIZE : 0; 1902 err = ext4_xattr_ibody_set(handle, inode, &i, &is); 1903 if (err) 1904 goto out_error; 1905 } 1906 1907 /* Clear the content within i_blocks. */ 1908 if (i_size < EXT4_MIN_INLINE_DATA_SIZE) { 1909 void *p = (void *) ext4_raw_inode(&is.iloc)->i_block; 1910 memset(p + i_size, 0, 1911 EXT4_MIN_INLINE_DATA_SIZE - i_size); 1912 } 1913 1914 EXT4_I(inode)->i_inline_size = i_size < 1915 EXT4_MIN_INLINE_DATA_SIZE ? 
1916 EXT4_MIN_INLINE_DATA_SIZE : i_size; 1917 } 1918 1919 out_error: 1920 up_write(&EXT4_I(inode)->i_data_sem); 1921 out: 1922 brelse(is.iloc.bh); 1923 ext4_write_unlock_xattr(inode, &no_expand); 1924 kfree(value); 1925 if (inode->i_nlink) 1926 ext4_orphan_del(handle, inode); 1927 1928 if (err == 0) { 1929 inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); 1930 err = ext4_mark_inode_dirty(handle, inode); 1931 if (IS_SYNC(inode)) 1932 ext4_handle_sync(handle); 1933 } 1934 ext4_journal_stop(handle); 1935 return err; 1936 } 1937 1938 int ext4_convert_inline_data(struct inode *inode) 1939 { 1940 int error, needed_blocks, no_expand; 1941 handle_t *handle; 1942 struct ext4_iloc iloc; 1943 1944 if (!ext4_has_inline_data(inode)) { 1945 ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 1946 return 0; 1947 } else if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { 1948 /* 1949 * Inode has inline data but EXT4_STATE_MAY_INLINE_DATA is 1950 * cleared. This means we are in the middle of moving of 1951 * inline data to delay allocated block. Just force writeout 1952 * here to finish conversion. 1953 */ 1954 error = filemap_flush(inode->i_mapping); 1955 if (error) 1956 return error; 1957 if (!ext4_has_inline_data(inode)) 1958 return 0; 1959 } 1960 1961 needed_blocks = ext4_chunk_trans_extent(inode, 1); 1962 1963 iloc.bh = NULL; 1964 error = ext4_get_inode_loc(inode, &iloc); 1965 if (error) 1966 return error; 1967 1968 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks); 1969 if (IS_ERR(handle)) { 1970 error = PTR_ERR(handle); 1971 goto out_free; 1972 } 1973 1974 ext4_write_lock_xattr(inode, &no_expand); 1975 if (ext4_has_inline_data(inode)) 1976 error = ext4_convert_inline_data_nolock(handle, inode, &iloc); 1977 ext4_write_unlock_xattr(inode, &no_expand); 1978 ext4_journal_stop(handle); 1979 out_free: 1980 brelse(iloc.bh); 1981 return error; 1982 } 1983