1 // SPDX-License-Identifier: LGPL-2.1 2 /* 3 * Copyright (c) 2012 Taobao. 4 * Written by Tao Ma <boyu.mt@taobao.com> 5 */ 6 7 #include <linux/iomap.h> 8 #include <linux/fiemap.h> 9 #include <linux/namei.h> 10 #include <linux/iversion.h> 11 #include <linux/sched/mm.h> 12 13 #include "ext4_jbd2.h" 14 #include "ext4.h" 15 #include "xattr.h" 16 #include "truncate.h" 17 18 #define EXT4_XATTR_SYSTEM_DATA "data" 19 #define EXT4_MIN_INLINE_DATA_SIZE ((sizeof(__le32) * EXT4_N_BLOCKS)) 20 #define EXT4_INLINE_DOTDOT_OFFSET 2 21 #define EXT4_INLINE_DOTDOT_SIZE 4 22 23 24 static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping, 25 struct inode *inode, 26 void **fsdata); 27 28 static int ext4_get_inline_size(struct inode *inode) 29 { 30 if (EXT4_I(inode)->i_inline_off) 31 return EXT4_I(inode)->i_inline_size; 32 33 return 0; 34 } 35 36 static int get_max_inline_xattr_value_size(struct inode *inode, 37 struct ext4_iloc *iloc) 38 { 39 struct ext4_xattr_ibody_header *header; 40 struct ext4_xattr_entry *entry; 41 struct ext4_inode *raw_inode; 42 void *end; 43 int free, min_offs; 44 45 if (!EXT4_INODE_HAS_XATTR_SPACE(inode)) 46 return 0; 47 48 min_offs = EXT4_SB(inode->i_sb)->s_inode_size - 49 EXT4_GOOD_OLD_INODE_SIZE - 50 EXT4_I(inode)->i_extra_isize - 51 sizeof(struct ext4_xattr_ibody_header); 52 53 /* 54 * We need to subtract another sizeof(__u32) since an in-inode xattr 55 * needs an empty 4 bytes to indicate the gap between the xattr entry 56 * and the name/value pair. 57 */ 58 if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR)) 59 return EXT4_XATTR_SIZE(min_offs - 60 EXT4_XATTR_LEN(strlen(EXT4_XATTR_SYSTEM_DATA)) - 61 EXT4_XATTR_ROUND - sizeof(__u32)); 62 63 raw_inode = ext4_raw_inode(iloc); 64 header = IHDR(inode, raw_inode); 65 entry = IFIRST(header); 66 end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; 67 68 /* Compute min_offs. 
*/ 69 while (!IS_LAST_ENTRY(entry)) { 70 void *next = EXT4_XATTR_NEXT(entry); 71 72 if (next >= end) { 73 EXT4_ERROR_INODE(inode, 74 "corrupt xattr in inline inode"); 75 return 0; 76 } 77 if (!entry->e_value_inum && entry->e_value_size) { 78 size_t offs = le16_to_cpu(entry->e_value_offs); 79 if (offs < min_offs) 80 min_offs = offs; 81 } 82 entry = next; 83 } 84 free = min_offs - 85 ((void *)entry - (void *)IFIRST(header)) - sizeof(__u32); 86 87 if (EXT4_I(inode)->i_inline_off) { 88 entry = (struct ext4_xattr_entry *) 89 ((void *)raw_inode + EXT4_I(inode)->i_inline_off); 90 91 free += EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size)); 92 goto out; 93 } 94 95 free -= EXT4_XATTR_LEN(strlen(EXT4_XATTR_SYSTEM_DATA)); 96 97 if (free > EXT4_XATTR_ROUND) 98 free = EXT4_XATTR_SIZE(free - EXT4_XATTR_ROUND); 99 else 100 free = 0; 101 102 out: 103 return free; 104 } 105 106 /* 107 * Get the maximum size we now can store in an inode. 108 * If we can't find the space for a xattr entry, don't use the space 109 * of the extents since we have no space to indicate the inline data. 
 *
 * Return: 0 if the inode has no extra inode space (i_extra_isize == 0), if
 * the on-disk inode cannot be located (an ext4 error is logged), or if
 * get_max_inline_xattr_value_size() reports no usable space; otherwise that
 * value plus EXT4_MIN_INLINE_DATA_SIZE (the i_block area).
 */
int ext4_get_max_inline_size(struct inode *inode)
{
	int error, max_inline_size;
	struct ext4_iloc iloc;

	if (EXT4_I(inode)->i_extra_isize == 0)
		return 0;

	error = ext4_get_inode_loc(inode, &iloc);
	if (error) {
		ext4_error_inode_err(inode, __func__, __LINE__, 0, -error,
				     "can't get inode location %lu",
				     inode->i_ino);
		return 0;
	}

	/* The in-inode xattr area is inspected with xattr_sem held for reading. */
	down_read(&EXT4_I(inode)->xattr_sem);
	max_inline_size = get_max_inline_xattr_value_size(inode, &iloc);
	up_read(&EXT4_I(inode)->xattr_sem);

	brelse(iloc.bh);

	if (!max_inline_size)
		return 0;

	return max_inline_size + EXT4_MIN_INLINE_DATA_SIZE;
}

/*
 * Look up the inline data xattr in the inode body and, if it exists, cache
 * its offset (i_inline_off) and the total inline size (i_inline_size) in
 * the in-memory inode.
 *
 * this function does not take xattr_sem, which is OK because it is
 * currently only used in a code path coming from ext4_iget, before
 * the new inode has been unlocked
 * (it is also called from ext4_write_inline_data_end(), which holds the
 * xattr write lock around the call).
 *
 * Return: 0 on success (including "no inline data xattr" and
 * i_extra_isize == 0), -EFSCORRUPTED if the xattr points at an external
 * xattr inode, or the error from ext4_get_inode_loc() /
 * ext4_xattr_ibody_find().
 */
int ext4_find_inline_data_nolock(struct inode *inode)
{
	struct ext4_xattr_ibody_find is = {
		.s = { .not_found = -ENODATA, },
	};
	struct ext4_xattr_info i = {
		.name_index = EXT4_XATTR_INDEX_SYSTEM,
		.name = EXT4_XATTR_SYSTEM_DATA,
	};
	int error;

	if (EXT4_I(inode)->i_extra_isize == 0)
		return 0;

	error = ext4_get_inode_loc(inode, &is.iloc);
	if (error)
		return error;

	error = ext4_xattr_ibody_find(inode, &i, &is);
	if (error)
		goto out;

	if (!is.s.not_found) {
		/* Inline data must live in the inode body itself. */
		if (is.s.here->e_value_inum) {
			EXT4_ERROR_INODE(inode, "inline data xattr refers "
					 "to an external xattr inode");
			error = -EFSCORRUPTED;
			goto out;
		}
		/* Offset of the xattr entry relative to the raw inode. */
		EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here -
					(void *)ext4_raw_inode(&is.iloc));
		EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE +
				le32_to_cpu(is.s.here->e_value_size);
	}
out:
	brelse(is.iloc.bh);
	return error;
}

/*
 * Copy up to @len bytes of inline data into @buffer.
 * The first EXT4_MIN_INLINE_DATA_SIZE bytes come from i_block of the raw
 * inode in @iloc; anything beyond that comes from the value of the inline
 * data xattr, clamped to that value's e_value_size.
 *
 * @len must not exceed i_inline_size (BUG_ON otherwise).
 *
 * Return: the number of bytes copied (0 when @len is 0).
 */
static int ext4_read_inline_data(struct inode *inode, void *buffer,
				 unsigned int len,
				 struct ext4_iloc *iloc)
{
	struct ext4_xattr_entry *entry;
	struct ext4_xattr_ibody_header *header;
	int cp_len = 0;
	struct ext4_inode *raw_inode;

	if (!len)
		return 0;

	BUG_ON(len > EXT4_I(inode)->i_inline_size);

	/* Part 1: the i_block area. */
	cp_len = min_t(unsigned int, len, EXT4_MIN_INLINE_DATA_SIZE);

	raw_inode = ext4_raw_inode(iloc);
	memcpy(buffer, (void *)(raw_inode->i_block), cp_len);

	len -= cp_len;
	buffer += cp_len;

	if (!len)
		goto out;

	/* Part 2: the xattr value; e_value_offs is relative to IFIRST(header). */
	header = IHDR(inode, raw_inode);
	entry = (struct ext4_xattr_entry *)((void *)raw_inode +
					    EXT4_I(inode)->i_inline_off);
	len = min_t(unsigned int, len,
		    (unsigned int)le32_to_cpu(entry->e_value_size));

	memcpy(buffer,
	       (void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs), len);
	cp_len += len;

out:
	return cp_len;
}

/*
 * Write @len bytes into the inline data area of the inode at offset @pos.
 * @buffer is the base of the source data; the bytes copied are taken from
 * @buffer + @pos. The part of the range below EXT4_MIN_INLINE_DATA_SIZE is
 * stored in i_block of the raw inode in @iloc, the remainder in the value
 * of the inline data xattr.
 *
 * The inode must already have an inline data xattr and the range must fit
 * in i_inline_size (both enforced with BUG_ON). Nothing is written when
 * ext4_emergency_state() is true for the superblock.
 */
static void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc,
				   void *buffer, loff_t pos, unsigned int len)
{
	struct ext4_xattr_entry *entry;
	struct ext4_xattr_ibody_header *header;
	struct ext4_inode *raw_inode;
	int cp_len = 0;

	if (unlikely(ext4_emergency_state(inode->i_sb)))
		return;

	BUG_ON(!EXT4_I(inode)->i_inline_off);
	BUG_ON(pos + len > EXT4_I(inode)->i_inline_size);

	raw_inode = ext4_raw_inode(iloc);
	buffer += pos;

	if (pos < EXT4_MIN_INLINE_DATA_SIZE) {
		/* Bytes of the range that still fall inside i_block. */
		cp_len = pos + len > EXT4_MIN_INLINE_DATA_SIZE ?
			 EXT4_MIN_INLINE_DATA_SIZE - pos : len;
		memcpy((void *)raw_inode->i_block + pos, buffer, cp_len);

		len -= cp_len;
		buffer += cp_len;
		pos += cp_len;
	}

	if (!len)
		return;

	/* From here on @pos is an offset into the xattr value. */
	pos -= EXT4_MIN_INLINE_DATA_SIZE;
	header = IHDR(inode, raw_inode);
	entry = (struct ext4_xattr_entry *)((void *)raw_inode +
					    EXT4_I(inode)->i_inline_off);

	memcpy((void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs) + pos,
	       buffer, len);
}

/*
 * Create the inline data xattr for an inode that has none yet, sized so
 * that @len bytes of inline data fit (the xattr value covers whatever
 * exceeds EXT4_MIN_INLINE_DATA_SIZE, or is empty).
 *
 * On success i_block is zeroed, i_inline_off/i_inline_size are set, the
 * EXTENTS inode flag is cleared and the INLINE_DATA flag is set. If the
 * xattr cannot be stored for lack of space, EXT4_STATE_MAY_INLINE_DATA is
 * cleared on the inode.
 *
 * The xattr must not already exist (BUG_ON).
 *
 * Return: 0 on success or a negative error.
 */
static int ext4_create_inline_data(handle_t *handle,
				   struct inode *inode, unsigned len)
{
	int error;
	void *value = NULL;
	struct ext4_xattr_ibody_find is = {
		.s = { .not_found = -ENODATA, },
	};
	struct ext4_xattr_info i = {
		.name_index = EXT4_XATTR_INDEX_SYSTEM,
		.name = EXT4_XATTR_SYSTEM_DATA,
	};

	error = ext4_get_inode_loc(inode, &is.iloc);
	if (error)
		return error;

	BUFFER_TRACE(is.iloc.bh, "get_write_access");
	error = ext4_journal_get_write_access(handle, inode->i_sb, is.iloc.bh,
					      EXT4_JTR_NONE);
	if (error)
		goto out;

	if (len > EXT4_MIN_INLINE_DATA_SIZE) {
		value = EXT4_ZERO_XATTR_VALUE;
		len -= EXT4_MIN_INLINE_DATA_SIZE;
	} else {
		value = "";
		len = 0;
	}

	/* Insert the xattr entry. */
	i.value = value;
	i.value_len = len;

	error = ext4_xattr_ibody_find(inode, &i, &is);
	if (error)
		goto out;

	BUG_ON(!is.s.not_found);

	error = ext4_xattr_ibody_set(handle, inode, &i, &is);
	if (error) {
		if (error == -ENOSPC)
			ext4_clear_inode_state(inode,
					       EXT4_STATE_MAY_INLINE_DATA);
		goto out;
	}

	memset((void *)ext4_raw_inode(&is.iloc)->i_block,
		0, EXT4_MIN_INLINE_DATA_SIZE);

	EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here -
				      (void *)ext4_raw_inode(&is.iloc));
	EXT4_I(inode)->i_inline_size = len + EXT4_MIN_INLINE_DATA_SIZE;
	ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
	ext4_set_inode_flag(inode, EXT4_INODE_INLINE_DATA);
	/*
	 * NOTE(review): the extra reference presumably balances one dropped
	 * by ext4_mark_iloc_dirty(), so that the brelse() below stays
	 * paired with ext4_get_inode_loc() - confirm against that helper.
	 */
	get_bh(is.iloc.bh);
	error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);

out:
	brelse(is.iloc.bh);
	return error;
}

/*
 * Grow the existing inline data xattr so that @len bytes of inline data
 * fit. If @len already fits in i_inline_size nothing is done.
 *
 * The current xattr value is read into a zero-filled buffer of the new
 * length and written back with that length, then i_inline_off and
 * i_inline_size are refreshed and EXT4_STATE_MAY_INLINE_DATA is set.
 *
 * The xattr must already exist (BUG_ON).
 *
 * Return: 0 on success or a negative error.
 */
static int ext4_update_inline_data(handle_t *handle, struct inode *inode,
				   unsigned int len)
{
	int error;
	void *value = NULL;
	struct ext4_xattr_ibody_find is = {
		.s = { .not_found = -ENODATA, },
	};
	struct ext4_xattr_info i = {
		.name_index = EXT4_XATTR_INDEX_SYSTEM,
		.name = EXT4_XATTR_SYSTEM_DATA,
	};

	/* If the old space is ok, write the data directly. */
	if (len <= EXT4_I(inode)->i_inline_size)
		return 0;

	error = ext4_get_inode_loc(inode, &is.iloc);
	if (error)
		return error;

	error = ext4_xattr_ibody_find(inode, &i, &is);
	if (error)
		goto out;

	BUG_ON(is.s.not_found);

	/* From here on @len is the new length of the xattr value only. */
	len -= EXT4_MIN_INLINE_DATA_SIZE;
	value = kzalloc(len, GFP_NOFS);
	if (!value) {
		error = -ENOMEM;
		goto out;
	}

	/* Only negative results are failures; the success value is overwritten below. */
	error = ext4_xattr_ibody_get(inode, i.name_index, i.name,
				     value, len);
	if (error < 0)
		goto out;

	BUFFER_TRACE(is.iloc.bh, "get_write_access");
	error = ext4_journal_get_write_access(handle, inode->i_sb, is.iloc.bh,
					      EXT4_JTR_NONE);
	if (error)
		goto out;

	/* Update the xattr entry. */
	i.value = value;
	i.value_len = len;

	error = ext4_xattr_ibody_set(handle, inode, &i, &is);
	if (error)
		goto out;

	EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here -
				      (void *)ext4_raw_inode(&is.iloc));
	EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE +
				le32_to_cpu(is.s.here->e_value_size);
	ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
	/* NOTE(review): see ext4_create_inline_data() for the same get_bh() pairing. */
	get_bh(is.iloc.bh);
	error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);

out:
	kfree(value);
	brelse(is.iloc.bh);
	return error;
}

/*
 * Make sure the inode can hold @len bytes of inline data, creating or
 * growing the inline data xattr under the xattr write lock.
 *
 * Return: 0 on success, -ENOSPC if the inode may not use inline data or
 * @len exceeds ext4_get_max_inline_size(), or the error from
 * ext4_update_inline_data() / ext4_create_inline_data().
 */
static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
				    unsigned int len)
{
	int ret, size, no_expand;
	struct ext4_inode_info *ei = EXT4_I(inode);

	if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
		return -ENOSPC;

	size = ext4_get_max_inline_size(inode);
	if (size < len)
		return -ENOSPC;

	ext4_write_lock_xattr(inode, &no_expand);

	if (ei->i_inline_off)
		ret = ext4_update_inline_data(handle, inode, len);
	else
		ret = ext4_create_inline_data(handle, inode, len);

	ext4_write_unlock_xattr(inode, &no_expand);
	return ret;
}

/*
 * Remove the inline data of an inode: delete the inline data xattr, zero
 * i_block (on disk and in ei->i_data), switch the inode back to extents
 * when the filesystem has the extents feature and the inode is a
 * directory, regular file or symlink, and clear the INLINE_DATA flag.
 *
 * No xattr locking is done here (hence "_nolock"); the callers in this
 * file hold the xattr write lock.
 *
 * Return: 0 on success or if the inode has no inline data xattr recorded;
 * -ENODATA is also mapped to 0; otherwise a negative error.
 */
static int ext4_destroy_inline_data_nolock(handle_t *handle,
					   struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_xattr_ibody_find is = {
		.s = { .not_found = 0, },
	};
	/* A NULL value with zero length asks ext4_xattr_ibody_set() to drop the entry. */
	struct ext4_xattr_info i = {
		.name_index = EXT4_XATTR_INDEX_SYSTEM,
		.name = EXT4_XATTR_SYSTEM_DATA,
		.value = NULL,
		.value_len = 0,
	};
	int error;

	if (!ei->i_inline_off)
		return 0;

	error = ext4_get_inode_loc(inode, &is.iloc);
	if (error)
		return error;

	error = ext4_xattr_ibody_find(inode, &i, &is);
	if (error)
		goto out;

	BUFFER_TRACE(is.iloc.bh, "get_write_access");
	error = ext4_journal_get_write_access(handle, inode->i_sb, is.iloc.bh,
					      EXT4_JTR_NONE);
	if (error)
		goto out;

	error = ext4_xattr_ibody_set(handle, inode, &i, &is);
	if (error)
		goto out;

	memset((void *)ext4_raw_inode(&is.iloc)->i_block,
		0, EXT4_MIN_INLINE_DATA_SIZE);
	memset(ei->i_data, 0, EXT4_MIN_INLINE_DATA_SIZE);

	if (ext4_has_feature_extents(inode->i_sb)) {
		if (S_ISDIR(inode->i_mode) ||
		    S_ISREG(inode->i_mode) || S_ISLNK(inode->i_mode)) {
			ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
			ext4_ext_tree_init(handle, inode);
		}
	}
	ext4_clear_inode_flag(inode, EXT4_INODE_INLINE_DATA);

	get_bh(is.iloc.bh);
	error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);

	/* The in-memory inline state is reset even if marking the inode dirty failed. */
	EXT4_I(inode)->i_inline_off = 0;
	EXT4_I(inode)->i_inline_size = 0;
	ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
out:
	brelse(is.iloc.bh);
	if (error == -ENODATA)
		error = 0;
	return error;
}

/*
 * Fill the first folio of the file from the inode's inline data, zero the
 * rest of the folio and mark it uptodate.
 *
 * The folio must be locked and have index 0, and the inode must have
 * inline data (all enforced with BUG_ON). No xattr locking is done here.
 *
 * Return: the value of ext4_read_inline_data() (number of bytes copied),
 * 0 after a warning if no inline data xattr offset is recorded, or the
 * error from ext4_get_inode_loc().
 */
static int ext4_read_inline_folio(struct inode *inode, struct folio *folio)
{
	void *kaddr;
	int ret = 0;
	size_t len;
	struct ext4_iloc iloc;

	BUG_ON(!folio_test_locked(folio));
	BUG_ON(!ext4_has_inline_data(inode));
	BUG_ON(folio->index);

	if (!EXT4_I(inode)->i_inline_off) {
		ext4_warning(inode->i_sb, "inode %lu doesn't have inline data.",
			     inode->i_ino);
		goto out;
	}

	ret = ext4_get_inode_loc(inode, &iloc);
	if (ret)
		goto out;

	/* Never expose more than i_size bytes of the inline area. */
	len = min_t(size_t, ext4_get_inline_size(inode), i_size_read(inode));
	BUG_ON(len > PAGE_SIZE);
	kaddr = kmap_local_folio(folio, 0);
	ret = ext4_read_inline_data(inode, kaddr, len, &iloc);
	kaddr = folio_zero_tail(folio, len, kaddr + len);
	kunmap_local(kaddr);
	folio_mark_uptodate(folio);
	brelse(iloc.bh);

out:
	return ret;
}

/*
 * Read a folio of an inline data file.
 *
 * Return: -EAGAIN if the inode has no inline data (the folio is NOT
 * unlocked on that path); otherwise the folio is unlocked and 0 or a
 * negative error is returned.
 */
int ext4_readpage_inline(struct inode *inode, struct folio *folio)
{
	int ret = 0;

	down_read(&EXT4_I(inode)->xattr_sem);
	if (!ext4_has_inline_data(inode)) {
		up_read(&EXT4_I(inode)->xattr_sem);
		return -EAGAIN;
	}

	/*
	 * Current inline data can only exist in the 1st page,
	 * So for all the other pages, just set them uptodate.
	 */
	if (!folio->index)
		ret = ext4_read_inline_folio(inode, folio);
	else if (!folio_test_uptodate(folio)) {
		folio_zero_segment(folio, 0, folio_size(folio));
		folio_mark_uptodate(folio);
	}

	up_read(&EXT4_I(inode)->xattr_sem);

	folio_unlock(folio);
	/* A positive ret is a byte count from ext4_read_inline_folio(), not an error. */
	return ret >= 0 ? 0 : ret;
}

/*
 * Convert an inline data file to a block-mapped one: read the inline data
 * into the first folio of @mapping, destroy the inline data, and prepare
 * the block(s) backing the old inline range for writing.
 *
 * Starts and stops its own journal handle and retries the whole sequence
 * when block allocation fails with -ENOSPC and
 * ext4_should_retry_alloc() allows it.
 *
 * Return: 0 on success (also when the inode has no inline data any more),
 * otherwise a negative error.
 */
static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
					      struct inode *inode)
{
	int ret, needed_blocks, no_expand;
	handle_t *handle = NULL;
	int retries = 0, sem_held = 0;
	struct folio *folio = NULL;
	unsigned from, to;
	struct ext4_iloc iloc;

	if (!ext4_has_inline_data(inode)) {
		/*
		 * clear the flag so that no new write
		 * will trap here again.
		 */
		ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
		return 0;
	}

	needed_blocks = ext4_writepage_trans_blocks(inode);

	ret = ext4_get_inode_loc(inode, &iloc);
	if (ret)
		return ret;

retry:
	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		handle = NULL;
		goto out;
	}

	/* We cannot recurse into the filesystem as the transaction is already
	 * started */
	folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN | FGP_NOFS,
			mapping_gfp_mask(mapping));
	if (IS_ERR(folio)) {
		ret = PTR_ERR(folio);
		/* folio holds an error pointer here, so skip the unlock/put at out. */
		goto out_nofolio;
	}

	ext4_write_lock_xattr(inode, &no_expand);
	sem_held = 1;
	/* If some one has already done this for us, just exit. */
	if (!ext4_has_inline_data(inode)) {
		ret = 0;
		goto out;
	}

	from = 0;
	to = ext4_get_inline_size(inode);
	if (!folio_test_uptodate(folio)) {
		ret = ext4_read_inline_folio(inode, folio);
		if (ret < 0)
			goto out;
	}

	ret = ext4_destroy_inline_data_nolock(handle, inode);
	if (ret)
		goto out;

	if (ext4_should_dioread_nolock(inode)) {
		ret = ext4_block_write_begin(handle, folio, from, to,
					     ext4_get_block_unwritten);
	} else
		ret = ext4_block_write_begin(handle, folio, from, to,
					     ext4_get_block);

	if (!ret && ext4_should_journal_data(inode)) {
		ret = ext4_walk_page_buffers(handle, inode,
					     folio_buffers(folio), from, to,
					     NULL, do_journal_get_write_access);
	}

	if (ret) {
		/*
		 * Undo: drop the folio, the xattr lock and the handle (also
		 * clearing the local state so the exit path does not release
		 * them twice), then truncate whatever was allocated.
		 */
		folio_unlock(folio);
		folio_put(folio);
		folio = NULL;
		ext4_orphan_add(handle, inode);
		ext4_write_unlock_xattr(inode, &no_expand);
		sem_held = 0;
		ext4_journal_stop(handle);
		handle = NULL;
		ext4_truncate_failed_write(inode);
		/*
		 * If truncate failed early the inode might
		 * still be on the orphan list; we need to
		 * make sure the inode is removed from the
		 * orphan list in that case.
		 */
		if (inode->i_nlink)
			ext4_orphan_del(NULL, inode);
	}

	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
		goto retry;

	if (folio)
		block_commit_write(&folio->page, from, to);
out:
	if (folio) {
		folio_unlock(folio);
		folio_put(folio);
	}
out_nofolio:
	if (sem_held)
		ext4_write_unlock_xattr(inode, &no_expand);
	if (handle)
		ext4_journal_stop(handle);
	brelse(iloc.bh);
	return ret;
}

/*
 * Prepare the write for the inline data.
 * If the data can be written into the inode, we just read
 * the page and make it uptodate, and start the journal.
 * Otherwise read the page, make it dirty so that it can be
 * handled in writepages (the i_disksize update is left to the
 * normal ext4_da_write_end).
 *
 * @da selects the delayed-allocation conversion
 * (ext4_da_convert_inline_data_to_extent(), which also receives @fsdata)
 * instead of ext4_convert_inline_data_to_extent() when the data does not
 * fit inline.
 *
 * Return: 1 when the write can go inline; in that case *@foliop is the
 * locked first folio and the journal handle started here is left running
 * for the caller. 0 when the inode is no longer inline or was converted
 * successfully. A negative error otherwise.
 */
int ext4_generic_write_inline_data(struct address_space *mapping,
					  struct inode *inode,
					  loff_t pos, unsigned len,
					  struct folio **foliop,
					  void **fsdata, bool da)
{
	int ret;
	handle_t *handle;
	struct folio *folio;
	struct ext4_iloc iloc;
	int retries = 0;

	ret = ext4_get_inode_loc(inode, &iloc);
	if (ret)
		return ret;

retry_journal:
	handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		goto out_release_bh;
	}

	ret = ext4_prepare_inline_data(handle, inode, pos + len);
	if (ret && ret != -ENOSPC)
		goto out_stop_journal;

	if (ret == -ENOSPC) {
		/* The data does not fit inline: convert the file instead. */
		ext4_journal_stop(handle);
		if (!da) {
			brelse(iloc.bh);
			/* Retry inside */
			return ext4_convert_inline_data_to_extent(mapping, inode);
		}

		ret = ext4_da_convert_inline_data_to_extent(mapping, inode, fsdata);
		if (ret == -ENOSPC &&
		    ext4_should_retry_alloc(inode->i_sb, &retries))
			goto retry_journal;
		goto out_release_bh;
	}

	folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN | FGP_NOFS,
					mapping_gfp_mask(mapping));
	if (IS_ERR(folio)) {
		ret = PTR_ERR(folio);
		goto out_stop_journal;
	}

	down_read(&EXT4_I(inode)->xattr_sem);
	/* Someone else had converted it to extent */
	if (!ext4_has_inline_data(inode)) {
		ret = 0;
		goto out_release_folio;
	}

	if (!folio_test_uptodate(folio)) {
		ret = ext4_read_inline_folio(inode, folio);
		if (ret < 0)
			goto out_release_folio;
	}

	ret = ext4_journal_get_write_access(handle, inode->i_sb, iloc.bh, EXT4_JTR_NONE);
	if (ret)
		goto out_release_folio;
	/* Success: hand the locked folio to the caller, keep the handle open. */
	*foliop = folio;
	up_read(&EXT4_I(inode)->xattr_sem);
	brelse(iloc.bh);
	return 1;

out_release_folio:
	up_read(&EXT4_I(inode)->xattr_sem);
	folio_unlock(folio);
	folio_put(folio);
out_stop_journal:
	ext4_journal_stop(handle);
out_release_bh:
	brelse(iloc.bh);
	return ret;
}

/*
 * Try to write data in the inode.
 * If the inode has inline data, check whether the new write can be
 * in the inode also. If not, create the page and the handle, move the data
 * to the page, make it uptodate and let the later code create an extent for it.
 *
 * Return: the result of ext4_convert_inline_data_to_extent() when
 * @pos + @len exceeds ext4_get_max_inline_size(), otherwise the result of
 * ext4_generic_write_inline_data() called without delayed allocation.
 */
int ext4_try_to_write_inline_data(struct address_space *mapping,
				  struct inode *inode,
				  loff_t pos, unsigned len,
				  struct folio **foliop)
{
	if (pos + len > ext4_get_max_inline_size(inode))
		return ext4_convert_inline_data_to_extent(mapping, inode);
	return ext4_generic_write_inline_data(mapping, inode, pos, len,
					      foliop, NULL, false);
}

/*
 * Finish a write into inline data: copy @copied bytes at @pos from @folio
 * into the inode, update i_size, release the folio and stop the current
 * journal handle.
 *
 * A short copy into a folio that is not uptodate is treated as if nothing
 * was copied. If the write ends up short of @pos + @len relative to
 * i_size, the inode is put on the orphan list and truncated back.
 *
 * Return: a negative error, or the number of bytes accepted.
 */
int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
			       unsigned copied, struct folio *folio)
{
	handle_t *handle = ext4_journal_current_handle();
	int no_expand;
	void *kaddr;
	struct ext4_iloc iloc;
	int ret = 0, ret2;

	if (unlikely(copied < len) && !folio_test_uptodate(folio))
		copied = 0;

	if (likely(copied)) {
		ret = ext4_get_inode_loc(inode, &iloc);
		if (ret) {
			/* The folio is released here because "out" is past the common release. */
			folio_unlock(folio);
			folio_put(folio);
			ext4_std_error(inode->i_sb, ret);
			goto out;
		}
		ext4_write_lock_xattr(inode, &no_expand);
		BUG_ON(!ext4_has_inline_data(inode));

		/*
		 * ei->i_inline_off may have changed since
		 * ext4_write_begin() called
		 * ext4_try_to_write_inline_data()
		 */
		(void) ext4_find_inline_data_nolock(inode);

		kaddr = kmap_local_folio(folio, 0);
		ext4_write_inline_data(inode, &iloc, kaddr, pos, copied);
		kunmap_local(kaddr);
		folio_mark_uptodate(folio);
		/* clear dirty flag so that writepages wouldn't work for us. */
		folio_clear_dirty(folio);

		ext4_write_unlock_xattr(inode, &no_expand);
		brelse(iloc.bh);

		/*
		 * It's important to update i_size while still holding folio
		 * lock: page writeout could otherwise come in and zero
		 * beyond i_size.
		 */
		ext4_update_inode_size(inode, pos + copied);
	}
	folio_unlock(folio);
	folio_put(folio);

	/*
	 * Don't mark the inode dirty under folio lock. First, it unnecessarily
	 * makes the holding time of folio lock longer. Second, it forces lock
	 * ordering of folio lock and transaction start for journaling
	 * filesystems.
	 */
	if (likely(copied))
		mark_inode_dirty(inode);
out:
	/*
	 * If we didn't copy as much data as expected, we need to trim back
	 * size of xattr containing inline data.
	 */
	if (pos + len > inode->i_size && ext4_can_truncate(inode))
		ext4_orphan_add(handle, inode);

	/* Keep the first error; a journal stop failure only counts if none came before. */
	ret2 = ext4_journal_stop(handle);
	if (!ret)
		ret = ret2;
	if (pos + len > inode->i_size) {
		ext4_truncate_failed_write(inode);
		/*
		 * If truncate failed early the inode might still be
		 * on the orphan list; we need to make sure the inode
		 * is removed from the orphan list in that case.
		 */
		if (inode->i_nlink)
			ext4_orphan_del(NULL, inode);
	}
	return ret ? ret : copied;
}

/*
 * Try to make the page cache and handle ready for the inline data case.
 * We can call this function in 2 cases:
 * 1. The inode is created and the first write exceeds inline size. We can
 *    clear the inode state safely.
 * 2. The inode has inline data, then we need to read the data, make it
 *    uptodate and dirty so that ext4_da_writepages can handle it. We don't
 *    need to start the journal since the file's metadata isn't changed now.
 *
 * On successful conversion *@fsdata is set to CONVERT_INLINE_DATA.
 *
 * Return: 0 on success (also when the inode has no inline data), otherwise
 * a negative error.
 */
static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping,
						 struct inode *inode,
						 void **fsdata)
{
	int ret = 0, inline_size;
	struct folio *folio;

	folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN,
					mapping_gfp_mask(mapping));
	if (IS_ERR(folio))
		return PTR_ERR(folio);

	down_read(&EXT4_I(inode)->xattr_sem);
	if (!ext4_has_inline_data(inode)) {
		ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
		goto out;
	}

	inline_size = ext4_get_inline_size(inode);

	if (!folio_test_uptodate(folio)) {
		ret = ext4_read_inline_folio(inode, folio);
		if (ret < 0)
			goto out;
	}

	ret = ext4_block_write_begin(NULL, folio, 0, inline_size,
				     ext4_da_get_block_prep);
	if (ret) {
		/* Release everything before truncating the failed write. */
		up_read(&EXT4_I(inode)->xattr_sem);
		folio_unlock(folio);
		folio_put(folio);
		ext4_truncate_failed_write(inode);
		return ret;
	}

	folio_mark_dirty(folio);
	folio_mark_uptodate(folio);
	ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
	*fsdata = (void *)CONVERT_INLINE_DATA;

out:
	up_read(&EXT4_I(inode)->xattr_sem);
	if (folio) {
		folio_unlock(folio);
		folio_put(folio);
	}
	return ret;
}

#ifdef INLINE_DIR_DEBUG
/*
 * Debug helper: trace every directory entry found in the inline area
 * [@inline_start, @inline_start + @inline_size) and BUG() on the first
 * entry that fails ext4_check_dir_entry(). Compiled to nothing unless
 * INLINE_DIR_DEBUG is defined.
 */
void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh,
			  void *inline_start, int inline_size)
{
	int offset;
	unsigned short de_len;
	struct ext4_dir_entry_2 *de = inline_start;
	void *dlimit = inline_start + inline_size;

	trace_printk("inode %lu\n", dir->i_ino);
	offset = 0;
	while ((void *)de < dlimit) {
		de_len = ext4_rec_len_from_disk(de->rec_len, inline_size);
		trace_printk("de: off %u rlen %u name %.*s nlen %u ino %u\n",
			     offset, de_len, de->name_len, de->name,
			     de->name_len, le32_to_cpu(de->inode));
		if (ext4_check_dir_entry(dir, NULL, de, bh,
					 inline_start, inline_size, offset))
			BUG();

		offset += de_len;
		de = (struct ext4_dir_entry_2 *) ((char *) de + de_len);
	}
}
#else
#define ext4_show_inline_dir(dir, bh, inline_start, inline_size)
#endif

/*
 * Add a new entry into a inline dir.
 * It will return -ENOSPC if no space is available, and -EIO
 * and -EEXIST if directory entry already exists.
 *
 * Errors from ext4_find_dest_de() and from getting journal write access
 * to the inode buffer are passed through unchanged.
 *
 * Return: 1 once the entry has been inserted into the area
 * [@inline_start, @inline_start + @inline_size), otherwise a negative
 * error.
 */
static int ext4_add_dirent_to_inline(handle_t *handle,
				     struct ext4_filename *fname,
				     struct inode *dir,
				     struct inode *inode,
				     struct ext4_iloc *iloc,
				     void *inline_start, int inline_size)
{
	int err;
	struct ext4_dir_entry_2 *de;

	err = ext4_find_dest_de(dir, iloc->bh, inline_start,
				inline_size, fname, &de);
	if (err)
		return err;

	BUFFER_TRACE(iloc->bh, "get_write_access");
	err = ext4_journal_get_write_access(handle, dir->i_sb, iloc->bh,
					    EXT4_JTR_NONE);
	if (err)
		return err;
	ext4_insert_dentry(dir, inode, de, inline_size, fname);

	ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size);

	/*
	 * XXX shouldn't update any times until successful
	 * completion of syscall, but too many callers depend
	 * on this.
	 *
	 * XXX similarly, too many callers depend on
	 * ext4_new_inode() setting the times, but error
	 * recovery deletes the inode, so the worst that can
	 * happen is that the times are slightly out of date
	 * and/or different from the directory change time.
	 */
	inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir));
	ext4_update_dx_flag(dir);
	inode_inc_iversion(dir);
	return 1;
}

/*
 * Return a pointer to the value of the inline data xattr inside the raw
 * inode of @iloc. The inode must have an inline data xattr (BUG_ON).
 */
static void *ext4_get_inline_xattr_pos(struct inode *inode,
				       struct ext4_iloc *iloc)
{
	struct ext4_xattr_entry *entry;
	struct ext4_xattr_ibody_header *header;

	BUG_ON(!EXT4_I(inode)->i_inline_off);

	header = IHDR(inode, ext4_raw_inode(iloc));
	entry = (struct ext4_xattr_entry *)((void *)ext4_raw_inode(iloc) +
					    EXT4_I(inode)->i_inline_off);

	/* e_value_offs is relative to the first in-inode xattr entry. */
	return (void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs);
}

/*
 * Set the final de to cover the whole block.
 *
 * @de_buf holds directory entries spanning @old_size bytes. The last of
 * them has its rec_len extended so that the entries span @new_size bytes.
 * With @old_size == 0 a single unused entry (inode 0) covering @new_size
 * bytes is created instead.
 */
static void ext4_update_final_de(void *de_buf, int old_size, int new_size)
{
	struct ext4_dir_entry_2 *de, *prev_de;
	void *limit;
	int de_len;

	de = de_buf;
	if (old_size) {
		limit = de_buf + old_size;
		/* Walk to the last entry; prev_de/de_len describe it after the loop. */
		do {
			prev_de = de;
			de_len = ext4_rec_len_from_disk(de->rec_len, old_size);
			de_buf += de_len;
			de = de_buf;
		} while (de_buf < limit);

		prev_de->rec_len = ext4_rec_len_to_disk(de_len + new_size -
							old_size, new_size);
	} else {
		/* this is just created, so create an empty entry. */
		de->inode = 0;
		de->rec_len = ext4_rec_len_to_disk(new_size, new_size);
	}
}

/*
 * Grow the xattr part of an inline directory to the largest size the inode
 * can currently hold and extend the last directory entry accordingly.
 *
 * Return: 0 on success, -ENOSPC if the gain would not exceed
 * ext4_dir_rec_len(1, NULL), or the error from ext4_update_inline_data().
 */
static int ext4_update_inline_dir(handle_t *handle, struct inode *dir,
				  struct ext4_iloc *iloc)
{
	int ret;
	int old_size = EXT4_I(dir)->i_inline_size - EXT4_MIN_INLINE_DATA_SIZE;
	int new_size = get_max_inline_xattr_value_size(dir, iloc);

	if (new_size - old_size <= ext4_dir_rec_len(1, NULL))
		return -ENOSPC;

	ret = ext4_update_inline_data(handle, dir,
				      new_size + EXT4_MIN_INLINE_DATA_SIZE);
	if (ret)
		return ret;

	/* Use the size actually recorded after the update, not new_size. */
	ext4_update_final_de(ext4_get_inline_xattr_pos(dir, iloc), old_size,
			     EXT4_I(dir)->i_inline_size -
						EXT4_MIN_INLINE_DATA_SIZE);
	dir->i_size = EXT4_I(dir)->i_disksize = EXT4_I(dir)->i_inline_size;
	return 0;
}

/*
 * Put previously saved inline data back into the inode after a failed
 * conversion: recreate the inline data xattr for @inline_size bytes and
 * copy @buf into it. A failure to recreate the xattr is only logged
 * (KERN_EMERG); nothing is returned to the caller.
 */
static void ext4_restore_inline_data(handle_t *handle, struct inode *inode,
				     struct ext4_iloc *iloc,
				     void *buf, int inline_size)
{
	int ret;

	ret = ext4_create_inline_data(handle, inode, inline_size);
	if (ret) {
		ext4_msg(inode->i_sb, KERN_EMERG,
			"error restoring inline_data for inode -- potential data loss! (inode %lu, error %d)",
			inode->i_ino, ret);
		return;
	}
	ext4_write_inline_data(inode, iloc, buf, 0, inline_size);
	ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
}

/*
 * Build a regular directory block in @dir_block from the inline directory
 * contents saved in @buf (@inline_size bytes): write "." and "..", append
 * the inline entries, stretch the last entry to the end of the block, add
 * the checksum tail when metadata checksums are enabled, and mark block
 * and inode dirty.
 *
 * @dir_block is expected to be locked by the caller; it is unlocked here.
 *
 * Return: 0 on success or a negative error.
 */
static int ext4_finish_convert_inline_dir(handle_t *handle,
					  struct inode *inode,
					  struct buffer_head *dir_block,
					  void *buf,
					  int inline_size)
{
	int err, csum_size = 0, header_size = 0;
	struct ext4_dir_entry_2 *de;
	void *target = dir_block->b_data;

	/*
	 * First create "." and ".." and then copy the dir information
	 * back to the block.
	 */
	de = target;
	/* csum_size is still 0 at this point; the parent inode number comes from @buf. */
	de = ext4_init_dot_dotdot(inode, de,
		inode->i_sb->s_blocksize, csum_size,
		le32_to_cpu(((struct ext4_dir_entry_2 *)buf)->inode), 1);
	header_size = (void *)de - target;

	/* The inline entries start behind the EXT4_INLINE_DOTDOT_SIZE bytes for "..". */
	memcpy((void *)de, buf + EXT4_INLINE_DOTDOT_SIZE,
		inline_size - EXT4_INLINE_DOTDOT_SIZE);

	if (ext4_has_feature_metadata_csum(inode->i_sb))
		csum_size = sizeof(struct ext4_dir_entry_tail);

	/*
	 * NOTE(review): the direct i_size store looks redundant with the
	 * i_size_write() of the same value right after it - confirm before
	 * removing.
	 */
	inode->i_size = inode->i_sb->s_blocksize;
	i_size_write(inode, inode->i_sb->s_blocksize);
	EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize;
	ext4_update_final_de(dir_block->b_data,
			inline_size - EXT4_INLINE_DOTDOT_SIZE + header_size,
			inode->i_sb->s_blocksize - csum_size);

	if (csum_size)
		ext4_initialize_dirent_tail(dir_block,
					    inode->i_sb->s_blocksize);
	set_buffer_uptodate(dir_block);
	unlock_buffer(dir_block);
	err = ext4_handle_dirty_dirblock(handle, inode, dir_block);
	if (err)
		return err;
	set_buffer_verified(dir_block);
	return ext4_mark_inode_dirty(handle, inode);
}

/*
 * Move the inline data of an inode into a newly allocated block 0.
 *
 * The inline data is saved in a temporary buffer first. If anything fails
 * after the inline data has been destroyed, it is restored from that
 * buffer. No xattr locking is done here (hence "_nolock").
 *
 * Return: 0 on success or a negative error.
 */
static int ext4_convert_inline_data_nolock(handle_t *handle,
					   struct inode *inode,
					   struct ext4_iloc *iloc)
{
	int error;
	void *buf = NULL;
	struct buffer_head *data_bh = NULL;
	struct ext4_map_blocks map;
	int inline_size;

	inline_size = ext4_get_inline_size(inode);
	buf = kmalloc(inline_size, GFP_NOFS);
	if (!buf) {
		error = -ENOMEM;
		goto out;
	}

	error = ext4_read_inline_data(inode, buf, inline_size, iloc);
	if (error < 0)
		goto out;

	/*
	 * Make sure the inline directory entries pass checks before we try to
	 * convert them, so that we avoid touching stuff that needs fsck.
	 */
	if (S_ISDIR(inode->i_mode)) {
		error = ext4_check_all_de(inode, iloc->bh,
					buf + EXT4_INLINE_DOTDOT_SIZE,
					inline_size - EXT4_INLINE_DOTDOT_SIZE);
		if (error)
			goto out;
	}

	error = ext4_destroy_inline_data_nolock(handle, inode);
	if (error)
		goto out;

	/* From here on a failure must restore the inline data (out_restore). */
	map.m_lblk = 0;
	map.m_len = 1;
	map.m_flags = 0;
	error = ext4_map_blocks(handle, inode, &map, EXT4_GET_BLOCKS_CREATE);
	if (error < 0)
		goto out_restore;
	if (!(map.m_flags & EXT4_MAP_MAPPED)) {
		error = -EIO;
		goto out_restore;
	}

	data_bh = sb_getblk(inode->i_sb, map.m_pblk);
	if (!data_bh) {
		error = -ENOMEM;
		goto out_restore;
	}

	lock_buffer(data_bh);
	error = ext4_journal_get_create_access(handle, inode->i_sb, data_bh,
					       EXT4_JTR_NONE);
	if (error) {
		unlock_buffer(data_bh);
		error = -EIO;
		goto out_restore;
	}
	memset(data_bh->b_data, 0, inode->i_sb->s_blocksize);

	if (!S_ISDIR(inode->i_mode)) {
		memcpy(data_bh->b_data, buf, inline_size);
		set_buffer_uptodate(data_bh);
		unlock_buffer(data_bh);
		error = ext4_handle_dirty_metadata(handle,
						   inode, data_bh);
	} else {
		/* Directories need "."/".." rebuilt; that helper also unlocks data_bh. */
		error = ext4_finish_convert_inline_dir(handle, inode, data_bh,
						       buf, inline_size);
	}

out_restore:
	if (error)
		ext4_restore_inline_data(handle, inode, iloc, buf, inline_size);

out:
	brelse(data_bh);
	kfree(buf);
	return error;
}

/*
 * Try to add the new entry to the inline data of the directory.
 * If the entry is added to the inline area, 1 is returned (the value
 * ext4_add_dirent_to_inline() returns on success). If it does not fit,
 * the inline dir is first extended and, failing that, its data is copied
 * to a newly created block.
1195 */ 1196 int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, 1197 struct inode *dir, struct inode *inode) 1198 { 1199 int ret, ret2, inline_size, no_expand; 1200 void *inline_start; 1201 struct ext4_iloc iloc; 1202 1203 ret = ext4_get_inode_loc(dir, &iloc); 1204 if (ret) 1205 return ret; 1206 1207 ext4_write_lock_xattr(dir, &no_expand); 1208 if (!ext4_has_inline_data(dir)) 1209 goto out; 1210 1211 inline_start = (void *)ext4_raw_inode(&iloc)->i_block + 1212 EXT4_INLINE_DOTDOT_SIZE; 1213 inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE; 1214 1215 ret = ext4_add_dirent_to_inline(handle, fname, dir, inode, &iloc, 1216 inline_start, inline_size); 1217 if (ret != -ENOSPC) 1218 goto out; 1219 1220 /* check whether it can be inserted to inline xattr space. */ 1221 inline_size = EXT4_I(dir)->i_inline_size - 1222 EXT4_MIN_INLINE_DATA_SIZE; 1223 if (!inline_size) { 1224 /* Try to use the xattr space.*/ 1225 ret = ext4_update_inline_dir(handle, dir, &iloc); 1226 if (ret && ret != -ENOSPC) 1227 goto out; 1228 1229 inline_size = EXT4_I(dir)->i_inline_size - 1230 EXT4_MIN_INLINE_DATA_SIZE; 1231 } 1232 1233 if (inline_size) { 1234 inline_start = ext4_get_inline_xattr_pos(dir, &iloc); 1235 1236 ret = ext4_add_dirent_to_inline(handle, fname, dir, 1237 inode, &iloc, inline_start, 1238 inline_size); 1239 1240 if (ret != -ENOSPC) 1241 goto out; 1242 } 1243 1244 /* 1245 * The inline space is filled up, so create a new block for it. 1246 * As the extent tree will be created, we have to save the inline 1247 * dir first. 1248 */ 1249 ret = ext4_convert_inline_data_nolock(handle, dir, &iloc); 1250 1251 out: 1252 ext4_write_unlock_xattr(dir, &no_expand); 1253 ret2 = ext4_mark_inode_dirty(handle, dir); 1254 if (unlikely(ret2 && !ret)) 1255 ret = ret2; 1256 brelse(iloc.bh); 1257 return ret; 1258 } 1259 1260 /* 1261 * This function fills a red-black tree with information from an 1262 * inlined dir. 
It returns the number directory entries loaded 1263 * into the tree. If there is an error it is returned in err. 1264 */ 1265 int ext4_inlinedir_to_tree(struct file *dir_file, 1266 struct inode *dir, ext4_lblk_t block, 1267 struct dx_hash_info *hinfo, 1268 __u32 start_hash, __u32 start_minor_hash, 1269 int *has_inline_data) 1270 { 1271 int err = 0, count = 0; 1272 unsigned int parent_ino; 1273 int pos; 1274 struct ext4_dir_entry_2 *de; 1275 struct inode *inode = file_inode(dir_file); 1276 int ret, inline_size = 0; 1277 struct ext4_iloc iloc; 1278 void *dir_buf = NULL; 1279 struct ext4_dir_entry_2 fake; 1280 struct fscrypt_str tmp_str; 1281 1282 ret = ext4_get_inode_loc(inode, &iloc); 1283 if (ret) 1284 return ret; 1285 1286 down_read(&EXT4_I(inode)->xattr_sem); 1287 if (!ext4_has_inline_data(inode)) { 1288 up_read(&EXT4_I(inode)->xattr_sem); 1289 *has_inline_data = 0; 1290 goto out; 1291 } 1292 1293 inline_size = ext4_get_inline_size(inode); 1294 dir_buf = kmalloc(inline_size, GFP_NOFS); 1295 if (!dir_buf) { 1296 ret = -ENOMEM; 1297 up_read(&EXT4_I(inode)->xattr_sem); 1298 goto out; 1299 } 1300 1301 ret = ext4_read_inline_data(inode, dir_buf, inline_size, &iloc); 1302 up_read(&EXT4_I(inode)->xattr_sem); 1303 if (ret < 0) 1304 goto out; 1305 1306 pos = 0; 1307 parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode); 1308 while (pos < inline_size) { 1309 /* 1310 * As inlined dir doesn't store any information about '.' and 1311 * only the inode number of '..' is stored, we have to handle 1312 * them differently. 
1313 */ 1314 if (pos == 0) { 1315 fake.inode = cpu_to_le32(inode->i_ino); 1316 fake.name_len = 1; 1317 strcpy(fake.name, "."); 1318 fake.rec_len = ext4_rec_len_to_disk( 1319 ext4_dir_rec_len(fake.name_len, NULL), 1320 inline_size); 1321 ext4_set_de_type(inode->i_sb, &fake, S_IFDIR); 1322 de = &fake; 1323 pos = EXT4_INLINE_DOTDOT_OFFSET; 1324 } else if (pos == EXT4_INLINE_DOTDOT_OFFSET) { 1325 fake.inode = cpu_to_le32(parent_ino); 1326 fake.name_len = 2; 1327 strcpy(fake.name, ".."); 1328 fake.rec_len = ext4_rec_len_to_disk( 1329 ext4_dir_rec_len(fake.name_len, NULL), 1330 inline_size); 1331 ext4_set_de_type(inode->i_sb, &fake, S_IFDIR); 1332 de = &fake; 1333 pos = EXT4_INLINE_DOTDOT_SIZE; 1334 } else { 1335 de = (struct ext4_dir_entry_2 *)(dir_buf + pos); 1336 pos += ext4_rec_len_from_disk(de->rec_len, inline_size); 1337 if (ext4_check_dir_entry(inode, dir_file, de, 1338 iloc.bh, dir_buf, 1339 inline_size, pos)) { 1340 ret = count; 1341 goto out; 1342 } 1343 } 1344 1345 if (ext4_hash_in_dirent(dir)) { 1346 hinfo->hash = EXT4_DIRENT_HASH(de); 1347 hinfo->minor_hash = EXT4_DIRENT_MINOR_HASH(de); 1348 } else { 1349 err = ext4fs_dirhash(dir, de->name, de->name_len, hinfo); 1350 if (err) { 1351 ret = err; 1352 goto out; 1353 } 1354 } 1355 if ((hinfo->hash < start_hash) || 1356 ((hinfo->hash == start_hash) && 1357 (hinfo->minor_hash < start_minor_hash))) 1358 continue; 1359 if (de->inode == 0) 1360 continue; 1361 tmp_str.name = de->name; 1362 tmp_str.len = de->name_len; 1363 err = ext4_htree_store_dirent(dir_file, hinfo->hash, 1364 hinfo->minor_hash, de, &tmp_str); 1365 if (err) { 1366 ret = err; 1367 goto out; 1368 } 1369 count++; 1370 } 1371 ret = count; 1372 out: 1373 kfree(dir_buf); 1374 brelse(iloc.bh); 1375 return ret; 1376 } 1377 1378 /* 1379 * So this function is called when the volume is mkfsed with 1380 * dir_index disabled. 
In order to keep f_pos persistent 1381 * after we convert from an inlined dir to a blocked based, 1382 * we just pretend that we are a normal dir and return the 1383 * offset as if '.' and '..' really take place. 1384 * 1385 */ 1386 int ext4_read_inline_dir(struct file *file, 1387 struct dir_context *ctx, 1388 int *has_inline_data) 1389 { 1390 unsigned int offset, parent_ino; 1391 int i; 1392 struct ext4_dir_entry_2 *de; 1393 struct super_block *sb; 1394 struct inode *inode = file_inode(file); 1395 int ret, inline_size = 0; 1396 struct ext4_iloc iloc; 1397 void *dir_buf = NULL; 1398 int dotdot_offset, dotdot_size, extra_offset, extra_size; 1399 struct dir_private_info *info = file->private_data; 1400 1401 ret = ext4_get_inode_loc(inode, &iloc); 1402 if (ret) 1403 return ret; 1404 1405 down_read(&EXT4_I(inode)->xattr_sem); 1406 if (!ext4_has_inline_data(inode)) { 1407 up_read(&EXT4_I(inode)->xattr_sem); 1408 *has_inline_data = 0; 1409 goto out; 1410 } 1411 1412 inline_size = ext4_get_inline_size(inode); 1413 dir_buf = kmalloc(inline_size, GFP_NOFS); 1414 if (!dir_buf) { 1415 ret = -ENOMEM; 1416 up_read(&EXT4_I(inode)->xattr_sem); 1417 goto out; 1418 } 1419 1420 ret = ext4_read_inline_data(inode, dir_buf, inline_size, &iloc); 1421 up_read(&EXT4_I(inode)->xattr_sem); 1422 if (ret < 0) 1423 goto out; 1424 1425 ret = 0; 1426 sb = inode->i_sb; 1427 parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode); 1428 offset = ctx->pos; 1429 1430 /* 1431 * dotdot_offset and dotdot_size is the real offset and 1432 * size for ".." and "." if the dir is block based while 1433 * the real size for them are only EXT4_INLINE_DOTDOT_SIZE. 1434 * So we will use extra_offset and extra_size to indicate them 1435 * during the inline dir iteration. 
1436 */ 1437 dotdot_offset = ext4_dir_rec_len(1, NULL); 1438 dotdot_size = dotdot_offset + ext4_dir_rec_len(2, NULL); 1439 extra_offset = dotdot_size - EXT4_INLINE_DOTDOT_SIZE; 1440 extra_size = extra_offset + inline_size; 1441 1442 /* 1443 * If the cookie has changed since the last call to 1444 * readdir(2), then we might be pointing to an invalid 1445 * dirent right now. Scan from the start of the inline 1446 * dir to make sure. 1447 */ 1448 if (!inode_eq_iversion(inode, info->cookie)) { 1449 for (i = 0; i < extra_size && i < offset;) { 1450 /* 1451 * "." is with offset 0 and 1452 * ".." is dotdot_offset. 1453 */ 1454 if (!i) { 1455 i = dotdot_offset; 1456 continue; 1457 } else if (i == dotdot_offset) { 1458 i = dotdot_size; 1459 continue; 1460 } 1461 /* for other entry, the real offset in 1462 * the buf has to be tuned accordingly. 1463 */ 1464 de = (struct ext4_dir_entry_2 *) 1465 (dir_buf + i - extra_offset); 1466 /* It's too expensive to do a full 1467 * dirent test each time round this 1468 * loop, but we do have to test at 1469 * least that it is non-zero. A 1470 * failure will be detected in the 1471 * dirent test below. 
*/ 1472 if (ext4_rec_len_from_disk(de->rec_len, extra_size) 1473 < ext4_dir_rec_len(1, NULL)) 1474 break; 1475 i += ext4_rec_len_from_disk(de->rec_len, 1476 extra_size); 1477 } 1478 offset = i; 1479 ctx->pos = offset; 1480 info->cookie = inode_query_iversion(inode); 1481 } 1482 1483 while (ctx->pos < extra_size) { 1484 if (ctx->pos == 0) { 1485 if (!dir_emit(ctx, ".", 1, inode->i_ino, DT_DIR)) 1486 goto out; 1487 ctx->pos = dotdot_offset; 1488 continue; 1489 } 1490 1491 if (ctx->pos == dotdot_offset) { 1492 if (!dir_emit(ctx, "..", 2, parent_ino, DT_DIR)) 1493 goto out; 1494 ctx->pos = dotdot_size; 1495 continue; 1496 } 1497 1498 de = (struct ext4_dir_entry_2 *) 1499 (dir_buf + ctx->pos - extra_offset); 1500 if (ext4_check_dir_entry(inode, file, de, iloc.bh, dir_buf, 1501 extra_size, ctx->pos)) 1502 goto out; 1503 if (le32_to_cpu(de->inode)) { 1504 if (!dir_emit(ctx, de->name, de->name_len, 1505 le32_to_cpu(de->inode), 1506 get_dtype(sb, de->file_type))) 1507 goto out; 1508 } 1509 ctx->pos += ext4_rec_len_from_disk(de->rec_len, extra_size); 1510 } 1511 out: 1512 kfree(dir_buf); 1513 brelse(iloc.bh); 1514 return ret; 1515 } 1516 1517 void *ext4_read_inline_link(struct inode *inode) 1518 { 1519 struct ext4_iloc iloc; 1520 int ret, inline_size; 1521 void *link; 1522 1523 ret = ext4_get_inode_loc(inode, &iloc); 1524 if (ret) 1525 return ERR_PTR(ret); 1526 1527 ret = -ENOMEM; 1528 inline_size = ext4_get_inline_size(inode); 1529 link = kmalloc(inline_size + 1, GFP_NOFS); 1530 if (!link) 1531 goto out; 1532 1533 ret = ext4_read_inline_data(inode, link, inline_size, &iloc); 1534 if (ret < 0) { 1535 kfree(link); 1536 goto out; 1537 } 1538 nd_terminate_link(link, inode->i_size, ret); 1539 out: 1540 if (ret < 0) 1541 link = ERR_PTR(ret); 1542 brelse(iloc.bh); 1543 return link; 1544 } 1545 1546 struct buffer_head *ext4_get_first_inline_block(struct inode *inode, 1547 struct ext4_dir_entry_2 **parent_de, 1548 int *retval) 1549 { 1550 struct ext4_iloc iloc; 1551 1552 *retval = 
ext4_get_inode_loc(inode, &iloc); 1553 if (*retval) 1554 return NULL; 1555 1556 *parent_de = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block; 1557 1558 return iloc.bh; 1559 } 1560 1561 /* 1562 * Try to create the inline data for the new dir. 1563 * If it succeeds, return 0, otherwise return the error. 1564 * In case of ENOSPC, the caller should create the normal disk layout dir. 1565 */ 1566 int ext4_try_create_inline_dir(handle_t *handle, struct inode *parent, 1567 struct inode *inode) 1568 { 1569 int ret, inline_size = EXT4_MIN_INLINE_DATA_SIZE; 1570 struct ext4_iloc iloc; 1571 struct ext4_dir_entry_2 *de; 1572 1573 ret = ext4_get_inode_loc(inode, &iloc); 1574 if (ret) 1575 return ret; 1576 1577 ret = ext4_prepare_inline_data(handle, inode, inline_size); 1578 if (ret) 1579 goto out; 1580 1581 /* 1582 * For inline dir, we only save the inode information for the ".." 1583 * and create a fake dentry to cover the left space. 1584 */ 1585 de = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block; 1586 de->inode = cpu_to_le32(parent->i_ino); 1587 de = (struct ext4_dir_entry_2 *)((void *)de + EXT4_INLINE_DOTDOT_SIZE); 1588 de->inode = 0; 1589 de->rec_len = ext4_rec_len_to_disk( 1590 inline_size - EXT4_INLINE_DOTDOT_SIZE, 1591 inline_size); 1592 set_nlink(inode, 2); 1593 inode->i_size = EXT4_I(inode)->i_disksize = inline_size; 1594 out: 1595 brelse(iloc.bh); 1596 return ret; 1597 } 1598 1599 struct buffer_head *ext4_find_inline_entry(struct inode *dir, 1600 struct ext4_filename *fname, 1601 struct ext4_dir_entry_2 **res_dir, 1602 int *has_inline_data) 1603 { 1604 struct ext4_xattr_ibody_find is = { 1605 .s = { .not_found = -ENODATA, }, 1606 }; 1607 struct ext4_xattr_info i = { 1608 .name_index = EXT4_XATTR_INDEX_SYSTEM, 1609 .name = EXT4_XATTR_SYSTEM_DATA, 1610 }; 1611 int ret; 1612 void *inline_start; 1613 int inline_size; 1614 1615 ret = ext4_get_inode_loc(dir, &is.iloc); 1616 if (ret) 1617 return ERR_PTR(ret); 1618 1619 
down_read(&EXT4_I(dir)->xattr_sem); 1620 1621 ret = ext4_xattr_ibody_find(dir, &i, &is); 1622 if (ret) 1623 goto out; 1624 1625 if (!ext4_has_inline_data(dir)) { 1626 *has_inline_data = 0; 1627 goto out; 1628 } 1629 1630 inline_start = (void *)ext4_raw_inode(&is.iloc)->i_block + 1631 EXT4_INLINE_DOTDOT_SIZE; 1632 inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE; 1633 ret = ext4_search_dir(is.iloc.bh, inline_start, inline_size, 1634 dir, fname, 0, res_dir); 1635 if (ret == 1) 1636 goto out_find; 1637 if (ret < 0) 1638 goto out; 1639 1640 if (ext4_get_inline_size(dir) == EXT4_MIN_INLINE_DATA_SIZE) 1641 goto out; 1642 1643 inline_start = ext4_get_inline_xattr_pos(dir, &is.iloc); 1644 inline_size = ext4_get_inline_size(dir) - EXT4_MIN_INLINE_DATA_SIZE; 1645 1646 ret = ext4_search_dir(is.iloc.bh, inline_start, inline_size, 1647 dir, fname, 0, res_dir); 1648 if (ret == 1) 1649 goto out_find; 1650 1651 out: 1652 brelse(is.iloc.bh); 1653 if (ret < 0) 1654 is.iloc.bh = ERR_PTR(ret); 1655 else 1656 is.iloc.bh = NULL; 1657 out_find: 1658 up_read(&EXT4_I(dir)->xattr_sem); 1659 return is.iloc.bh; 1660 } 1661 1662 int ext4_delete_inline_entry(handle_t *handle, 1663 struct inode *dir, 1664 struct ext4_dir_entry_2 *de_del, 1665 struct buffer_head *bh, 1666 int *has_inline_data) 1667 { 1668 int err, inline_size, no_expand; 1669 struct ext4_iloc iloc; 1670 void *inline_start; 1671 1672 err = ext4_get_inode_loc(dir, &iloc); 1673 if (err) 1674 return err; 1675 1676 ext4_write_lock_xattr(dir, &no_expand); 1677 if (!ext4_has_inline_data(dir)) { 1678 *has_inline_data = 0; 1679 goto out; 1680 } 1681 1682 if ((void *)de_del - ((void *)ext4_raw_inode(&iloc)->i_block) < 1683 EXT4_MIN_INLINE_DATA_SIZE) { 1684 inline_start = (void *)ext4_raw_inode(&iloc)->i_block + 1685 EXT4_INLINE_DOTDOT_SIZE; 1686 inline_size = EXT4_MIN_INLINE_DATA_SIZE - 1687 EXT4_INLINE_DOTDOT_SIZE; 1688 } else { 1689 inline_start = ext4_get_inline_xattr_pos(dir, &iloc); 1690 inline_size = 
ext4_get_inline_size(dir) - 1691 EXT4_MIN_INLINE_DATA_SIZE; 1692 } 1693 1694 BUFFER_TRACE(bh, "get_write_access"); 1695 err = ext4_journal_get_write_access(handle, dir->i_sb, bh, 1696 EXT4_JTR_NONE); 1697 if (err) 1698 goto out; 1699 1700 err = ext4_generic_delete_entry(dir, de_del, bh, 1701 inline_start, inline_size, 0); 1702 if (err) 1703 goto out; 1704 1705 ext4_show_inline_dir(dir, iloc.bh, inline_start, inline_size); 1706 out: 1707 ext4_write_unlock_xattr(dir, &no_expand); 1708 if (likely(err == 0)) 1709 err = ext4_mark_inode_dirty(handle, dir); 1710 brelse(iloc.bh); 1711 if (err != -ENOENT) 1712 ext4_std_error(dir->i_sb, err); 1713 return err; 1714 } 1715 1716 /* 1717 * Get the inline dentry at offset. 1718 */ 1719 static inline struct ext4_dir_entry_2 * 1720 ext4_get_inline_entry(struct inode *inode, 1721 struct ext4_iloc *iloc, 1722 unsigned int offset, 1723 void **inline_start, 1724 int *inline_size) 1725 { 1726 void *inline_pos; 1727 1728 BUG_ON(offset > ext4_get_inline_size(inode)); 1729 1730 if (offset < EXT4_MIN_INLINE_DATA_SIZE) { 1731 inline_pos = (void *)ext4_raw_inode(iloc)->i_block; 1732 *inline_size = EXT4_MIN_INLINE_DATA_SIZE; 1733 } else { 1734 inline_pos = ext4_get_inline_xattr_pos(inode, iloc); 1735 offset -= EXT4_MIN_INLINE_DATA_SIZE; 1736 *inline_size = ext4_get_inline_size(inode) - 1737 EXT4_MIN_INLINE_DATA_SIZE; 1738 } 1739 1740 if (inline_start) 1741 *inline_start = inline_pos; 1742 return (struct ext4_dir_entry_2 *)(inline_pos + offset); 1743 } 1744 1745 bool empty_inline_dir(struct inode *dir, int *has_inline_data) 1746 { 1747 int err, inline_size; 1748 struct ext4_iloc iloc; 1749 size_t inline_len; 1750 void *inline_pos; 1751 unsigned int offset; 1752 struct ext4_dir_entry_2 *de; 1753 bool ret = false; 1754 1755 err = ext4_get_inode_loc(dir, &iloc); 1756 if (err) { 1757 EXT4_ERROR_INODE_ERR(dir, -err, 1758 "error %d getting inode %lu block", 1759 err, dir->i_ino); 1760 return false; 1761 } 1762 1763 down_read(&EXT4_I(dir)->xattr_sem); 
1764 if (!ext4_has_inline_data(dir)) { 1765 *has_inline_data = 0; 1766 ret = true; 1767 goto out; 1768 } 1769 1770 de = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block; 1771 if (!le32_to_cpu(de->inode)) { 1772 ext4_warning(dir->i_sb, 1773 "bad inline directory (dir #%lu) - no `..'", 1774 dir->i_ino); 1775 goto out; 1776 } 1777 1778 inline_len = ext4_get_inline_size(dir); 1779 offset = EXT4_INLINE_DOTDOT_SIZE; 1780 while (offset < inline_len) { 1781 de = ext4_get_inline_entry(dir, &iloc, offset, 1782 &inline_pos, &inline_size); 1783 if (ext4_check_dir_entry(dir, NULL, de, 1784 iloc.bh, inline_pos, 1785 inline_size, offset)) { 1786 ext4_warning(dir->i_sb, 1787 "bad inline directory (dir #%lu) - " 1788 "inode %u, rec_len %u, name_len %d" 1789 "inline size %d", 1790 dir->i_ino, le32_to_cpu(de->inode), 1791 le16_to_cpu(de->rec_len), de->name_len, 1792 inline_size); 1793 goto out; 1794 } 1795 if (le32_to_cpu(de->inode)) { 1796 goto out; 1797 } 1798 offset += ext4_rec_len_from_disk(de->rec_len, inline_size); 1799 } 1800 1801 ret = true; 1802 out: 1803 up_read(&EXT4_I(dir)->xattr_sem); 1804 brelse(iloc.bh); 1805 return ret; 1806 } 1807 1808 int ext4_destroy_inline_data(handle_t *handle, struct inode *inode) 1809 { 1810 int ret, no_expand; 1811 1812 ext4_write_lock_xattr(inode, &no_expand); 1813 ret = ext4_destroy_inline_data_nolock(handle, inode); 1814 ext4_write_unlock_xattr(inode, &no_expand); 1815 1816 return ret; 1817 } 1818 1819 int ext4_inline_data_iomap(struct inode *inode, struct iomap *iomap) 1820 { 1821 __u64 addr; 1822 int error = -EAGAIN; 1823 struct ext4_iloc iloc; 1824 1825 down_read(&EXT4_I(inode)->xattr_sem); 1826 if (!ext4_has_inline_data(inode)) 1827 goto out; 1828 1829 error = ext4_get_inode_loc(inode, &iloc); 1830 if (error) 1831 goto out; 1832 1833 addr = (__u64)iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits; 1834 addr += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data; 1835 addr += offsetof(struct ext4_inode, i_block); 1836 1837 
brelse(iloc.bh); 1838 1839 iomap->addr = addr; 1840 iomap->offset = 0; 1841 iomap->length = min_t(loff_t, ext4_get_inline_size(inode), 1842 i_size_read(inode)); 1843 iomap->type = IOMAP_INLINE; 1844 iomap->flags = 0; 1845 1846 out: 1847 up_read(&EXT4_I(inode)->xattr_sem); 1848 return error; 1849 } 1850 1851 int ext4_inline_data_truncate(struct inode *inode, int *has_inline) 1852 { 1853 handle_t *handle; 1854 int inline_size, value_len, needed_blocks, no_expand, err = 0; 1855 size_t i_size; 1856 void *value = NULL; 1857 struct ext4_xattr_ibody_find is = { 1858 .s = { .not_found = -ENODATA, }, 1859 }; 1860 struct ext4_xattr_info i = { 1861 .name_index = EXT4_XATTR_INDEX_SYSTEM, 1862 .name = EXT4_XATTR_SYSTEM_DATA, 1863 }; 1864 1865 1866 needed_blocks = ext4_writepage_trans_blocks(inode); 1867 handle = ext4_journal_start(inode, EXT4_HT_INODE, needed_blocks); 1868 if (IS_ERR(handle)) 1869 return PTR_ERR(handle); 1870 1871 ext4_write_lock_xattr(inode, &no_expand); 1872 if (!ext4_has_inline_data(inode)) { 1873 ext4_write_unlock_xattr(inode, &no_expand); 1874 *has_inline = 0; 1875 ext4_journal_stop(handle); 1876 return 0; 1877 } 1878 1879 if ((err = ext4_orphan_add(handle, inode)) != 0) 1880 goto out; 1881 1882 if ((err = ext4_get_inode_loc(inode, &is.iloc)) != 0) 1883 goto out; 1884 1885 down_write(&EXT4_I(inode)->i_data_sem); 1886 i_size = inode->i_size; 1887 inline_size = ext4_get_inline_size(inode); 1888 EXT4_I(inode)->i_disksize = i_size; 1889 1890 if (i_size < inline_size) { 1891 /* 1892 * if there's inline data to truncate and this file was 1893 * converted to extents after that inline data was written, 1894 * the extent status cache must be cleared to avoid leaving 1895 * behind stale delayed allocated extent entries 1896 */ 1897 if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) 1898 ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); 1899 1900 /* Clear the content in the xattr space. 
*/ 1901 if (inline_size > EXT4_MIN_INLINE_DATA_SIZE) { 1902 if ((err = ext4_xattr_ibody_find(inode, &i, &is)) != 0) 1903 goto out_error; 1904 1905 BUG_ON(is.s.not_found); 1906 1907 value_len = le32_to_cpu(is.s.here->e_value_size); 1908 value = kmalloc(value_len, GFP_NOFS); 1909 if (!value) { 1910 err = -ENOMEM; 1911 goto out_error; 1912 } 1913 1914 err = ext4_xattr_ibody_get(inode, i.name_index, 1915 i.name, value, value_len); 1916 if (err <= 0) 1917 goto out_error; 1918 1919 i.value = value; 1920 i.value_len = i_size > EXT4_MIN_INLINE_DATA_SIZE ? 1921 i_size - EXT4_MIN_INLINE_DATA_SIZE : 0; 1922 err = ext4_xattr_ibody_set(handle, inode, &i, &is); 1923 if (err) 1924 goto out_error; 1925 } 1926 1927 /* Clear the content within i_blocks. */ 1928 if (i_size < EXT4_MIN_INLINE_DATA_SIZE) { 1929 void *p = (void *) ext4_raw_inode(&is.iloc)->i_block; 1930 memset(p + i_size, 0, 1931 EXT4_MIN_INLINE_DATA_SIZE - i_size); 1932 } 1933 1934 EXT4_I(inode)->i_inline_size = i_size < 1935 EXT4_MIN_INLINE_DATA_SIZE ? 
1936 EXT4_MIN_INLINE_DATA_SIZE : i_size; 1937 } 1938 1939 out_error: 1940 up_write(&EXT4_I(inode)->i_data_sem); 1941 out: 1942 brelse(is.iloc.bh); 1943 ext4_write_unlock_xattr(inode, &no_expand); 1944 kfree(value); 1945 if (inode->i_nlink) 1946 ext4_orphan_del(handle, inode); 1947 1948 if (err == 0) { 1949 inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); 1950 err = ext4_mark_inode_dirty(handle, inode); 1951 if (IS_SYNC(inode)) 1952 ext4_handle_sync(handle); 1953 } 1954 ext4_journal_stop(handle); 1955 return err; 1956 } 1957 1958 int ext4_convert_inline_data(struct inode *inode) 1959 { 1960 int error, needed_blocks, no_expand; 1961 handle_t *handle; 1962 struct ext4_iloc iloc; 1963 1964 if (!ext4_has_inline_data(inode)) { 1965 ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 1966 return 0; 1967 } else if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { 1968 /* 1969 * Inode has inline data but EXT4_STATE_MAY_INLINE_DATA is 1970 * cleared. This means we are in the middle of moving of 1971 * inline data to delay allocated block. Just force writeout 1972 * here to finish conversion. 1973 */ 1974 error = filemap_flush(inode->i_mapping); 1975 if (error) 1976 return error; 1977 if (!ext4_has_inline_data(inode)) 1978 return 0; 1979 } 1980 1981 needed_blocks = ext4_writepage_trans_blocks(inode); 1982 1983 iloc.bh = NULL; 1984 error = ext4_get_inode_loc(inode, &iloc); 1985 if (error) 1986 return error; 1987 1988 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks); 1989 if (IS_ERR(handle)) { 1990 error = PTR_ERR(handle); 1991 goto out_free; 1992 } 1993 1994 ext4_write_lock_xattr(inode, &no_expand); 1995 if (ext4_has_inline_data(inode)) 1996 error = ext4_convert_inline_data_nolock(handle, inode, &iloc); 1997 ext4_write_unlock_xattr(inode, &no_expand); 1998 ext4_journal_stop(handle); 1999 out_free: 2000 brelse(iloc.bh); 2001 return error; 2002 } 2003