1 // SPDX-License-Identifier: LGPL-2.1 2 /* 3 * Copyright (c) 2012 Taobao. 4 * Written by Tao Ma <boyu.mt@taobao.com> 5 */ 6 7 #include <linux/iomap.h> 8 #include <linux/fiemap.h> 9 #include <linux/namei.h> 10 #include <linux/iversion.h> 11 #include <linux/sched/mm.h> 12 13 #include "ext4_jbd2.h" 14 #include "ext4.h" 15 #include "xattr.h" 16 #include "truncate.h" 17 18 #define EXT4_XATTR_SYSTEM_DATA "data" 19 #define EXT4_MIN_INLINE_DATA_SIZE ((sizeof(__le32) * EXT4_N_BLOCKS)) 20 #define EXT4_INLINE_DOTDOT_OFFSET 2 21 #define EXT4_INLINE_DOTDOT_SIZE 4 22 23 24 static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping, 25 struct inode *inode, 26 void **fsdata); 27 28 static int ext4_get_inline_size(struct inode *inode) 29 { 30 if (EXT4_I(inode)->i_inline_off) 31 return EXT4_I(inode)->i_inline_size; 32 33 return 0; 34 } 35 36 static int get_max_inline_xattr_value_size(struct inode *inode, 37 struct ext4_iloc *iloc) 38 { 39 struct ext4_xattr_ibody_header *header; 40 struct ext4_xattr_entry *entry; 41 struct ext4_inode *raw_inode; 42 void *end; 43 int free, min_offs; 44 45 if (!EXT4_INODE_HAS_XATTR_SPACE(inode)) 46 return 0; 47 48 min_offs = EXT4_SB(inode->i_sb)->s_inode_size - 49 EXT4_GOOD_OLD_INODE_SIZE - 50 EXT4_I(inode)->i_extra_isize - 51 sizeof(struct ext4_xattr_ibody_header); 52 53 /* 54 * We need to subtract another sizeof(__u32) since an in-inode xattr 55 * needs an empty 4 bytes to indicate the gap between the xattr entry 56 * and the name/value pair. 57 */ 58 if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR)) 59 return EXT4_XATTR_SIZE(min_offs - 60 EXT4_XATTR_LEN(strlen(EXT4_XATTR_SYSTEM_DATA)) - 61 EXT4_XATTR_ROUND - sizeof(__u32)); 62 63 raw_inode = ext4_raw_inode(iloc); 64 header = IHDR(inode, raw_inode); 65 entry = IFIRST(header); 66 end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; 67 68 /* Compute min_offs. 
*/ 69 while (!IS_LAST_ENTRY(entry)) { 70 void *next = EXT4_XATTR_NEXT(entry); 71 72 if (next >= end) { 73 EXT4_ERROR_INODE(inode, 74 "corrupt xattr in inline inode"); 75 return 0; 76 } 77 if (!entry->e_value_inum && entry->e_value_size) { 78 size_t offs = le16_to_cpu(entry->e_value_offs); 79 if (offs < min_offs) 80 min_offs = offs; 81 } 82 entry = next; 83 } 84 free = min_offs - 85 ((void *)entry - (void *)IFIRST(header)) - sizeof(__u32); 86 87 if (EXT4_I(inode)->i_inline_off) { 88 entry = (struct ext4_xattr_entry *) 89 ((void *)raw_inode + EXT4_I(inode)->i_inline_off); 90 91 free += EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size)); 92 goto out; 93 } 94 95 free -= EXT4_XATTR_LEN(strlen(EXT4_XATTR_SYSTEM_DATA)); 96 97 if (free > EXT4_XATTR_ROUND) 98 free = EXT4_XATTR_SIZE(free - EXT4_XATTR_ROUND); 99 else 100 free = 0; 101 102 out: 103 return free; 104 } 105 106 /* 107 * Get the maximum size we now can store in an inode. 108 * If we can't find the space for a xattr entry, don't use the space 109 * of the extents since we have no space to indicate the inline data. 
*/
int ext4_get_max_inline_size(struct inode *inode)
{
	int error, max_inline_size;
	struct ext4_iloc iloc;

	/* Without extra inode space there is nothing to compute. */
	if (EXT4_I(inode)->i_extra_isize == 0)
		return 0;

	error = ext4_get_inode_loc(inode, &iloc);
	if (error) {
		/* The failure is reported, but the caller only sees 0. */
		ext4_error_inode_err(inode, __func__, __LINE__, 0, -error,
				     "can't get inode location %llu",
				     inode->i_ino);
		return 0;
	}

	/* The xattr area is walked under xattr_sem held for reading. */
	down_read(&EXT4_I(inode)->xattr_sem);
	max_inline_size = get_max_inline_xattr_value_size(inode, &iloc);
	up_read(&EXT4_I(inode)->xattr_sem);

	brelse(iloc.bh);

	if (!max_inline_size)
		return 0;

	/* The xattr value space is reported together with the i_block area. */
	return max_inline_size + EXT4_MIN_INLINE_DATA_SIZE;
}

/*
 * Look up the EXT4_XATTR_SYSTEM_DATA xattr in the inode body and, when it
 * is found, cache its offset (i_inline_off) and the total inline size
 * (i_inline_size) in the in-memory inode.
 *
 * This function does not take xattr_sem itself.  The callers visible in
 * this file (ext4_prepare_inline_data() and ext4_write_inline_data_end())
 * invoke it after ext4_write_lock_xattr(); any other caller is expected
 * to provide equivalent exclusion, e.g. the ext4_iget path running before
 * the new inode has been unlocked.
 *
 * Returns 0 on success (including "xattr not found" and "no extra inode
 * space"), -EFSCORRUPTED if the entry has e_value_inum set, or the error
 * from ext4_get_inode_loc()/ext4_xattr_ibody_find().
 */
int ext4_find_inline_data_nolock(struct inode *inode)
{
	struct ext4_xattr_ibody_find is = {
		.s = { .not_found = -ENODATA, },
	};
	struct ext4_xattr_info i = {
		.name_index = EXT4_XATTR_INDEX_SYSTEM,
		.name = EXT4_XATTR_SYSTEM_DATA,
	};
	int error;

	if (EXT4_I(inode)->i_extra_isize == 0)
		return 0;

	error = ext4_get_inode_loc(inode, &is.iloc);
	if (error)
		return error;

	error = ext4_xattr_ibody_find(inode, &i, &is);
	if (error)
		goto out;

	if (!is.s.not_found) {
		if (is.s.here->e_value_inum) {
			EXT4_ERROR_INODE(inode, "inline data xattr refers "
					 "to an external xattr inode");
			error = -EFSCORRUPTED;
			goto out;
		}
		/* Offset of the entry measured from the raw inode start. */
		EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here -
					(void *)ext4_raw_inode(&is.iloc));
		EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE +
				le32_to_cpu(is.s.here->e_value_size);
	}
out:
	brelse(is.iloc.bh);
	return error;
}

/*
 * Copy up to @len bytes of inline data into @buffer.
 *
 * The first EXT4_MIN_INLINE_DATA_SIZE bytes come from the raw inode's
 * i_block area; anything beyond that comes from the value of the xattr
 * entry at i_inline_off, capped at that entry's e_value_size.
 *
 * @len must not exceed i_inline_size (BUG_ON otherwise).
 * Returns the number of bytes actually copied.
 */
static int ext4_read_inline_data(struct inode *inode, void *buffer,
				 unsigned int len,
				 struct ext4_iloc *iloc)
{
	struct ext4_xattr_entry *entry;
	struct ext4_xattr_ibody_header *header;
	int cp_len = 0;
	struct ext4_inode *raw_inode;

	if (!len)
		return 0;

	BUG_ON(len > EXT4_I(inode)->i_inline_size);

	/* Part 1: the bytes stored in i_block. */
	cp_len = min_t(unsigned int, len, EXT4_MIN_INLINE_DATA_SIZE);

	raw_inode = ext4_raw_inode(iloc);
	memcpy(buffer, (void *)(raw_inode->i_block), cp_len);

	len -= cp_len;
	buffer += cp_len;

	if (!len)
		goto out;

	/* Part 2: the remainder stored in the xattr value. */
	header = IHDR(inode, raw_inode);
	entry = (struct ext4_xattr_entry *)((void *)raw_inode +
					    EXT4_I(inode)->i_inline_off);
	len = min_t(unsigned int, len,
		    (unsigned int)le32_to_cpu(entry->e_value_size));

	memcpy(buffer,
	       (void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs), len);
	cp_len += len;

out:
	return cp_len;
}

/*
 * Write @len bytes to the inline data area at inline offset @pos.
 *
 * @buffer is the start of the source data; the function itself advances
 * it by @pos, so the bytes written are buffer[pos .. pos + len).  Data
 * below EXT4_MIN_INLINE_DATA_SIZE goes into the raw inode's i_block area
 * and the rest into the value of the xattr entry at i_inline_off.
 *
 * Nothing is written when the filesystem is in an emergency state.  The
 * inode must already have inline data set up and pos + len must fit in
 * i_inline_size (both enforced with BUG_ON).
 */
static void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc,
				   void *buffer, loff_t pos, unsigned int len)
{
	struct ext4_xattr_entry *entry;
	struct ext4_xattr_ibody_header *header;
	struct ext4_inode *raw_inode;
	int cp_len = 0;

	if (unlikely(ext4_emergency_state(inode->i_sb)))
		return;

	BUG_ON(!EXT4_I(inode)->i_inline_off);
	BUG_ON(pos + len > EXT4_I(inode)->i_inline_size);

	raw_inode = ext4_raw_inode(iloc);
	buffer += pos;

	if (pos < EXT4_MIN_INLINE_DATA_SIZE) {
		/* Clamp the first copy at the end of the i_block area. */
		cp_len = pos + len > EXT4_MIN_INLINE_DATA_SIZE ?
			 EXT4_MIN_INLINE_DATA_SIZE - pos : len;
		memcpy((void *)raw_inode->i_block + pos, buffer, cp_len);

		len -= cp_len;
		buffer += cp_len;
		pos += cp_len;
	}

	if (!len)
		return;

	/* From here on @pos is relative to the start of the xattr value. */
	pos -= EXT4_MIN_INLINE_DATA_SIZE;
	header = IHDR(inode, raw_inode);
	entry = (struct ext4_xattr_entry *)((void *)raw_inode +
					    EXT4_I(inode)->i_inline_off);

	memcpy((void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs) + pos,
	       buffer, len);
}

/*
 * Create the EXT4_XATTR_SYSTEM_DATA xattr for an inode that does not have
 * one yet, sized so that @len bytes of inline data fit.
 *
 * On success the i_block area is zeroed, i_inline_off/i_inline_size are
 * set, EXT4_INODE_EXTENTS is cleared, EXT4_INODE_INLINE_DATA is set and
 * the inode location is marked dirty.  If the xattr cannot be inserted
 * because of -ENOSPC, EXT4_STATE_MAY_INLINE_DATA is cleared.
 *
 * Returns 0 or a negative error (-EFSCORRUPTED if the xattr is already
 * present).
 */
static int ext4_create_inline_data(handle_t *handle,
				   struct inode *inode, unsigned len)
{
	int error;
	void *value = NULL;
	struct ext4_xattr_ibody_find is = {
		.s = { .not_found = -ENODATA, },
	};
	struct ext4_xattr_info i = {
		.name_index = EXT4_XATTR_INDEX_SYSTEM,
		.name = EXT4_XATTR_SYSTEM_DATA,
	};

	error = ext4_get_inode_loc(inode, &is.iloc);
	if (error)
		return error;

	BUFFER_TRACE(is.iloc.bh, "get_write_access");
	error = ext4_journal_get_write_access(handle, inode->i_sb, is.iloc.bh,
					      EXT4_JTR_NONE);
	if (error)
		goto out;

	/* Only the part that does not fit in i_block goes into the xattr. */
	if (len > EXT4_MIN_INLINE_DATA_SIZE) {
		value = EXT4_ZERO_XATTR_VALUE;
		len -= EXT4_MIN_INLINE_DATA_SIZE;
	} else {
		value = "";
		len = 0;
	}

	/* Insert the xattr entry. */
	i.value = value;
	i.value_len = len;

	error = ext4_xattr_ibody_find(inode, &i, &is);
	if (error)
		goto out;

	if (!is.s.not_found) {
		EXT4_ERROR_INODE(inode, "unexpected inline data xattr");
		error = -EFSCORRUPTED;
		goto out;
	}

	error = ext4_xattr_ibody_set(handle, inode, &i, &is);
	if (error) {
		if (error == -ENOSPC)
			ext4_clear_inode_state(inode,
					       EXT4_STATE_MAY_INLINE_DATA);
		goto out;
	}

	memset((void *)ext4_raw_inode(&is.iloc)->i_block,
		0, EXT4_MIN_INLINE_DATA_SIZE);

	EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here -
				      (void *)ext4_raw_inode(&is.iloc));
	EXT4_I(inode)->i_inline_size = len + EXT4_MIN_INLINE_DATA_SIZE;
	ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS);
	ext4_set_inode_flag(inode, EXT4_INODE_INLINE_DATA);
	/* Extra reference: the brelse() at "out" still runs afterwards. */
	get_bh(is.iloc.bh);
	error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);

out:
	brelse(is.iloc.bh);
	return error;
}

/*
 * Grow the existing inline data xattr so that @len bytes of inline data
 * fit.  Nothing is done when @len already fits in i_inline_size.
 *
 * The current xattr value is read into a zero-filled buffer of the new
 * size and written back with the new length; i_inline_off and
 * i_inline_size are then refreshed from the resulting entry.
 *
 * Returns 0 or a negative error (-EFSCORRUPTED if the xattr is missing).
 */
static int ext4_update_inline_data(handle_t *handle, struct inode *inode,
				   unsigned int len)
{
	int error;
	void *value = NULL;
	struct ext4_xattr_ibody_find is = {
		.s = { .not_found = -ENODATA, },
	};
	struct ext4_xattr_info i = {
		.name_index = EXT4_XATTR_INDEX_SYSTEM,
		.name = EXT4_XATTR_SYSTEM_DATA,
	};

	/* If the old space is ok, write the data directly. */
	if (len <= EXT4_I(inode)->i_inline_size)
		return 0;

	error = ext4_get_inode_loc(inode, &is.iloc);
	if (error)
		return error;

	error = ext4_xattr_ibody_find(inode, &i, &is);
	if (error)
		goto out;

	if (is.s.not_found) {
		EXT4_ERROR_INODE(inode, "missing inline data xattr");
		error = -EFSCORRUPTED;
		goto out;
	}

	/* From here on @len is the new xattr value length. */
	len -= EXT4_MIN_INLINE_DATA_SIZE;
	value = kzalloc(len, GFP_NOFS);
	if (!value) {
		error = -ENOMEM;
		goto out;
	}

	/* Only negative results are treated as failure here. */
	error = ext4_xattr_ibody_get(inode, i.name_index, i.name,
				     value, len);
	if (error < 0)
		goto out;

	BUFFER_TRACE(is.iloc.bh, "get_write_access");
	error = ext4_journal_get_write_access(handle, inode->i_sb, is.iloc.bh,
					      EXT4_JTR_NONE);
	if (error)
		goto out;

	/* Update the xattr entry. */
	i.value = value;
	i.value_len = len;

	error = ext4_xattr_ibody_set(handle, inode, &i, &is);
	if (error)
		goto out;

	EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here -
				      (void *)ext4_raw_inode(&is.iloc));
	EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE +
				le32_to_cpu(is.s.here->e_value_size);
	ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
	/* Extra reference: the brelse() at "out" still runs afterwards. */
	get_bh(is.iloc.bh);
	error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);

out:
	kfree(value);
	brelse(is.iloc.bh);
	return error;
}

/*
 * Make sure the inode can hold @len bytes of inline data, creating or
 * growing the inline data xattr as needed.
 *
 * Returns -ENOSPC when the inode may not use inline data
 * (EXT4_STATE_MAY_INLINE_DATA is clear) or when @len exceeds
 * ext4_get_max_inline_size(); otherwise the result of
 * ext4_update_inline_data()/ext4_create_inline_data().
 */
static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode,
				    loff_t len)
{
	int ret, size, no_expand;
	struct ext4_inode_info *ei = EXT4_I(inode);

	if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA))
		return -ENOSPC;

	size = ext4_get_max_inline_size(inode);
	if (size < len)
		return -ENOSPC;

	ext4_write_lock_xattr(inode, &no_expand);
	/*
	 * ei->i_inline_size may have changed since the initial check
	 * if other xattrs were added.  Recalculate to ensure
	 * ext4_update_inline_data() validates against current capacity.
	 * The return value is deliberately ignored.
	 */
	(void) ext4_find_inline_data_nolock(inode);
	if (ei->i_inline_off)
		ret = ext4_update_inline_data(handle, inode, len);
	else
		ret = ext4_create_inline_data(handle, inode, len);

	ext4_write_unlock_xattr(inode, &no_expand);
	return ret;
}

/*
 * Remove the inline data xattr and switch the inode away from inline
 * data.  Does nothing (returns 0) when i_inline_off is not set.
 *
 * The function itself takes i_data_sem for writing; it takes no xattr
 * lock.  On success the i_block area (on disk and in ei->i_data) is
 * zeroed, the extents flag is set and the extent tree initialised for
 * directories, regular files and symlinks when the filesystem has the
 * extents feature, EXT4_INODE_INLINE_DATA is cleared and the cached
 * inline offset/size are reset.
 *
 * -ENODATA is converted to 0 on the way out.
 */
static int ext4_destroy_inline_data_nolock(handle_t *handle,
					   struct inode *inode)
{
	struct ext4_inode_info *ei = EXT4_I(inode);
	struct ext4_xattr_ibody_find is = {
		.s = { .not_found = 0, },
	};
	/* A NULL value with zero length is passed to the xattr setter. */
	struct ext4_xattr_info i = {
		.name_index = EXT4_XATTR_INDEX_SYSTEM,
		.name = EXT4_XATTR_SYSTEM_DATA,
		.value = NULL,
		.value_len = 0,
	};
	int error;

	if (!ei->i_inline_off)
		return 0;

	down_write(&ei->i_data_sem);

	error = ext4_get_inode_loc(inode, &is.iloc);
	if (error) {
		up_write(&ei->i_data_sem);
		return error;
	}

	error = ext4_xattr_ibody_find(inode, &i, &is);
	if (error)
		goto out;

	BUFFER_TRACE(is.iloc.bh, "get_write_access");
	error = ext4_journal_get_write_access(handle, inode->i_sb, is.iloc.bh,
					      EXT4_JTR_NONE);
	if (error)
		goto out;

	error = ext4_xattr_ibody_set(handle, inode, &i, &is);
	if (error)
		goto out;

	memset((void *)ext4_raw_inode(&is.iloc)->i_block,
		0, EXT4_MIN_INLINE_DATA_SIZE);
	memset(ei->i_data, 0, EXT4_MIN_INLINE_DATA_SIZE);

	if (ext4_has_feature_extents(inode->i_sb)) {
		if (S_ISDIR(inode->i_mode) ||
		    S_ISREG(inode->i_mode) || S_ISLNK(inode->i_mode)) {
			ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS);
			ext4_ext_tree_init(handle, inode);
		}
	}
	ext4_clear_inode_flag(inode, EXT4_INODE_INLINE_DATA);

	/* Extra reference: the brelse() at "out" still runs afterwards. */
	get_bh(is.iloc.bh);
	error = ext4_mark_iloc_dirty(handle, inode, &is.iloc);

	/* The cached state is reset whatever ext4_mark_iloc_dirty() returned. */
	EXT4_I(inode)->i_inline_off = 0;
	EXT4_I(inode)->i_inline_size = 0;
	ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
out:
	brelse(is.iloc.bh);
	if (error == -ENODATA)
		error = 0;
	up_write(&ei->i_data_sem);
	return error;
}

/*
 * Fill the locked folio at index 0 with the inode's inline data, zero the
 * rest of the folio and mark it uptodate.
 *
 * The amount read is the smaller of the inline size and i_size.
 * Returns the byte count from ext4_read_inline_data() on success, 0 (after
 * a warning) if the inode has no i_inline_off, -EFSCORRUPTED if the length
 * exceeds PAGE_SIZE, or the error from ext4_get_inode_loc().
 */
static int ext4_read_inline_folio(struct inode *inode, struct folio *folio)
{
	void *kaddr;
	int ret = 0;
	size_t len;
	struct ext4_iloc iloc;

	BUG_ON(!folio_test_locked(folio));
	BUG_ON(!ext4_has_inline_data(inode));
	BUG_ON(folio->index);

	if (!EXT4_I(inode)->i_inline_off) {
		ext4_warning(inode->i_sb, "inode %llu doesn't have inline data.",
			     inode->i_ino);
		goto out;
	}

	ret = ext4_get_inode_loc(inode, &iloc);
	if (ret)
		goto out;

	len = min_t(size_t, ext4_get_inline_size(inode), i_size_read(inode));

	if (len > PAGE_SIZE) {
		ext4_error_inode(inode, __func__, __LINE__, 0,
				 "inline size %zu exceeds PAGE_SIZE", len);
		ret = -EFSCORRUPTED;
		brelse(iloc.bh);
		goto out;
	}

	kaddr = kmap_local_folio(folio, 0);
	ret = ext4_read_inline_data(inode, kaddr, len, &iloc);
	/* Zero everything in the folio after the inline bytes. */
	kaddr = folio_zero_tail(folio, len, kaddr + len);
	kunmap_local(kaddr);
	folio_mark_uptodate(folio);
	brelse(iloc.bh);

out:
	return ret;
}

/*
 * Read path for an inode with inline data.
 *
 * Returns -EAGAIN, without unlocking @folio, when the inode turns out not
 * to have inline data.  Otherwise the folio is filled (index 0) or zeroed
 * (any other index), unlocked, and 0 or a negative error is returned.
 */
int ext4_readpage_inline(struct inode *inode, struct folio *folio)
{
	int ret = 0;

	down_read(&EXT4_I(inode)->xattr_sem);
	if (!ext4_has_inline_data(inode)) {
		up_read(&EXT4_I(inode)->xattr_sem);
		return -EAGAIN;
	}

	/*
	 * Current inline data can only exist in the 1st page,
	 * So for all the other pages, just set them uptodate.
	 */
	if (!folio->index)
		ret = ext4_read_inline_folio(inode, folio);
	else if (!folio_test_uptodate(folio)) {
		folio_zero_segment(folio, 0, folio_size(folio));
		folio_mark_uptodate(folio);
	}

	up_read(&EXT4_I(inode)->xattr_sem);

	folio_unlock(folio);
	/* A positive byte count from the read is reported as success. */
	return ret >= 0 ? 0 : ret;
}

/*
 * Convert an inline-data file to block-mapped data for a write that does
 * not fit inline.
 *
 * The inline bytes are read into the folio at index 0, the inline xattr
 * is destroyed, and buffers for [0, inline size) are set up with
 * ext4_block_write_begin() and committed with block_commit_write().
 * -ENOSPC is retried via ext4_should_retry_alloc().
 *
 * If the inode has no inline data on entry, EXT4_STATE_MAY_INLINE_DATA is
 * cleared and 0 is returned.  The folio is unlocked and released before
 * returning; 0 or a negative error is returned.
 */
static int ext4_convert_inline_data_to_extent(struct address_space *mapping,
					      struct inode *inode)
{
	int ret, needed_blocks, no_expand;
	handle_t *handle = NULL;
	int retries = 0, sem_held = 0;
	struct folio *folio = NULL;
	unsigned from, to;
	struct ext4_iloc iloc;

	if (!ext4_has_inline_data(inode)) {
		/*
		 * clear the flag so that no new write
		 * will trap here again.
		 */
		ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA);
		return 0;
	}

	needed_blocks = ext4_chunk_trans_extent(inode, 1);

	ret = ext4_get_inode_loc(inode, &iloc);
	if (ret)
		return ret;

retry:
	handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		handle = NULL;
		goto out;
	}

	/* We cannot recurse into the filesystem as the transaction is already
	 * started */
	folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN | FGP_NOFS,
			mapping_gfp_mask(mapping));
	if (IS_ERR(folio)) {
		ret = PTR_ERR(folio);
		/* Skip the folio cleanup: "folio" holds an error pointer. */
		goto out_nofolio;
	}

	ext4_write_lock_xattr(inode, &no_expand);
	sem_held = 1;
	/* If some one has already done this for us, just exit. */
	if (!ext4_has_inline_data(inode)) {
		ret = 0;
		goto out;
	}

	from = 0;
	to = ext4_get_inline_size(inode);
	if (!folio_test_uptodate(folio)) {
		ret = ext4_read_inline_folio(inode, folio);
		if (ret < 0)
			goto out;
	}

	ext4_fc_track_inode(handle, inode);
	ret = ext4_destroy_inline_data_nolock(handle, inode);
	if (ret)
		goto out;

	if (ext4_should_dioread_nolock(inode)) {
		ret = ext4_block_write_begin(handle, folio, from, to,
					     ext4_get_block_unwritten);
	} else
		ret = ext4_block_write_begin(handle, folio, from, to,
					     ext4_get_block);
	clear_buffer_new(folio_buffers(folio));

	if (!ret && ext4_should_journal_data(inode)) {
		ret = ext4_walk_page_buffers(handle, inode,
					     folio_buffers(folio), from, to,
					     NULL, do_journal_get_write_access);
	}

	if (ret) {
		/*
		 * Drop the folio, the xattr lock and the handle here and
		 * reset the tracking variables so that the common exit
		 * path (or a retry) does not release them a second time.
		 */
		folio_unlock(folio);
		folio_put(folio);
		folio = NULL;
		ext4_orphan_add(handle, inode);
		ext4_write_unlock_xattr(inode, &no_expand);
		sem_held = 0;
		ext4_journal_stop(handle);
		handle = NULL;
		ext4_truncate_failed_write(inode);
		/*
		 * If truncate failed early the inode might
		 * still be on the orphan list; we need to
		 * make sure the inode is removed from the
		 * orphan list in that case.
		 */
		if (inode->i_nlink)
			ext4_orphan_del(NULL, inode);
	}

	if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries))
		goto retry;

	if (folio)
		block_commit_write(folio, from, to);
out:
	if (folio) {
		folio_unlock(folio);
		folio_put(folio);
	}
out_nofolio:
	if (sem_held)
		ext4_write_unlock_xattr(inode, &no_expand);
	if (handle)
		ext4_journal_stop(handle);
	brelse(iloc.bh);
	return ret;
}

/*
 * Prepare the write for the inline data.
 * If the data can be written into the inode, we just read
 * the page and make it uptodate, and start the journal.
* Otherwise read the page, makes it dirty so that it can be
 * handle in writepages(the i_disksize update is left to the
 * normal ext4_da_write_end).
 *
 * @da selects the fallback used when the data does not fit inline
 * (-ENOSPC from ext4_prepare_inline_data()): the delayed-allocation
 * conversion when true, ext4_convert_inline_data_to_extent() otherwise.
 *
 * Returns 1 with *foliop set to the locked folio and the journal handle
 * still running when the write can proceed inline; 0 when the inode
 * turned out not to have inline data or the fallback conversion
 * succeeded; a negative error otherwise.
 */
int ext4_generic_write_inline_data(struct address_space *mapping,
					  struct inode *inode,
					  loff_t pos, unsigned len,
					  struct folio **foliop,
					  void **fsdata, bool da)
{
	int ret;
	handle_t *handle;
	struct folio *folio;
	struct ext4_iloc iloc;
	int retries = 0;

	ret = ext4_get_inode_loc(inode, &iloc);
	if (ret)
		return ret;

retry_journal:
	handle = ext4_journal_start(inode, EXT4_HT_INODE, 1);
	if (IS_ERR(handle)) {
		ret = PTR_ERR(handle);
		goto out_release_bh;
	}

	ret = ext4_prepare_inline_data(handle, inode, pos + len);
	if (ret && ret != -ENOSPC)
		goto out_stop_journal;

	if (ret == -ENOSPC) {
		/* Does not fit inline: stop the handle and convert instead. */
		ext4_journal_stop(handle);
		if (!da) {
			brelse(iloc.bh);
			/* Retry inside */
			return ext4_convert_inline_data_to_extent(mapping, inode);
		}

		ret = ext4_da_convert_inline_data_to_extent(mapping, inode, fsdata);
		if (ret == -ENOSPC &&
		    ext4_should_retry_alloc(inode->i_sb, &retries))
			goto retry_journal;
		goto out_release_bh;
	}

	folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN | FGP_NOFS,
					mapping_gfp_mask(mapping));
	if (IS_ERR(folio)) {
		ret = PTR_ERR(folio);
		goto out_stop_journal;
	}

	down_read(&EXT4_I(inode)->xattr_sem);
	/* Someone else had converted it to extent */
	if (!ext4_has_inline_data(inode)) {
		ret = 0;
		goto out_release_folio;
	}

	if (!folio_test_uptodate(folio)) {
		ret = ext4_read_inline_folio(inode, folio);
		if (ret < 0)
			goto out_release_folio;
	}

	ret = ext4_journal_get_write_access(handle, inode->i_sb, iloc.bh, EXT4_JTR_NONE);
	if (ret)
		goto out_release_folio;
	/* Success: the folio stays locked and the handle stays open. */
	*foliop = folio;
	up_read(&EXT4_I(inode)->xattr_sem);
	brelse(iloc.bh);
	return 1;

out_release_folio:
	up_read(&EXT4_I(inode)->xattr_sem);
	folio_unlock(folio);
	folio_put(folio);
out_stop_journal:
	ext4_journal_stop(handle);
out_release_bh:
	brelse(iloc.bh);
	return ret;
}

/*
 * Try to write data in the inode.
 * If the inode has inline data, check whether the new write can be
 * in the inode also. If not, create the page the handle, move the data
 * to the page make it update and let the later codes create extent for it.
 *
 * A write ending beyond ext4_get_max_inline_size() goes straight to
 * ext4_convert_inline_data_to_extent(); anything else is handled by
 * ext4_generic_write_inline_data() in non-delalloc mode.
 */
int ext4_try_to_write_inline_data(struct address_space *mapping,
				  struct inode *inode,
				  loff_t pos, unsigned len,
				  struct folio **foliop)
{
	if (pos + len > ext4_get_max_inline_size(inode))
		return ext4_convert_inline_data_to_extent(mapping, inode);
	return ext4_generic_write_inline_data(mapping, inode, pos, len,
					      foliop, NULL, false);
}

/*
 * Finish an inline data write: copy @copied bytes at @pos from @folio
 * into the inode's inline area, update the inode size, release the folio
 * and stop the current journal handle.
 *
 * A short copy into a folio that is not uptodate is treated as if
 * nothing had been copied.  If the write was meant to extend past the
 * resulting i_size, the failed part is truncated away again.
 *
 * Returns the number of bytes copied or a negative error.
 */
int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len,
			       unsigned copied, struct folio *folio)
{
	handle_t *handle = ext4_journal_current_handle();
	int no_expand;
	void *kaddr;
	struct ext4_iloc iloc;
	int ret = 0, ret2;

	if (unlikely(copied < len) && !folio_test_uptodate(folio))
		copied = 0;

	if (likely(copied)) {
		ret = ext4_get_inode_loc(inode, &iloc);
		if (ret) {
			folio_unlock(folio);
			folio_put(folio);
			ext4_std_error(inode->i_sb, ret);
			goto out;
		}
		ext4_write_lock_xattr(inode, &no_expand);
		BUG_ON(!ext4_has_inline_data(inode));

		/*
		 * ei->i_inline_off may have changed since
		 * ext4_write_begin() called
		 * ext4_try_to_write_inline_data()
		 */
		(void) ext4_find_inline_data_nolock(inode);

		kaddr = kmap_local_folio(folio, 0);
		ext4_write_inline_data(inode, &iloc, kaddr, pos, copied);
		kunmap_local(kaddr);
		folio_mark_uptodate(folio);
		/* clear dirty flag so that writepages wouldn't work for us. */
		folio_clear_dirty(folio);

		ext4_write_unlock_xattr(inode, &no_expand);
		brelse(iloc.bh);

		/*
		 * It's important to update i_size while still holding folio
		 * lock: page writeout could otherwise come in and zero
		 * beyond i_size.
		 */
		ext4_update_inode_size(inode, pos + copied);
	}
	folio_unlock(folio);
	folio_put(folio);

	/*
	 * Don't mark the inode dirty under folio lock. First, it unnecessarily
	 * makes the holding time of folio lock longer. Second, it forces lock
	 * ordering of folio lock and transaction start for journaling
	 * filesystems.
	 */
	if (likely(copied))
		mark_inode_dirty(inode);
out:
	/*
	 * If we didn't copy as much data as expected, we need to trim back
	 * size of xattr containing inline data.
	 */
	if (pos + len > inode->i_size && ext4_can_truncate(inode))
		ext4_orphan_add(handle, inode);

	/* An earlier error takes precedence over the journal stop result. */
	ret2 = ext4_journal_stop(handle);
	if (!ret)
		ret = ret2;
	if (pos + len > inode->i_size) {
		ext4_truncate_failed_write(inode);
		/*
		 * If truncate failed early the inode might still be
		 * on the orphan list; we need to make sure the inode
		 * is removed from the orphan list in that case.
		 */
		if (inode->i_nlink)
			ext4_orphan_del(NULL, inode);
	}
	return ret ? ret : copied;
}

/*
 * Try to make the page cache and handle ready for the inline data case.
 * We can call this function in 2 cases:
 * 1. The inode is created and the first write exceeds inline size. We can
 *    clear the inode state safely.
 * 2. The inode has inline data, then we need to read the data, make it
 *    update and dirty so that ext4_da_writepages can handle it. We don't
 *    need to start the journal since the file's metadata isn't changed now.
884 */ 885 static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping, 886 struct inode *inode, 887 void **fsdata) 888 { 889 int ret = 0, inline_size; 890 struct folio *folio; 891 892 folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN, 893 mapping_gfp_mask(mapping)); 894 if (IS_ERR(folio)) 895 return PTR_ERR(folio); 896 897 down_read(&EXT4_I(inode)->xattr_sem); 898 if (!ext4_has_inline_data(inode)) { 899 ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 900 goto out; 901 } 902 903 inline_size = ext4_get_inline_size(inode); 904 905 if (!folio_test_uptodate(folio)) { 906 ret = ext4_read_inline_folio(inode, folio); 907 if (ret < 0) 908 goto out; 909 } 910 911 ret = ext4_block_write_begin(NULL, folio, 0, inline_size, 912 ext4_da_get_block_prep); 913 if (ret) { 914 up_read(&EXT4_I(inode)->xattr_sem); 915 folio_unlock(folio); 916 folio_put(folio); 917 ext4_truncate_failed_write(inode); 918 return ret; 919 } 920 921 clear_buffer_new(folio_buffers(folio)); 922 folio_mark_dirty(folio); 923 folio_mark_uptodate(folio); 924 ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 925 *fsdata = (void *)CONVERT_INLINE_DATA; 926 927 out: 928 up_read(&EXT4_I(inode)->xattr_sem); 929 if (folio) { 930 folio_unlock(folio); 931 folio_put(folio); 932 } 933 return ret; 934 } 935 936 #ifdef INLINE_DIR_DEBUG 937 void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh, 938 void *inline_start, int inline_size) 939 { 940 int offset; 941 unsigned short de_len; 942 struct ext4_dir_entry_2 *de = inline_start; 943 void *dlimit = inline_start + inline_size; 944 945 trace_printk("inode %llu\n", dir->i_ino); 946 offset = 0; 947 while ((void *)de < dlimit) { 948 de_len = ext4_rec_len_from_disk(de->rec_len, inline_size); 949 trace_printk("de: off %u rlen %u name %.*s nlen %u ino %u\n", 950 offset, de_len, de->name_len, de->name, 951 de->name_len, le32_to_cpu(de->inode)); 952 if (ext4_check_dir_entry(dir, NULL, de, bh, 953 inline_start, inline_size, 
offset)) 954 BUG(); 955 956 offset += de_len; 957 de = (struct ext4_dir_entry_2 *) ((char *) de + de_len); 958 } 959 } 960 #else 961 #define ext4_show_inline_dir(dir, bh, inline_start, inline_size) 962 #endif 963 964 /* 965 * Add a new entry into a inline dir. 966 * It will return -ENOSPC if no space is available, and -EIO 967 * and -EEXIST if directory entry already exists. 968 */ 969 static int ext4_add_dirent_to_inline(handle_t *handle, 970 struct ext4_filename *fname, 971 struct inode *dir, 972 struct inode *inode, 973 struct ext4_iloc *iloc, 974 void *inline_start, int inline_size) 975 { 976 int err; 977 struct ext4_dir_entry_2 *de; 978 979 err = ext4_find_dest_de(dir, iloc->bh, inline_start, 980 inline_size, fname, &de); 981 if (err) 982 return err; 983 984 BUFFER_TRACE(iloc->bh, "get_write_access"); 985 err = ext4_journal_get_write_access(handle, dir->i_sb, iloc->bh, 986 EXT4_JTR_NONE); 987 if (err) 988 return err; 989 ext4_insert_dentry(dir, inode, de, inline_size, fname); 990 991 ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size); 992 993 /* 994 * XXX shouldn't update any times until successful 995 * completion of syscall, but too many callers depend 996 * on this. 997 * 998 * XXX similarly, too many callers depend on 999 * ext4_new_inode() setting the times, but error 1000 * recovery deletes the inode, so the worst that can 1001 * happen is that the times are slightly out of date 1002 * and/or different from the directory change time. 
1003 */ 1004 inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); 1005 ext4_update_dx_flag(dir); 1006 inode_inc_iversion(dir); 1007 return 1; 1008 } 1009 1010 static void *ext4_get_inline_xattr_pos(struct inode *inode, 1011 struct ext4_iloc *iloc) 1012 { 1013 struct ext4_xattr_entry *entry; 1014 struct ext4_xattr_ibody_header *header; 1015 1016 BUG_ON(!EXT4_I(inode)->i_inline_off); 1017 1018 header = IHDR(inode, ext4_raw_inode(iloc)); 1019 entry = (struct ext4_xattr_entry *)((void *)ext4_raw_inode(iloc) + 1020 EXT4_I(inode)->i_inline_off); 1021 1022 return (void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs); 1023 } 1024 1025 /* Set the final de to cover the whole block. */ 1026 void ext4_update_final_de(void *de_buf, int old_size, int new_size) 1027 { 1028 struct ext4_dir_entry_2 *de, *prev_de; 1029 void *limit; 1030 int de_len; 1031 1032 de = de_buf; 1033 if (old_size) { 1034 limit = de_buf + old_size; 1035 do { 1036 prev_de = de; 1037 de_len = ext4_rec_len_from_disk(de->rec_len, old_size); 1038 de_buf += de_len; 1039 de = de_buf; 1040 } while (de_buf < limit); 1041 1042 prev_de->rec_len = ext4_rec_len_to_disk(de_len + new_size - 1043 old_size, new_size); 1044 } else { 1045 /* this is just created, so create an empty entry. 
*/ 1046 de->inode = 0; 1047 de->rec_len = ext4_rec_len_to_disk(new_size, new_size); 1048 } 1049 } 1050 1051 static int ext4_update_inline_dir(handle_t *handle, struct inode *dir, 1052 struct ext4_iloc *iloc) 1053 { 1054 int ret; 1055 int old_size = EXT4_I(dir)->i_inline_size - EXT4_MIN_INLINE_DATA_SIZE; 1056 int new_size = get_max_inline_xattr_value_size(dir, iloc); 1057 1058 if (new_size - old_size <= ext4_dir_rec_len(1, NULL)) 1059 return -ENOSPC; 1060 1061 ret = ext4_update_inline_data(handle, dir, 1062 new_size + EXT4_MIN_INLINE_DATA_SIZE); 1063 if (ret) 1064 return ret; 1065 1066 ext4_update_final_de(ext4_get_inline_xattr_pos(dir, iloc), old_size, 1067 EXT4_I(dir)->i_inline_size - 1068 EXT4_MIN_INLINE_DATA_SIZE); 1069 dir->i_size = EXT4_I(dir)->i_disksize = EXT4_I(dir)->i_inline_size; 1070 return 0; 1071 } 1072 1073 static void ext4_restore_inline_data(handle_t *handle, struct inode *inode, 1074 struct ext4_iloc *iloc, 1075 void *buf, int inline_size) 1076 { 1077 int ret; 1078 1079 ret = ext4_create_inline_data(handle, inode, inline_size); 1080 if (ret) { 1081 ext4_msg(inode->i_sb, KERN_EMERG, 1082 "error restoring inline_data for inode -- potential data loss! 
(inode %llu, error %d)", 1083 inode->i_ino, ret); 1084 return; 1085 } 1086 ext4_write_inline_data(inode, iloc, buf, 0, inline_size); 1087 ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 1088 } 1089 1090 static int ext4_convert_inline_data_nolock(handle_t *handle, 1091 struct inode *inode, 1092 struct ext4_iloc *iloc) 1093 { 1094 int error; 1095 void *buf = NULL; 1096 struct buffer_head *data_bh = NULL; 1097 struct ext4_map_blocks map; 1098 int inline_size; 1099 1100 inline_size = ext4_get_inline_size(inode); 1101 buf = kmalloc(inline_size, GFP_NOFS); 1102 if (!buf) { 1103 error = -ENOMEM; 1104 goto out; 1105 } 1106 1107 error = ext4_read_inline_data(inode, buf, inline_size, iloc); 1108 if (error < 0) 1109 goto out; 1110 1111 /* 1112 * Make sure the inline directory entries pass checks before we try to 1113 * convert them, so that we avoid touching stuff that needs fsck. 1114 */ 1115 if (S_ISDIR(inode->i_mode)) { 1116 error = ext4_check_all_de(inode, iloc->bh, 1117 buf + EXT4_INLINE_DOTDOT_SIZE, 1118 inline_size - EXT4_INLINE_DOTDOT_SIZE); 1119 if (error) 1120 goto out; 1121 } 1122 1123 error = ext4_destroy_inline_data_nolock(handle, inode); 1124 if (error) 1125 goto out; 1126 1127 map.m_lblk = 0; 1128 map.m_len = 1; 1129 map.m_flags = 0; 1130 error = ext4_map_blocks(handle, inode, &map, EXT4_GET_BLOCKS_CREATE); 1131 if (error < 0) 1132 goto out_restore; 1133 if (!(map.m_flags & EXT4_MAP_MAPPED)) { 1134 error = -EIO; 1135 goto out_restore; 1136 } 1137 1138 data_bh = sb_getblk(inode->i_sb, map.m_pblk); 1139 if (!data_bh) { 1140 error = -ENOMEM; 1141 goto out_restore; 1142 } 1143 1144 lock_buffer(data_bh); 1145 error = ext4_journal_get_create_access(handle, inode->i_sb, data_bh, 1146 EXT4_JTR_NONE); 1147 if (error) { 1148 unlock_buffer(data_bh); 1149 error = -EIO; 1150 goto out_restore; 1151 } 1152 memset(data_bh->b_data, 0, inode->i_sb->s_blocksize); 1153 1154 if (!S_ISDIR(inode->i_mode)) { 1155 memcpy(data_bh->b_data, buf, inline_size); 1156 
set_buffer_uptodate(data_bh); 1157 unlock_buffer(data_bh); 1158 error = ext4_handle_dirty_metadata(handle, 1159 inode, data_bh); 1160 } else { 1161 unlock_buffer(data_bh); 1162 inode->i_size = inode->i_sb->s_blocksize; 1163 i_size_write(inode, inode->i_sb->s_blocksize); 1164 EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; 1165 1166 error = ext4_init_dirblock(handle, inode, data_bh, 1167 le32_to_cpu(((struct ext4_dir_entry_2 *)buf)->inode), 1168 buf + EXT4_INLINE_DOTDOT_SIZE, 1169 inline_size - EXT4_INLINE_DOTDOT_SIZE); 1170 if (!error) 1171 error = ext4_mark_inode_dirty(handle, inode); 1172 } 1173 1174 out_restore: 1175 if (error) 1176 ext4_restore_inline_data(handle, inode, iloc, buf, inline_size); 1177 1178 out: 1179 brelse(data_bh); 1180 kfree(buf); 1181 return error; 1182 } 1183 1184 /* 1185 * Try to add the new entry to the inline data. 1186 * If succeeds, return 0. If not, extended the inline dir and copied data to 1187 * the new created block. 1188 */ 1189 int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, 1190 struct inode *dir, struct inode *inode) 1191 { 1192 int ret, ret2, inline_size, no_expand; 1193 void *inline_start; 1194 struct ext4_iloc iloc; 1195 1196 ret = ext4_get_inode_loc(dir, &iloc); 1197 if (ret) 1198 return ret; 1199 1200 ext4_write_lock_xattr(dir, &no_expand); 1201 if (!ext4_has_inline_data(dir)) 1202 goto out; 1203 1204 inline_start = (void *)ext4_raw_inode(&iloc)->i_block + 1205 EXT4_INLINE_DOTDOT_SIZE; 1206 inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE; 1207 1208 ret = ext4_add_dirent_to_inline(handle, fname, dir, inode, &iloc, 1209 inline_start, inline_size); 1210 if (ret != -ENOSPC) 1211 goto out; 1212 1213 /* check whether it can be inserted to inline xattr space. 
*/ 1214 inline_size = EXT4_I(dir)->i_inline_size - 1215 EXT4_MIN_INLINE_DATA_SIZE; 1216 if (!inline_size) { 1217 /* Try to use the xattr space.*/ 1218 ret = ext4_update_inline_dir(handle, dir, &iloc); 1219 if (ret && ret != -ENOSPC) 1220 goto out; 1221 1222 inline_size = EXT4_I(dir)->i_inline_size - 1223 EXT4_MIN_INLINE_DATA_SIZE; 1224 } 1225 1226 if (inline_size) { 1227 inline_start = ext4_get_inline_xattr_pos(dir, &iloc); 1228 1229 ret = ext4_add_dirent_to_inline(handle, fname, dir, 1230 inode, &iloc, inline_start, 1231 inline_size); 1232 1233 if (ret != -ENOSPC) 1234 goto out; 1235 } 1236 1237 /* 1238 * The inline space is filled up, so create a new block for it. 1239 * As the extent tree will be created, we have to save the inline 1240 * dir first. 1241 */ 1242 ret = ext4_convert_inline_data_nolock(handle, dir, &iloc); 1243 1244 out: 1245 ext4_write_unlock_xattr(dir, &no_expand); 1246 ret2 = ext4_mark_inode_dirty(handle, dir); 1247 if (unlikely(ret2 && !ret)) 1248 ret = ret2; 1249 brelse(iloc.bh); 1250 return ret; 1251 } 1252 1253 /* 1254 * This function fills a red-black tree with information from an 1255 * inlined dir. It returns the number directory entries loaded 1256 * into the tree. If there is an error it is returned in err. 
1257 */ 1258 int ext4_inlinedir_to_tree(struct file *dir_file, 1259 struct inode *dir, ext4_lblk_t block, 1260 struct dx_hash_info *hinfo, 1261 __u32 start_hash, __u32 start_minor_hash, 1262 int *has_inline_data) 1263 { 1264 int err = 0, count = 0; 1265 unsigned int parent_ino; 1266 int pos; 1267 struct ext4_dir_entry_2 *de; 1268 struct inode *inode = file_inode(dir_file); 1269 int ret, inline_size = 0; 1270 struct ext4_iloc iloc; 1271 void *dir_buf = NULL; 1272 struct ext4_dir_entry_2 fake; 1273 struct fscrypt_str tmp_str; 1274 1275 ret = ext4_get_inode_loc(inode, &iloc); 1276 if (ret) 1277 return ret; 1278 1279 down_read(&EXT4_I(inode)->xattr_sem); 1280 if (!ext4_has_inline_data(inode)) { 1281 up_read(&EXT4_I(inode)->xattr_sem); 1282 *has_inline_data = 0; 1283 goto out; 1284 } 1285 1286 inline_size = ext4_get_inline_size(inode); 1287 dir_buf = kmalloc(inline_size, GFP_NOFS); 1288 if (!dir_buf) { 1289 ret = -ENOMEM; 1290 up_read(&EXT4_I(inode)->xattr_sem); 1291 goto out; 1292 } 1293 1294 ret = ext4_read_inline_data(inode, dir_buf, inline_size, &iloc); 1295 up_read(&EXT4_I(inode)->xattr_sem); 1296 if (ret < 0) 1297 goto out; 1298 1299 pos = 0; 1300 parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode); 1301 while (pos < inline_size) { 1302 /* 1303 * As inlined dir doesn't store any information about '.' and 1304 * only the inode number of '..' is stored, we have to handle 1305 * them differently. 
1306 */ 1307 if (pos == 0) { 1308 fake.inode = cpu_to_le32(inode->i_ino); 1309 fake.name_len = 1; 1310 memcpy(fake.name, ".", 2); 1311 fake.rec_len = ext4_rec_len_to_disk( 1312 ext4_dir_rec_len(fake.name_len, NULL), 1313 inline_size); 1314 ext4_set_de_type(inode->i_sb, &fake, S_IFDIR); 1315 de = &fake; 1316 pos = EXT4_INLINE_DOTDOT_OFFSET; 1317 } else if (pos == EXT4_INLINE_DOTDOT_OFFSET) { 1318 fake.inode = cpu_to_le32(parent_ino); 1319 fake.name_len = 2; 1320 memcpy(fake.name, "..", 3); 1321 fake.rec_len = ext4_rec_len_to_disk( 1322 ext4_dir_rec_len(fake.name_len, NULL), 1323 inline_size); 1324 ext4_set_de_type(inode->i_sb, &fake, S_IFDIR); 1325 de = &fake; 1326 pos = EXT4_INLINE_DOTDOT_SIZE; 1327 } else { 1328 de = (struct ext4_dir_entry_2 *)(dir_buf + pos); 1329 pos += ext4_rec_len_from_disk(de->rec_len, inline_size); 1330 if (ext4_check_dir_entry(inode, dir_file, de, 1331 iloc.bh, dir_buf, 1332 inline_size, pos)) { 1333 ret = count; 1334 goto out; 1335 } 1336 } 1337 1338 if (ext4_hash_in_dirent(dir)) { 1339 hinfo->hash = EXT4_DIRENT_HASH(de); 1340 hinfo->minor_hash = EXT4_DIRENT_MINOR_HASH(de); 1341 } else { 1342 err = ext4fs_dirhash(dir, de->name, de->name_len, hinfo); 1343 if (err) { 1344 ret = err; 1345 goto out; 1346 } 1347 } 1348 if ((hinfo->hash < start_hash) || 1349 ((hinfo->hash == start_hash) && 1350 (hinfo->minor_hash < start_minor_hash))) 1351 continue; 1352 if (de->inode == 0) 1353 continue; 1354 tmp_str.name = de->name; 1355 tmp_str.len = de->name_len; 1356 err = ext4_htree_store_dirent(dir_file, hinfo->hash, 1357 hinfo->minor_hash, de, &tmp_str); 1358 if (err) { 1359 ret = err; 1360 goto out; 1361 } 1362 count++; 1363 } 1364 ret = count; 1365 out: 1366 kfree(dir_buf); 1367 brelse(iloc.bh); 1368 return ret; 1369 } 1370 1371 /* 1372 * So this function is called when the volume is mkfsed with 1373 * dir_index disabled. 
In order to keep f_pos persistent 1374 * after we convert from an inlined dir to a blocked based, 1375 * we just pretend that we are a normal dir and return the 1376 * offset as if '.' and '..' really take place. 1377 * 1378 */ 1379 int ext4_read_inline_dir(struct file *file, 1380 struct dir_context *ctx, 1381 int *has_inline_data) 1382 { 1383 unsigned int offset, parent_ino; 1384 int i; 1385 struct ext4_dir_entry_2 *de; 1386 struct super_block *sb; 1387 struct inode *inode = file_inode(file); 1388 int ret, inline_size = 0; 1389 struct ext4_iloc iloc; 1390 void *dir_buf = NULL; 1391 int dotdot_offset, dotdot_size, extra_offset, extra_size; 1392 struct dir_private_info *info = file->private_data; 1393 1394 ret = ext4_get_inode_loc(inode, &iloc); 1395 if (ret) 1396 return ret; 1397 1398 down_read(&EXT4_I(inode)->xattr_sem); 1399 if (!ext4_has_inline_data(inode)) { 1400 up_read(&EXT4_I(inode)->xattr_sem); 1401 *has_inline_data = 0; 1402 goto out; 1403 } 1404 1405 inline_size = ext4_get_inline_size(inode); 1406 dir_buf = kmalloc(inline_size, GFP_NOFS); 1407 if (!dir_buf) { 1408 ret = -ENOMEM; 1409 up_read(&EXT4_I(inode)->xattr_sem); 1410 goto out; 1411 } 1412 1413 ret = ext4_read_inline_data(inode, dir_buf, inline_size, &iloc); 1414 up_read(&EXT4_I(inode)->xattr_sem); 1415 if (ret < 0) 1416 goto out; 1417 1418 ret = 0; 1419 sb = inode->i_sb; 1420 parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode); 1421 offset = ctx->pos; 1422 1423 /* 1424 * dotdot_offset and dotdot_size is the real offset and 1425 * size for ".." and "." if the dir is block based while 1426 * the real size for them are only EXT4_INLINE_DOTDOT_SIZE. 1427 * So we will use extra_offset and extra_size to indicate them 1428 * during the inline dir iteration. 
1429 */ 1430 dotdot_offset = ext4_dir_rec_len(1, NULL); 1431 dotdot_size = dotdot_offset + ext4_dir_rec_len(2, NULL); 1432 extra_offset = dotdot_size - EXT4_INLINE_DOTDOT_SIZE; 1433 extra_size = extra_offset + inline_size; 1434 1435 /* 1436 * If the cookie has changed since the last call to 1437 * readdir(2), then we might be pointing to an invalid 1438 * dirent right now. Scan from the start of the inline 1439 * dir to make sure. 1440 */ 1441 if (!inode_eq_iversion(inode, info->cookie)) { 1442 for (i = 0; i < extra_size && i < offset;) { 1443 /* 1444 * "." is with offset 0 and 1445 * ".." is dotdot_offset. 1446 */ 1447 if (!i) { 1448 i = dotdot_offset; 1449 continue; 1450 } else if (i == dotdot_offset) { 1451 i = dotdot_size; 1452 continue; 1453 } 1454 /* for other entry, the real offset in 1455 * the buf has to be tuned accordingly. 1456 */ 1457 de = (struct ext4_dir_entry_2 *) 1458 (dir_buf + i - extra_offset); 1459 /* It's too expensive to do a full 1460 * dirent test each time round this 1461 * loop, but we do have to test at 1462 * least that it is non-zero. A 1463 * failure will be detected in the 1464 * dirent test below. 
*/ 1465 if (ext4_rec_len_from_disk(de->rec_len, extra_size) 1466 < ext4_dir_rec_len(1, NULL)) 1467 break; 1468 i += ext4_rec_len_from_disk(de->rec_len, 1469 extra_size); 1470 } 1471 offset = i; 1472 ctx->pos = offset; 1473 info->cookie = inode_query_iversion(inode); 1474 } 1475 1476 while (ctx->pos < extra_size) { 1477 if (ctx->pos == 0) { 1478 if (!dir_emit(ctx, ".", 1, inode->i_ino, DT_DIR)) 1479 goto out; 1480 ctx->pos = dotdot_offset; 1481 continue; 1482 } 1483 1484 if (ctx->pos == dotdot_offset) { 1485 if (!dir_emit(ctx, "..", 2, parent_ino, DT_DIR)) 1486 goto out; 1487 ctx->pos = dotdot_size; 1488 continue; 1489 } 1490 1491 de = (struct ext4_dir_entry_2 *) 1492 (dir_buf + ctx->pos - extra_offset); 1493 if (ext4_check_dir_entry(inode, file, de, iloc.bh, dir_buf, 1494 extra_size, ctx->pos)) 1495 goto out; 1496 if (le32_to_cpu(de->inode)) { 1497 if (!dir_emit(ctx, de->name, de->name_len, 1498 le32_to_cpu(de->inode), 1499 get_dtype(sb, de->file_type))) 1500 goto out; 1501 } 1502 ctx->pos += ext4_rec_len_from_disk(de->rec_len, extra_size); 1503 } 1504 out: 1505 kfree(dir_buf); 1506 brelse(iloc.bh); 1507 return ret; 1508 } 1509 1510 void *ext4_read_inline_link(struct inode *inode) 1511 { 1512 struct ext4_iloc iloc; 1513 int ret, inline_size; 1514 void *link; 1515 1516 ret = ext4_get_inode_loc(inode, &iloc); 1517 if (ret) 1518 return ERR_PTR(ret); 1519 1520 ret = -ENOMEM; 1521 inline_size = ext4_get_inline_size(inode); 1522 link = kmalloc(inline_size + 1, GFP_NOFS); 1523 if (!link) 1524 goto out; 1525 1526 ret = ext4_read_inline_data(inode, link, inline_size, &iloc); 1527 if (ret < 0) { 1528 kfree(link); 1529 goto out; 1530 } 1531 nd_terminate_link(link, inode->i_size, ret); 1532 out: 1533 if (ret < 0) 1534 link = ERR_PTR(ret); 1535 brelse(iloc.bh); 1536 return link; 1537 } 1538 1539 struct buffer_head *ext4_get_first_inline_block(struct inode *inode, 1540 struct ext4_dir_entry_2 **parent_de, 1541 int *retval) 1542 { 1543 struct ext4_iloc iloc; 1544 1545 *retval = 
ext4_get_inode_loc(inode, &iloc); 1546 if (*retval) 1547 return NULL; 1548 1549 *parent_de = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block; 1550 1551 return iloc.bh; 1552 } 1553 1554 /* 1555 * Try to create the inline data for the new dir. 1556 * If it succeeds, return 0, otherwise return the error. 1557 * In case of ENOSPC, the caller should create the normal disk layout dir. 1558 */ 1559 int ext4_try_create_inline_dir(handle_t *handle, struct inode *parent, 1560 struct inode *inode) 1561 { 1562 int ret, inline_size = EXT4_MIN_INLINE_DATA_SIZE; 1563 struct ext4_iloc iloc; 1564 struct ext4_dir_entry_2 *de; 1565 1566 ret = ext4_get_inode_loc(inode, &iloc); 1567 if (ret) 1568 return ret; 1569 1570 ret = ext4_prepare_inline_data(handle, inode, inline_size); 1571 if (ret) 1572 goto out; 1573 1574 /* 1575 * For inline dir, we only save the inode information for the ".." 1576 * and create a fake dentry to cover the left space. 1577 */ 1578 de = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block; 1579 de->inode = cpu_to_le32(parent->i_ino); 1580 de = (struct ext4_dir_entry_2 *)((void *)de + EXT4_INLINE_DOTDOT_SIZE); 1581 de->inode = 0; 1582 de->rec_len = ext4_rec_len_to_disk( 1583 inline_size - EXT4_INLINE_DOTDOT_SIZE, 1584 inline_size); 1585 set_nlink(inode, 2); 1586 inode->i_size = EXT4_I(inode)->i_disksize = inline_size; 1587 out: 1588 brelse(iloc.bh); 1589 return ret; 1590 } 1591 1592 struct buffer_head *ext4_find_inline_entry(struct inode *dir, 1593 struct ext4_filename *fname, 1594 struct ext4_dir_entry_2 **res_dir, 1595 int *has_inline_data) 1596 { 1597 struct ext4_xattr_ibody_find is = { 1598 .s = { .not_found = -ENODATA, }, 1599 }; 1600 struct ext4_xattr_info i = { 1601 .name_index = EXT4_XATTR_INDEX_SYSTEM, 1602 .name = EXT4_XATTR_SYSTEM_DATA, 1603 }; 1604 int ret; 1605 void *inline_start; 1606 int inline_size; 1607 1608 ret = ext4_get_inode_loc(dir, &is.iloc); 1609 if (ret) 1610 return ERR_PTR(ret); 1611 1612 
down_read(&EXT4_I(dir)->xattr_sem); 1613 1614 ret = ext4_xattr_ibody_find(dir, &i, &is); 1615 if (ret) 1616 goto out; 1617 1618 if (!ext4_has_inline_data(dir)) { 1619 *has_inline_data = 0; 1620 goto out; 1621 } 1622 1623 inline_start = (void *)ext4_raw_inode(&is.iloc)->i_block + 1624 EXT4_INLINE_DOTDOT_SIZE; 1625 inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE; 1626 ret = ext4_search_dir(is.iloc.bh, inline_start, inline_size, 1627 dir, fname, 0, res_dir); 1628 if (ret == 1) 1629 goto out_find; 1630 if (ret < 0) 1631 goto out; 1632 1633 if (ext4_get_inline_size(dir) == EXT4_MIN_INLINE_DATA_SIZE) 1634 goto out; 1635 1636 inline_start = ext4_get_inline_xattr_pos(dir, &is.iloc); 1637 inline_size = ext4_get_inline_size(dir) - EXT4_MIN_INLINE_DATA_SIZE; 1638 1639 ret = ext4_search_dir(is.iloc.bh, inline_start, inline_size, 1640 dir, fname, 0, res_dir); 1641 if (ret == 1) 1642 goto out_find; 1643 1644 out: 1645 brelse(is.iloc.bh); 1646 if (ret < 0) 1647 is.iloc.bh = ERR_PTR(ret); 1648 else 1649 is.iloc.bh = NULL; 1650 out_find: 1651 up_read(&EXT4_I(dir)->xattr_sem); 1652 return is.iloc.bh; 1653 } 1654 1655 int ext4_delete_inline_entry(handle_t *handle, 1656 struct inode *dir, 1657 struct ext4_dir_entry_2 *de_del, 1658 struct buffer_head *bh, 1659 int *has_inline_data) 1660 { 1661 int err, inline_size, no_expand; 1662 struct ext4_iloc iloc; 1663 void *inline_start; 1664 1665 err = ext4_get_inode_loc(dir, &iloc); 1666 if (err) 1667 return err; 1668 1669 ext4_write_lock_xattr(dir, &no_expand); 1670 if (!ext4_has_inline_data(dir)) { 1671 *has_inline_data = 0; 1672 goto out; 1673 } 1674 1675 if ((void *)de_del - ((void *)ext4_raw_inode(&iloc)->i_block) < 1676 EXT4_MIN_INLINE_DATA_SIZE) { 1677 inline_start = (void *)ext4_raw_inode(&iloc)->i_block + 1678 EXT4_INLINE_DOTDOT_SIZE; 1679 inline_size = EXT4_MIN_INLINE_DATA_SIZE - 1680 EXT4_INLINE_DOTDOT_SIZE; 1681 } else { 1682 inline_start = ext4_get_inline_xattr_pos(dir, &iloc); 1683 inline_size = 
ext4_get_inline_size(dir) - 1684 EXT4_MIN_INLINE_DATA_SIZE; 1685 } 1686 1687 BUFFER_TRACE(bh, "get_write_access"); 1688 err = ext4_journal_get_write_access(handle, dir->i_sb, bh, 1689 EXT4_JTR_NONE); 1690 if (err) 1691 goto out; 1692 1693 err = ext4_generic_delete_entry(dir, de_del, bh, 1694 inline_start, inline_size, 0); 1695 if (err) 1696 goto out; 1697 1698 ext4_show_inline_dir(dir, iloc.bh, inline_start, inline_size); 1699 out: 1700 ext4_write_unlock_xattr(dir, &no_expand); 1701 if (likely(err == 0)) 1702 err = ext4_mark_inode_dirty(handle, dir); 1703 brelse(iloc.bh); 1704 if (err != -ENOENT) 1705 ext4_std_error(dir->i_sb, err); 1706 return err; 1707 } 1708 1709 /* 1710 * Get the inline dentry at offset. 1711 */ 1712 static inline struct ext4_dir_entry_2 * 1713 ext4_get_inline_entry(struct inode *inode, 1714 struct ext4_iloc *iloc, 1715 unsigned int offset, 1716 void **inline_start, 1717 int *inline_size) 1718 { 1719 void *inline_pos; 1720 1721 BUG_ON(offset > ext4_get_inline_size(inode)); 1722 1723 if (offset < EXT4_MIN_INLINE_DATA_SIZE) { 1724 inline_pos = (void *)ext4_raw_inode(iloc)->i_block; 1725 *inline_size = EXT4_MIN_INLINE_DATA_SIZE; 1726 } else { 1727 inline_pos = ext4_get_inline_xattr_pos(inode, iloc); 1728 offset -= EXT4_MIN_INLINE_DATA_SIZE; 1729 *inline_size = ext4_get_inline_size(inode) - 1730 EXT4_MIN_INLINE_DATA_SIZE; 1731 } 1732 1733 if (inline_start) 1734 *inline_start = inline_pos; 1735 return (struct ext4_dir_entry_2 *)(inline_pos + offset); 1736 } 1737 1738 bool empty_inline_dir(struct inode *dir, int *has_inline_data) 1739 { 1740 int err, inline_size; 1741 struct ext4_iloc iloc; 1742 size_t inline_len; 1743 void *inline_pos; 1744 unsigned int offset; 1745 struct ext4_dir_entry_2 *de; 1746 bool ret = false; 1747 1748 err = ext4_get_inode_loc(dir, &iloc); 1749 if (err) { 1750 EXT4_ERROR_INODE_ERR(dir, -err, 1751 "error %d getting inode %llu block", 1752 err, dir->i_ino); 1753 return false; 1754 } 1755 1756 
down_read(&EXT4_I(dir)->xattr_sem); 1757 if (!ext4_has_inline_data(dir)) { 1758 *has_inline_data = 0; 1759 ret = true; 1760 goto out; 1761 } 1762 1763 de = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block; 1764 if (!le32_to_cpu(de->inode)) { 1765 ext4_warning(dir->i_sb, 1766 "bad inline directory (dir #%llu) - no `..'", 1767 dir->i_ino); 1768 goto out; 1769 } 1770 1771 inline_len = ext4_get_inline_size(dir); 1772 offset = EXT4_INLINE_DOTDOT_SIZE; 1773 while (offset < inline_len) { 1774 de = ext4_get_inline_entry(dir, &iloc, offset, 1775 &inline_pos, &inline_size); 1776 if (ext4_check_dir_entry(dir, NULL, de, 1777 iloc.bh, inline_pos, 1778 inline_size, offset)) { 1779 ext4_warning(dir->i_sb, 1780 "bad inline directory (dir #%llu) - " 1781 "inode %u, rec_len %u, name_len %d" 1782 "inline size %d", 1783 dir->i_ino, le32_to_cpu(de->inode), 1784 le16_to_cpu(de->rec_len), de->name_len, 1785 inline_size); 1786 goto out; 1787 } 1788 if (le32_to_cpu(de->inode)) { 1789 goto out; 1790 } 1791 offset += ext4_rec_len_from_disk(de->rec_len, inline_size); 1792 } 1793 1794 ret = true; 1795 out: 1796 up_read(&EXT4_I(dir)->xattr_sem); 1797 brelse(iloc.bh); 1798 return ret; 1799 } 1800 1801 int ext4_destroy_inline_data(handle_t *handle, struct inode *inode) 1802 { 1803 int ret, no_expand; 1804 1805 ext4_write_lock_xattr(inode, &no_expand); 1806 ret = ext4_destroy_inline_data_nolock(handle, inode); 1807 ext4_write_unlock_xattr(inode, &no_expand); 1808 1809 return ret; 1810 } 1811 1812 int ext4_inline_data_iomap(struct inode *inode, struct iomap *iomap) 1813 { 1814 __u64 addr; 1815 int error = -EAGAIN; 1816 struct ext4_iloc iloc; 1817 1818 down_read(&EXT4_I(inode)->xattr_sem); 1819 if (!ext4_has_inline_data(inode)) 1820 goto out; 1821 1822 error = ext4_get_inode_loc(inode, &iloc); 1823 if (error) 1824 goto out; 1825 1826 addr = (__u64)iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits; 1827 addr += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data; 1828 addr += offsetof(struct 
ext4_inode, i_block); 1829 1830 brelse(iloc.bh); 1831 1832 iomap->addr = addr; 1833 iomap->offset = 0; 1834 iomap->length = min_t(loff_t, ext4_get_inline_size(inode), 1835 i_size_read(inode)); 1836 iomap->type = IOMAP_INLINE; 1837 iomap->flags = 0; 1838 1839 out: 1840 up_read(&EXT4_I(inode)->xattr_sem); 1841 return error; 1842 } 1843 1844 int ext4_inline_data_truncate(struct inode *inode, int *has_inline) 1845 { 1846 handle_t *handle; 1847 int inline_size, value_len, needed_blocks, no_expand, err = 0; 1848 size_t i_size; 1849 void *value = NULL; 1850 struct ext4_xattr_ibody_find is = { 1851 .s = { .not_found = -ENODATA, }, 1852 }; 1853 struct ext4_xattr_info i = { 1854 .name_index = EXT4_XATTR_INDEX_SYSTEM, 1855 .name = EXT4_XATTR_SYSTEM_DATA, 1856 }; 1857 1858 1859 needed_blocks = ext4_chunk_trans_extent(inode, 1); 1860 handle = ext4_journal_start(inode, EXT4_HT_INODE, needed_blocks); 1861 if (IS_ERR(handle)) 1862 return PTR_ERR(handle); 1863 1864 ext4_write_lock_xattr(inode, &no_expand); 1865 if (!ext4_has_inline_data(inode)) { 1866 ext4_write_unlock_xattr(inode, &no_expand); 1867 *has_inline = 0; 1868 ext4_journal_stop(handle); 1869 return 0; 1870 } 1871 1872 if ((err = ext4_orphan_add(handle, inode)) != 0) 1873 goto out; 1874 1875 if ((err = ext4_get_inode_loc(inode, &is.iloc)) != 0) 1876 goto out; 1877 1878 down_write(&EXT4_I(inode)->i_data_sem); 1879 i_size = inode->i_size; 1880 inline_size = ext4_get_inline_size(inode); 1881 EXT4_I(inode)->i_disksize = i_size; 1882 1883 if (i_size < inline_size) { 1884 /* 1885 * if there's inline data to truncate and this file was 1886 * converted to extents after that inline data was written, 1887 * the extent status cache must be cleared to avoid leaving 1888 * behind stale delayed allocated extent entries 1889 */ 1890 if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) 1891 ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); 1892 1893 /* Clear the content in the xattr space. 
*/ 1894 if (inline_size > EXT4_MIN_INLINE_DATA_SIZE) { 1895 if ((err = ext4_xattr_ibody_find(inode, &i, &is)) != 0) 1896 goto out_error; 1897 1898 if (is.s.not_found) { 1899 EXT4_ERROR_INODE(inode, 1900 "missing inline data xattr"); 1901 err = -EFSCORRUPTED; 1902 goto out_error; 1903 } 1904 1905 value_len = le32_to_cpu(is.s.here->e_value_size); 1906 value = kmalloc(value_len, GFP_NOFS); 1907 if (!value) { 1908 err = -ENOMEM; 1909 goto out_error; 1910 } 1911 1912 err = ext4_xattr_ibody_get(inode, i.name_index, 1913 i.name, value, value_len); 1914 if (err <= 0) 1915 goto out_error; 1916 1917 i.value = value; 1918 i.value_len = i_size > EXT4_MIN_INLINE_DATA_SIZE ? 1919 i_size - EXT4_MIN_INLINE_DATA_SIZE : 0; 1920 err = ext4_xattr_ibody_set(handle, inode, &i, &is); 1921 if (err) 1922 goto out_error; 1923 } 1924 1925 /* Clear the content within i_blocks. */ 1926 if (i_size < EXT4_MIN_INLINE_DATA_SIZE) { 1927 void *p = (void *) ext4_raw_inode(&is.iloc)->i_block; 1928 memset(p + i_size, 0, 1929 EXT4_MIN_INLINE_DATA_SIZE - i_size); 1930 } 1931 1932 EXT4_I(inode)->i_inline_size = i_size < 1933 EXT4_MIN_INLINE_DATA_SIZE ? 
1934 EXT4_MIN_INLINE_DATA_SIZE : i_size; 1935 } 1936 1937 out_error: 1938 up_write(&EXT4_I(inode)->i_data_sem); 1939 out: 1940 brelse(is.iloc.bh); 1941 ext4_write_unlock_xattr(inode, &no_expand); 1942 kfree(value); 1943 if (inode->i_nlink) 1944 ext4_orphan_del(handle, inode); 1945 1946 if (err == 0) { 1947 inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); 1948 err = ext4_mark_inode_dirty(handle, inode); 1949 if (IS_SYNC(inode)) 1950 ext4_handle_sync(handle); 1951 } 1952 ext4_journal_stop(handle); 1953 return err; 1954 } 1955 1956 int ext4_convert_inline_data(struct inode *inode) 1957 { 1958 int error, needed_blocks, no_expand; 1959 handle_t *handle; 1960 struct ext4_iloc iloc; 1961 1962 if (!ext4_has_inline_data(inode)) { 1963 ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 1964 return 0; 1965 } else if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { 1966 /* 1967 * Inode has inline data but EXT4_STATE_MAY_INLINE_DATA is 1968 * cleared. This means we are in the middle of moving of 1969 * inline data to delay allocated block. Just force writeout 1970 * here to finish conversion. 1971 */ 1972 error = filemap_flush(inode->i_mapping); 1973 if (error) 1974 return error; 1975 if (!ext4_has_inline_data(inode)) 1976 return 0; 1977 } 1978 1979 needed_blocks = ext4_chunk_trans_extent(inode, 1); 1980 1981 iloc.bh = NULL; 1982 error = ext4_get_inode_loc(inode, &iloc); 1983 if (error) 1984 return error; 1985 1986 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks); 1987 if (IS_ERR(handle)) { 1988 error = PTR_ERR(handle); 1989 goto out_free; 1990 } 1991 1992 ext4_write_lock_xattr(inode, &no_expand); 1993 if (ext4_has_inline_data(inode)) 1994 error = ext4_convert_inline_data_nolock(handle, inode, &iloc); 1995 ext4_write_unlock_xattr(inode, &no_expand); 1996 ext4_journal_stop(handle); 1997 out_free: 1998 brelse(iloc.bh); 1999 return error; 2000 } 2001