1 // SPDX-License-Identifier: LGPL-2.1 2 /* 3 * Copyright (c) 2012 Taobao. 4 * Written by Tao Ma <boyu.mt@taobao.com> 5 */ 6 7 #include <linux/iomap.h> 8 #include <linux/fiemap.h> 9 #include <linux/namei.h> 10 #include <linux/iversion.h> 11 #include <linux/sched/mm.h> 12 13 #include "ext4_jbd2.h" 14 #include "ext4.h" 15 #include "xattr.h" 16 #include "truncate.h" 17 18 #define EXT4_XATTR_SYSTEM_DATA "data" 19 #define EXT4_MIN_INLINE_DATA_SIZE ((sizeof(__le32) * EXT4_N_BLOCKS)) 20 #define EXT4_INLINE_DOTDOT_OFFSET 2 21 #define EXT4_INLINE_DOTDOT_SIZE 4 22 23 24 static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping, 25 struct inode *inode, 26 void **fsdata); 27 28 static int ext4_get_inline_size(struct inode *inode) 29 { 30 if (EXT4_I(inode)->i_inline_off) 31 return EXT4_I(inode)->i_inline_size; 32 33 return 0; 34 } 35 36 static int get_max_inline_xattr_value_size(struct inode *inode, 37 struct ext4_iloc *iloc) 38 { 39 struct ext4_xattr_ibody_header *header; 40 struct ext4_xattr_entry *entry; 41 struct ext4_inode *raw_inode; 42 void *end; 43 int free, min_offs; 44 45 if (!EXT4_INODE_HAS_XATTR_SPACE(inode)) 46 return 0; 47 48 min_offs = EXT4_SB(inode->i_sb)->s_inode_size - 49 EXT4_GOOD_OLD_INODE_SIZE - 50 EXT4_I(inode)->i_extra_isize - 51 sizeof(struct ext4_xattr_ibody_header); 52 53 /* 54 * We need to subtract another sizeof(__u32) since an in-inode xattr 55 * needs an empty 4 bytes to indicate the gap between the xattr entry 56 * and the name/value pair. 57 */ 58 if (!ext4_test_inode_state(inode, EXT4_STATE_XATTR)) 59 return EXT4_XATTR_SIZE(min_offs - 60 EXT4_XATTR_LEN(strlen(EXT4_XATTR_SYSTEM_DATA)) - 61 EXT4_XATTR_ROUND - sizeof(__u32)); 62 63 raw_inode = ext4_raw_inode(iloc); 64 header = IHDR(inode, raw_inode); 65 entry = IFIRST(header); 66 end = (void *)raw_inode + EXT4_SB(inode->i_sb)->s_inode_size; 67 68 /* Compute min_offs. */ 69 while (!IS_LAST_ENTRY(entry)) { 70 void *next = EXT4_XATTR_NEXT(entry); 71 72 if (next >= end) { 73 EXT4_ERROR_INODE(inode, 74 "corrupt xattr in inline inode"); 75 return 0; 76 } 77 if (!entry->e_value_inum && entry->e_value_size) { 78 size_t offs = le16_to_cpu(entry->e_value_offs); 79 if (offs < min_offs) 80 min_offs = offs; 81 } 82 entry = next; 83 } 84 free = min_offs - 85 ((void *)entry - (void *)IFIRST(header)) - sizeof(__u32); 86 87 if (EXT4_I(inode)->i_inline_off) { 88 entry = (struct ext4_xattr_entry *) 89 ((void *)raw_inode + EXT4_I(inode)->i_inline_off); 90 91 free += EXT4_XATTR_SIZE(le32_to_cpu(entry->e_value_size)); 92 goto out; 93 } 94 95 free -= EXT4_XATTR_LEN(strlen(EXT4_XATTR_SYSTEM_DATA)); 96 97 if (free > EXT4_XATTR_ROUND) 98 free = EXT4_XATTR_SIZE(free - EXT4_XATTR_ROUND); 99 else 100 free = 0; 101 102 out: 103 return free; 104 } 105 106 /* 107 * Get the maximum size we now can store in an inode. 108 * If we can't find the space for a xattr entry, don't use the space 109 * of the extents since we have no space to indicate the inline data. 110 */ 111 int ext4_get_max_inline_size(struct inode *inode) 112 { 113 int error, max_inline_size; 114 struct ext4_iloc iloc; 115 116 if (EXT4_I(inode)->i_extra_isize == 0) 117 return 0; 118 119 error = ext4_get_inode_loc(inode, &iloc); 120 if (error) { 121 ext4_error_inode_err(inode, __func__, __LINE__, 0, -error, 122 "can't get inode location %lu", 123 inode->i_ino); 124 return 0; 125 } 126 127 down_read(&EXT4_I(inode)->xattr_sem); 128 max_inline_size = get_max_inline_xattr_value_size(inode, &iloc); 129 up_read(&EXT4_I(inode)->xattr_sem); 130 131 brelse(iloc.bh); 132 133 if (!max_inline_size) 134 return 0; 135 136 return max_inline_size + EXT4_MIN_INLINE_DATA_SIZE; 137 } 138 139 /* 140 * this function does not take xattr_sem, which is OK because it is 141 * currently only used in a code path coming form ext4_iget, before 142 * the new inode has been unlocked 143 */ 144 int ext4_find_inline_data_nolock(struct inode *inode) 145 { 146 struct ext4_xattr_ibody_find is = { 147 .s = { .not_found = -ENODATA, }, 148 }; 149 struct ext4_xattr_info i = { 150 .name_index = EXT4_XATTR_INDEX_SYSTEM, 151 .name = EXT4_XATTR_SYSTEM_DATA, 152 }; 153 int error; 154 155 if (EXT4_I(inode)->i_extra_isize == 0) 156 return 0; 157 158 error = ext4_get_inode_loc(inode, &is.iloc); 159 if (error) 160 return error; 161 162 error = ext4_xattr_ibody_find(inode, &i, &is); 163 if (error) 164 goto out; 165 166 if (!is.s.not_found) { 167 if (is.s.here->e_value_inum) { 168 EXT4_ERROR_INODE(inode, "inline data xattr refers " 169 "to an external xattr inode"); 170 error = -EFSCORRUPTED; 171 goto out; 172 } 173 EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here - 174 (void *)ext4_raw_inode(&is.iloc)); 175 EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE + 176 le32_to_cpu(is.s.here->e_value_size); 177 } 178 out: 179 brelse(is.iloc.bh); 180 return error; 181 } 182 183 static int ext4_read_inline_data(struct inode *inode, void *buffer, 184 unsigned int len, 185 struct ext4_iloc *iloc) 186 { 187 struct ext4_xattr_entry *entry; 188 struct ext4_xattr_ibody_header *header; 189 int cp_len = 0; 190 struct ext4_inode *raw_inode; 191 192 if (!len) 193 return 0; 194 195 BUG_ON(len > EXT4_I(inode)->i_inline_size); 196 197 cp_len = min_t(unsigned int, len, EXT4_MIN_INLINE_DATA_SIZE); 198 199 raw_inode = ext4_raw_inode(iloc); 200 memcpy(buffer, (void *)(raw_inode->i_block), cp_len); 201 202 len -= cp_len; 203 buffer += cp_len; 204 205 if (!len) 206 goto out; 207 208 header = IHDR(inode, raw_inode); 209 entry = (struct ext4_xattr_entry *)((void *)raw_inode + 210 EXT4_I(inode)->i_inline_off); 211 len = min_t(unsigned int, len, 212 (unsigned int)le32_to_cpu(entry->e_value_size)); 213 214 memcpy(buffer, 215 (void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs), len); 216 cp_len += len; 217 218 out: 219 return cp_len; 220 } 221 222 /* 223 * write the buffer to the inline inode. 224 * If 'create' is set, we don't need to do the extra copy in the xattr 225 * value since it is already handled by ext4_xattr_ibody_set. 226 * That saves us one memcpy. 227 */ 228 static void ext4_write_inline_data(struct inode *inode, struct ext4_iloc *iloc, 229 void *buffer, loff_t pos, unsigned int len) 230 { 231 struct ext4_xattr_entry *entry; 232 struct ext4_xattr_ibody_header *header; 233 struct ext4_inode *raw_inode; 234 int cp_len = 0; 235 236 if (unlikely(ext4_emergency_state(inode->i_sb))) 237 return; 238 239 BUG_ON(!EXT4_I(inode)->i_inline_off); 240 BUG_ON(pos + len > EXT4_I(inode)->i_inline_size); 241 242 raw_inode = ext4_raw_inode(iloc); 243 buffer += pos; 244 245 if (pos < EXT4_MIN_INLINE_DATA_SIZE) { 246 cp_len = pos + len > EXT4_MIN_INLINE_DATA_SIZE ? 247 EXT4_MIN_INLINE_DATA_SIZE - pos : len; 248 memcpy((void *)raw_inode->i_block + pos, buffer, cp_len); 249 250 len -= cp_len; 251 buffer += cp_len; 252 pos += cp_len; 253 } 254 255 if (!len) 256 return; 257 258 pos -= EXT4_MIN_INLINE_DATA_SIZE; 259 header = IHDR(inode, raw_inode); 260 entry = (struct ext4_xattr_entry *)((void *)raw_inode + 261 EXT4_I(inode)->i_inline_off); 262 263 memcpy((void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs) + pos, 264 buffer, len); 265 } 266 267 static int ext4_create_inline_data(handle_t *handle, 268 struct inode *inode, unsigned len) 269 { 270 int error; 271 void *value = NULL; 272 struct ext4_xattr_ibody_find is = { 273 .s = { .not_found = -ENODATA, }, 274 }; 275 struct ext4_xattr_info i = { 276 .name_index = EXT4_XATTR_INDEX_SYSTEM, 277 .name = EXT4_XATTR_SYSTEM_DATA, 278 }; 279 280 error = ext4_get_inode_loc(inode, &is.iloc); 281 if (error) 282 return error; 283 284 BUFFER_TRACE(is.iloc.bh, "get_write_access"); 285 error = ext4_journal_get_write_access(handle, inode->i_sb, is.iloc.bh, 286 EXT4_JTR_NONE); 287 if (error) 288 goto out; 289 290 if (len > EXT4_MIN_INLINE_DATA_SIZE) { 291 value = EXT4_ZERO_XATTR_VALUE; 292 len -= EXT4_MIN_INLINE_DATA_SIZE; 293 } else { 294 value = ""; 295 len = 0; 296 } 297 298 /* Insert the xttr entry. */ 299 i.value = value; 300 i.value_len = len; 301 302 error = ext4_xattr_ibody_find(inode, &i, &is); 303 if (error) 304 goto out; 305 306 if (!is.s.not_found) { 307 EXT4_ERROR_INODE(inode, "unexpected inline data xattr"); 308 error = -EFSCORRUPTED; 309 goto out; 310 } 311 312 error = ext4_xattr_ibody_set(handle, inode, &i, &is); 313 if (error) { 314 if (error == -ENOSPC) 315 ext4_clear_inode_state(inode, 316 EXT4_STATE_MAY_INLINE_DATA); 317 goto out; 318 } 319 320 memset((void *)ext4_raw_inode(&is.iloc)->i_block, 321 0, EXT4_MIN_INLINE_DATA_SIZE); 322 323 EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here - 324 (void *)ext4_raw_inode(&is.iloc)); 325 EXT4_I(inode)->i_inline_size = len + EXT4_MIN_INLINE_DATA_SIZE; 326 ext4_clear_inode_flag(inode, EXT4_INODE_EXTENTS); 327 ext4_set_inode_flag(inode, EXT4_INODE_INLINE_DATA); 328 get_bh(is.iloc.bh); 329 error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); 330 331 out: 332 brelse(is.iloc.bh); 333 return error; 334 } 335 336 static int ext4_update_inline_data(handle_t *handle, struct inode *inode, 337 unsigned int len) 338 { 339 int error; 340 void *value = NULL; 341 struct ext4_xattr_ibody_find is = { 342 .s = { .not_found = -ENODATA, }, 343 }; 344 struct ext4_xattr_info i = { 345 .name_index = EXT4_XATTR_INDEX_SYSTEM, 346 .name = EXT4_XATTR_SYSTEM_DATA, 347 }; 348 349 /* If the old space is ok, write the data directly. */ 350 if (len <= EXT4_I(inode)->i_inline_size) 351 return 0; 352 353 error = ext4_get_inode_loc(inode, &is.iloc); 354 if (error) 355 return error; 356 357 error = ext4_xattr_ibody_find(inode, &i, &is); 358 if (error) 359 goto out; 360 361 if (is.s.not_found) { 362 EXT4_ERROR_INODE(inode, "missing inline data xattr"); 363 error = -EFSCORRUPTED; 364 goto out; 365 } 366 367 len -= EXT4_MIN_INLINE_DATA_SIZE; 368 value = kzalloc(len, GFP_NOFS); 369 if (!value) { 370 error = -ENOMEM; 371 goto out; 372 } 373 374 error = ext4_xattr_ibody_get(inode, i.name_index, i.name, 375 value, len); 376 if (error < 0) 377 goto out; 378 379 BUFFER_TRACE(is.iloc.bh, "get_write_access"); 380 error = ext4_journal_get_write_access(handle, inode->i_sb, is.iloc.bh, 381 EXT4_JTR_NONE); 382 if (error) 383 goto out; 384 385 /* Update the xattr entry. */ 386 i.value = value; 387 i.value_len = len; 388 389 error = ext4_xattr_ibody_set(handle, inode, &i, &is); 390 if (error) 391 goto out; 392 393 EXT4_I(inode)->i_inline_off = (u16)((void *)is.s.here - 394 (void *)ext4_raw_inode(&is.iloc)); 395 EXT4_I(inode)->i_inline_size = EXT4_MIN_INLINE_DATA_SIZE + 396 le32_to_cpu(is.s.here->e_value_size); 397 ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 398 get_bh(is.iloc.bh); 399 error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); 400 401 out: 402 kfree(value); 403 brelse(is.iloc.bh); 404 return error; 405 } 406 407 static int ext4_prepare_inline_data(handle_t *handle, struct inode *inode, 408 loff_t len) 409 { 410 int ret, size, no_expand; 411 struct ext4_inode_info *ei = EXT4_I(inode); 412 413 if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) 414 return -ENOSPC; 415 416 size = ext4_get_max_inline_size(inode); 417 if (size < len) 418 return -ENOSPC; 419 420 ext4_write_lock_xattr(inode, &no_expand); 421 /* 422 * ei->i_inline_size may have changed since the initial check 423 * if other xattrs were added. Recalculate to ensure 424 * ext4_update_inline_data() validates against current capacity. 425 */ 426 (void) ext4_find_inline_data_nolock(inode); 427 if (ei->i_inline_off) 428 ret = ext4_update_inline_data(handle, inode, len); 429 else 430 ret = ext4_create_inline_data(handle, inode, len); 431 432 ext4_write_unlock_xattr(inode, &no_expand); 433 return ret; 434 } 435 436 static int ext4_destroy_inline_data_nolock(handle_t *handle, 437 struct inode *inode) 438 { 439 struct ext4_inode_info *ei = EXT4_I(inode); 440 struct ext4_xattr_ibody_find is = { 441 .s = { .not_found = 0, }, 442 }; 443 struct ext4_xattr_info i = { 444 .name_index = EXT4_XATTR_INDEX_SYSTEM, 445 .name = EXT4_XATTR_SYSTEM_DATA, 446 .value = NULL, 447 .value_len = 0, 448 }; 449 int error; 450 451 if (!ei->i_inline_off) 452 return 0; 453 454 down_write(&ei->i_data_sem); 455 456 error = ext4_get_inode_loc(inode, &is.iloc); 457 if (error) { 458 up_write(&ei->i_data_sem); 459 return error; 460 } 461 462 error = ext4_xattr_ibody_find(inode, &i, &is); 463 if (error) 464 goto out; 465 466 BUFFER_TRACE(is.iloc.bh, "get_write_access"); 467 error = ext4_journal_get_write_access(handle, inode->i_sb, is.iloc.bh, 468 EXT4_JTR_NONE); 469 if (error) 470 goto out; 471 472 error = ext4_xattr_ibody_set(handle, inode, &i, &is); 473 if (error) 474 goto out; 475 476 memset((void *)ext4_raw_inode(&is.iloc)->i_block, 477 0, EXT4_MIN_INLINE_DATA_SIZE); 478 memset(ei->i_data, 0, EXT4_MIN_INLINE_DATA_SIZE); 479 480 if (ext4_has_feature_extents(inode->i_sb)) { 481 if (S_ISDIR(inode->i_mode) || 482 S_ISREG(inode->i_mode) || S_ISLNK(inode->i_mode)) { 483 ext4_set_inode_flag(inode, EXT4_INODE_EXTENTS); 484 ext4_ext_tree_init(handle, inode); 485 } 486 } 487 ext4_clear_inode_flag(inode, EXT4_INODE_INLINE_DATA); 488 489 get_bh(is.iloc.bh); 490 error = ext4_mark_iloc_dirty(handle, inode, &is.iloc); 491 492 EXT4_I(inode)->i_inline_off = 0; 493 EXT4_I(inode)->i_inline_size = 0; 494 ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 495 out: 496 brelse(is.iloc.bh); 497 if (error == -ENODATA) 498 error = 0; 499 up_write(&ei->i_data_sem); 500 return error; 501 } 502 503 static int ext4_read_inline_folio(struct inode *inode, struct folio *folio) 504 { 505 void *kaddr; 506 int ret = 0; 507 size_t len; 508 struct ext4_iloc iloc; 509 510 BUG_ON(!folio_test_locked(folio)); 511 BUG_ON(!ext4_has_inline_data(inode)); 512 BUG_ON(folio->index); 513 514 if (!EXT4_I(inode)->i_inline_off) { 515 ext4_warning(inode->i_sb, "inode %lu doesn't have inline data.", 516 inode->i_ino); 517 goto out; 518 } 519 520 ret = ext4_get_inode_loc(inode, &iloc); 521 if (ret) 522 goto out; 523 524 len = min_t(size_t, ext4_get_inline_size(inode), i_size_read(inode)); 525 BUG_ON(len > PAGE_SIZE); 526 kaddr = kmap_local_folio(folio, 0); 527 ret = ext4_read_inline_data(inode, kaddr, len, &iloc); 528 kaddr = folio_zero_tail(folio, len, kaddr + len); 529 kunmap_local(kaddr); 530 folio_mark_uptodate(folio); 531 brelse(iloc.bh); 532 533 out: 534 return ret; 535 } 536 537 int ext4_readpage_inline(struct inode *inode, struct folio *folio) 538 { 539 int ret = 0; 540 541 down_read(&EXT4_I(inode)->xattr_sem); 542 if (!ext4_has_inline_data(inode)) { 543 up_read(&EXT4_I(inode)->xattr_sem); 544 return -EAGAIN; 545 } 546 547 /* 548 * Current inline data can only exist in the 1st page, 549 * So for all the other pages, just set them uptodate. 550 */ 551 if (!folio->index) 552 ret = ext4_read_inline_folio(inode, folio); 553 else if (!folio_test_uptodate(folio)) { 554 folio_zero_segment(folio, 0, folio_size(folio)); 555 folio_mark_uptodate(folio); 556 } 557 558 up_read(&EXT4_I(inode)->xattr_sem); 559 560 folio_unlock(folio); 561 return ret >= 0 ? 0 : ret; 562 } 563 564 static int ext4_convert_inline_data_to_extent(struct address_space *mapping, 565 struct inode *inode) 566 { 567 int ret, needed_blocks, no_expand; 568 handle_t *handle = NULL; 569 int retries = 0, sem_held = 0; 570 struct folio *folio = NULL; 571 unsigned from, to; 572 struct ext4_iloc iloc; 573 574 if (!ext4_has_inline_data(inode)) { 575 /* 576 * clear the flag so that no new write 577 * will trap here again. 578 */ 579 ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 580 return 0; 581 } 582 583 needed_blocks = ext4_chunk_trans_extent(inode, 1); 584 585 ret = ext4_get_inode_loc(inode, &iloc); 586 if (ret) 587 return ret; 588 589 retry: 590 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks); 591 if (IS_ERR(handle)) { 592 ret = PTR_ERR(handle); 593 handle = NULL; 594 goto out; 595 } 596 597 /* We cannot recurse into the filesystem as the transaction is already 598 * started */ 599 folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN | FGP_NOFS, 600 mapping_gfp_mask(mapping)); 601 if (IS_ERR(folio)) { 602 ret = PTR_ERR(folio); 603 goto out_nofolio; 604 } 605 606 ext4_write_lock_xattr(inode, &no_expand); 607 sem_held = 1; 608 /* If some one has already done this for us, just exit. */ 609 if (!ext4_has_inline_data(inode)) { 610 ret = 0; 611 goto out; 612 } 613 614 from = 0; 615 to = ext4_get_inline_size(inode); 616 if (!folio_test_uptodate(folio)) { 617 ret = ext4_read_inline_folio(inode, folio); 618 if (ret < 0) 619 goto out; 620 } 621 622 ext4_fc_track_inode(handle, inode); 623 ret = ext4_destroy_inline_data_nolock(handle, inode); 624 if (ret) 625 goto out; 626 627 if (ext4_should_dioread_nolock(inode)) { 628 ret = ext4_block_write_begin(handle, folio, from, to, 629 ext4_get_block_unwritten); 630 } else 631 ret = ext4_block_write_begin(handle, folio, from, to, 632 ext4_get_block); 633 clear_buffer_new(folio_buffers(folio)); 634 635 if (!ret && ext4_should_journal_data(inode)) { 636 ret = ext4_walk_page_buffers(handle, inode, 637 folio_buffers(folio), from, to, 638 NULL, do_journal_get_write_access); 639 } 640 641 if (ret) { 642 folio_unlock(folio); 643 folio_put(folio); 644 folio = NULL; 645 ext4_orphan_add(handle, inode); 646 ext4_write_unlock_xattr(inode, &no_expand); 647 sem_held = 0; 648 ext4_journal_stop(handle); 649 handle = NULL; 650 ext4_truncate_failed_write(inode); 651 /* 652 * If truncate failed early the inode might 653 * still be on the orphan list; we need to 654 * make sure the inode is removed from the 655 * orphan list in that case. 656 */ 657 if (inode->i_nlink) 658 ext4_orphan_del(NULL, inode); 659 } 660 661 if (ret == -ENOSPC && ext4_should_retry_alloc(inode->i_sb, &retries)) 662 goto retry; 663 664 if (folio) 665 block_commit_write(folio, from, to); 666 out: 667 if (folio) { 668 folio_unlock(folio); 669 folio_put(folio); 670 } 671 out_nofolio: 672 if (sem_held) 673 ext4_write_unlock_xattr(inode, &no_expand); 674 if (handle) 675 ext4_journal_stop(handle); 676 brelse(iloc.bh); 677 return ret; 678 } 679 680 /* 681 * Prepare the write for the inline data. 682 * If the data can be written into the inode, we just read 683 * the page and make it uptodate, and start the journal. 684 * Otherwise read the page, makes it dirty so that it can be 685 * handle in writepages(the i_disksize update is left to the 686 * normal ext4_da_write_end). 687 */ 688 int ext4_generic_write_inline_data(struct address_space *mapping, 689 struct inode *inode, 690 loff_t pos, unsigned len, 691 struct folio **foliop, 692 void **fsdata, bool da) 693 { 694 int ret; 695 handle_t *handle; 696 struct folio *folio; 697 struct ext4_iloc iloc; 698 int retries = 0; 699 700 ret = ext4_get_inode_loc(inode, &iloc); 701 if (ret) 702 return ret; 703 704 retry_journal: 705 handle = ext4_journal_start(inode, EXT4_HT_INODE, 1); 706 if (IS_ERR(handle)) { 707 ret = PTR_ERR(handle); 708 goto out_release_bh; 709 } 710 711 ret = ext4_prepare_inline_data(handle, inode, pos + len); 712 if (ret && ret != -ENOSPC) 713 goto out_stop_journal; 714 715 if (ret == -ENOSPC) { 716 ext4_journal_stop(handle); 717 if (!da) { 718 brelse(iloc.bh); 719 /* Retry inside */ 720 return ext4_convert_inline_data_to_extent(mapping, inode); 721 } 722 723 ret = ext4_da_convert_inline_data_to_extent(mapping, inode, fsdata); 724 if (ret == -ENOSPC && 725 ext4_should_retry_alloc(inode->i_sb, &retries)) 726 goto retry_journal; 727 goto out_release_bh; 728 } 729 730 folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN | FGP_NOFS, 731 mapping_gfp_mask(mapping)); 732 if (IS_ERR(folio)) { 733 ret = PTR_ERR(folio); 734 goto out_stop_journal; 735 } 736 737 down_read(&EXT4_I(inode)->xattr_sem); 738 /* Someone else had converted it to extent */ 739 if (!ext4_has_inline_data(inode)) { 740 ret = 0; 741 goto out_release_folio; 742 } 743 744 if (!folio_test_uptodate(folio)) { 745 ret = ext4_read_inline_folio(inode, folio); 746 if (ret < 0) 747 goto out_release_folio; 748 } 749 750 ret = ext4_journal_get_write_access(handle, inode->i_sb, iloc.bh, EXT4_JTR_NONE); 751 if (ret) 752 goto out_release_folio; 753 *foliop = folio; 754 up_read(&EXT4_I(inode)->xattr_sem); 755 brelse(iloc.bh); 756 return 1; 757 758 out_release_folio: 759 up_read(&EXT4_I(inode)->xattr_sem); 760 folio_unlock(folio); 761 folio_put(folio); 762 out_stop_journal: 763 ext4_journal_stop(handle); 764 out_release_bh: 765 brelse(iloc.bh); 766 return ret; 767 } 768 769 /* 770 * Try to write data in the inode. 771 * If the inode has inline data, check whether the new write can be 772 * in the inode also. If not, create the page the handle, move the data 773 * to the page make it update and let the later codes create extent for it. 774 */ 775 int ext4_try_to_write_inline_data(struct address_space *mapping, 776 struct inode *inode, 777 loff_t pos, unsigned len, 778 struct folio **foliop) 779 { 780 if (pos + len > ext4_get_max_inline_size(inode)) 781 return ext4_convert_inline_data_to_extent(mapping, inode); 782 return ext4_generic_write_inline_data(mapping, inode, pos, len, 783 foliop, NULL, false); 784 } 785 786 int ext4_write_inline_data_end(struct inode *inode, loff_t pos, unsigned len, 787 unsigned copied, struct folio *folio) 788 { 789 handle_t *handle = ext4_journal_current_handle(); 790 int no_expand; 791 void *kaddr; 792 struct ext4_iloc iloc; 793 int ret = 0, ret2; 794 795 if (unlikely(copied < len) && !folio_test_uptodate(folio)) 796 copied = 0; 797 798 if (likely(copied)) { 799 ret = ext4_get_inode_loc(inode, &iloc); 800 if (ret) { 801 folio_unlock(folio); 802 folio_put(folio); 803 ext4_std_error(inode->i_sb, ret); 804 goto out; 805 } 806 ext4_write_lock_xattr(inode, &no_expand); 807 BUG_ON(!ext4_has_inline_data(inode)); 808 809 /* 810 * ei->i_inline_off may have changed since 811 * ext4_write_begin() called 812 * ext4_try_to_write_inline_data() 813 */ 814 (void) ext4_find_inline_data_nolock(inode); 815 816 kaddr = kmap_local_folio(folio, 0); 817 ext4_write_inline_data(inode, &iloc, kaddr, pos, copied); 818 kunmap_local(kaddr); 819 folio_mark_uptodate(folio); 820 /* clear dirty flag so that writepages wouldn't work for us. */ 821 folio_clear_dirty(folio); 822 823 ext4_write_unlock_xattr(inode, &no_expand); 824 brelse(iloc.bh); 825 826 /* 827 * It's important to update i_size while still holding folio 828 * lock: page writeout could otherwise come in and zero 829 * beyond i_size. 830 */ 831 ext4_update_inode_size(inode, pos + copied); 832 } 833 folio_unlock(folio); 834 folio_put(folio); 835 836 /* 837 * Don't mark the inode dirty under folio lock. First, it unnecessarily 838 * makes the holding time of folio lock longer. Second, it forces lock 839 * ordering of folio lock and transaction start for journaling 840 * filesystems. 841 */ 842 if (likely(copied)) 843 mark_inode_dirty(inode); 844 out: 845 /* 846 * If we didn't copy as much data as expected, we need to trim back 847 * size of xattr containing inline data. 848 */ 849 if (pos + len > inode->i_size && ext4_can_truncate(inode)) 850 ext4_orphan_add(handle, inode); 851 852 ret2 = ext4_journal_stop(handle); 853 if (!ret) 854 ret = ret2; 855 if (pos + len > inode->i_size) { 856 ext4_truncate_failed_write(inode); 857 /* 858 * If truncate failed early the inode might still be 859 * on the orphan list; we need to make sure the inode 860 * is removed from the orphan list in that case. 861 */ 862 if (inode->i_nlink) 863 ext4_orphan_del(NULL, inode); 864 } 865 return ret ? ret : copied; 866 } 867 868 /* 869 * Try to make the page cache and handle ready for the inline data case. 870 * We can call this function in 2 cases: 871 * 1. The inode is created and the first write exceeds inline size. We can 872 * clear the inode state safely. 873 * 2. The inode has inline data, then we need to read the data, make it 874 * update and dirty so that ext4_da_writepages can handle it. We don't 875 * need to start the journal since the file's metadata isn't changed now. 876 */ 877 static int ext4_da_convert_inline_data_to_extent(struct address_space *mapping, 878 struct inode *inode, 879 void **fsdata) 880 { 881 int ret = 0, inline_size; 882 struct folio *folio; 883 884 folio = __filemap_get_folio(mapping, 0, FGP_WRITEBEGIN, 885 mapping_gfp_mask(mapping)); 886 if (IS_ERR(folio)) 887 return PTR_ERR(folio); 888 889 down_read(&EXT4_I(inode)->xattr_sem); 890 if (!ext4_has_inline_data(inode)) { 891 ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 892 goto out; 893 } 894 895 inline_size = ext4_get_inline_size(inode); 896 897 if (!folio_test_uptodate(folio)) { 898 ret = ext4_read_inline_folio(inode, folio); 899 if (ret < 0) 900 goto out; 901 } 902 903 ret = ext4_block_write_begin(NULL, folio, 0, inline_size, 904 ext4_da_get_block_prep); 905 if (ret) { 906 up_read(&EXT4_I(inode)->xattr_sem); 907 folio_unlock(folio); 908 folio_put(folio); 909 ext4_truncate_failed_write(inode); 910 return ret; 911 } 912 913 clear_buffer_new(folio_buffers(folio)); 914 folio_mark_dirty(folio); 915 folio_mark_uptodate(folio); 916 ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 917 *fsdata = (void *)CONVERT_INLINE_DATA; 918 919 out: 920 up_read(&EXT4_I(inode)->xattr_sem); 921 if (folio) { 922 folio_unlock(folio); 923 folio_put(folio); 924 } 925 return ret; 926 } 927 928 #ifdef INLINE_DIR_DEBUG 929 void ext4_show_inline_dir(struct inode *dir, struct buffer_head *bh, 930 void *inline_start, int inline_size) 931 { 932 int offset; 933 unsigned short de_len; 934 struct ext4_dir_entry_2 *de = inline_start; 935 void *dlimit = inline_start + inline_size; 936 937 trace_printk("inode %lu\n", dir->i_ino); 938 offset = 0; 939 while ((void *)de < dlimit) { 940 de_len = ext4_rec_len_from_disk(de->rec_len, inline_size); 941 trace_printk("de: off %u rlen %u name %.*s nlen %u ino %u\n", 942 offset, de_len, de->name_len, de->name, 943 de->name_len, le32_to_cpu(de->inode)); 944 if (ext4_check_dir_entry(dir, NULL, de, bh, 945 inline_start, inline_size, offset)) 946 BUG(); 947 948 offset += de_len; 949 de = (struct ext4_dir_entry_2 *) ((char *) de + de_len); 950 } 951 } 952 #else 953 #define ext4_show_inline_dir(dir, bh, inline_start, inline_size) 954 #endif 955 956 /* 957 * Add a new entry into a inline dir. 958 * It will return -ENOSPC if no space is available, and -EIO 959 * and -EEXIST if directory entry already exists. 960 */ 961 static int ext4_add_dirent_to_inline(handle_t *handle, 962 struct ext4_filename *fname, 963 struct inode *dir, 964 struct inode *inode, 965 struct ext4_iloc *iloc, 966 void *inline_start, int inline_size) 967 { 968 int err; 969 struct ext4_dir_entry_2 *de; 970 971 err = ext4_find_dest_de(dir, iloc->bh, inline_start, 972 inline_size, fname, &de); 973 if (err) 974 return err; 975 976 BUFFER_TRACE(iloc->bh, "get_write_access"); 977 err = ext4_journal_get_write_access(handle, dir->i_sb, iloc->bh, 978 EXT4_JTR_NONE); 979 if (err) 980 return err; 981 ext4_insert_dentry(dir, inode, de, inline_size, fname); 982 983 ext4_show_inline_dir(dir, iloc->bh, inline_start, inline_size); 984 985 /* 986 * XXX shouldn't update any times until successful 987 * completion of syscall, but too many callers depend 988 * on this. 989 * 990 * XXX similarly, too many callers depend on 991 * ext4_new_inode() setting the times, but error 992 * recovery deletes the inode, so the worst that can 993 * happen is that the times are slightly out of date 994 * and/or different from the directory change time. 995 */ 996 inode_set_mtime_to_ts(dir, inode_set_ctime_current(dir)); 997 ext4_update_dx_flag(dir); 998 inode_inc_iversion(dir); 999 return 1; 1000 } 1001 1002 static void *ext4_get_inline_xattr_pos(struct inode *inode, 1003 struct ext4_iloc *iloc) 1004 { 1005 struct ext4_xattr_entry *entry; 1006 struct ext4_xattr_ibody_header *header; 1007 1008 BUG_ON(!EXT4_I(inode)->i_inline_off); 1009 1010 header = IHDR(inode, ext4_raw_inode(iloc)); 1011 entry = (struct ext4_xattr_entry *)((void *)ext4_raw_inode(iloc) + 1012 EXT4_I(inode)->i_inline_off); 1013 1014 return (void *)IFIRST(header) + le16_to_cpu(entry->e_value_offs); 1015 } 1016 1017 /* Set the final de to cover the whole block. */ 1018 void ext4_update_final_de(void *de_buf, int old_size, int new_size) 1019 { 1020 struct ext4_dir_entry_2 *de, *prev_de; 1021 void *limit; 1022 int de_len; 1023 1024 de = de_buf; 1025 if (old_size) { 1026 limit = de_buf + old_size; 1027 do { 1028 prev_de = de; 1029 de_len = ext4_rec_len_from_disk(de->rec_len, old_size); 1030 de_buf += de_len; 1031 de = de_buf; 1032 } while (de_buf < limit); 1033 1034 prev_de->rec_len = ext4_rec_len_to_disk(de_len + new_size - 1035 old_size, new_size); 1036 } else { 1037 /* this is just created, so create an empty entry. */ 1038 de->inode = 0; 1039 de->rec_len = ext4_rec_len_to_disk(new_size, new_size); 1040 } 1041 } 1042 1043 static int ext4_update_inline_dir(handle_t *handle, struct inode *dir, 1044 struct ext4_iloc *iloc) 1045 { 1046 int ret; 1047 int old_size = EXT4_I(dir)->i_inline_size - EXT4_MIN_INLINE_DATA_SIZE; 1048 int new_size = get_max_inline_xattr_value_size(dir, iloc); 1049 1050 if (new_size - old_size <= ext4_dir_rec_len(1, NULL)) 1051 return -ENOSPC; 1052 1053 ret = ext4_update_inline_data(handle, dir, 1054 new_size + EXT4_MIN_INLINE_DATA_SIZE); 1055 if (ret) 1056 return ret; 1057 1058 ext4_update_final_de(ext4_get_inline_xattr_pos(dir, iloc), old_size, 1059 EXT4_I(dir)->i_inline_size - 1060 EXT4_MIN_INLINE_DATA_SIZE); 1061 dir->i_size = EXT4_I(dir)->i_disksize = EXT4_I(dir)->i_inline_size; 1062 return 0; 1063 } 1064 1065 static void ext4_restore_inline_data(handle_t *handle, struct inode *inode, 1066 struct ext4_iloc *iloc, 1067 void *buf, int inline_size) 1068 { 1069 int ret; 1070 1071 ret = ext4_create_inline_data(handle, inode, inline_size); 1072 if (ret) { 1073 ext4_msg(inode->i_sb, KERN_EMERG, 1074 "error restoring inline_data for inode -- potential data loss! (inode %lu, error %d)", 1075 inode->i_ino, ret); 1076 return; 1077 } 1078 ext4_write_inline_data(inode, iloc, buf, 0, inline_size); 1079 ext4_set_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 1080 } 1081 1082 static int ext4_convert_inline_data_nolock(handle_t *handle, 1083 struct inode *inode, 1084 struct ext4_iloc *iloc) 1085 { 1086 int error; 1087 void *buf = NULL; 1088 struct buffer_head *data_bh = NULL; 1089 struct ext4_map_blocks map; 1090 int inline_size; 1091 1092 inline_size = ext4_get_inline_size(inode); 1093 buf = kmalloc(inline_size, GFP_NOFS); 1094 if (!buf) { 1095 error = -ENOMEM; 1096 goto out; 1097 } 1098 1099 error = ext4_read_inline_data(inode, buf, inline_size, iloc); 1100 if (error < 0) 1101 goto out; 1102 1103 /* 1104 * Make sure the inline directory entries pass checks before we try to 1105 * convert them, so that we avoid touching stuff that needs fsck. 1106 */ 1107 if (S_ISDIR(inode->i_mode)) { 1108 error = ext4_check_all_de(inode, iloc->bh, 1109 buf + EXT4_INLINE_DOTDOT_SIZE, 1110 inline_size - EXT4_INLINE_DOTDOT_SIZE); 1111 if (error) 1112 goto out; 1113 } 1114 1115 error = ext4_destroy_inline_data_nolock(handle, inode); 1116 if (error) 1117 goto out; 1118 1119 map.m_lblk = 0; 1120 map.m_len = 1; 1121 map.m_flags = 0; 1122 error = ext4_map_blocks(handle, inode, &map, EXT4_GET_BLOCKS_CREATE); 1123 if (error < 0) 1124 goto out_restore; 1125 if (!(map.m_flags & EXT4_MAP_MAPPED)) { 1126 error = -EIO; 1127 goto out_restore; 1128 } 1129 1130 data_bh = sb_getblk(inode->i_sb, map.m_pblk); 1131 if (!data_bh) { 1132 error = -ENOMEM; 1133 goto out_restore; 1134 } 1135 1136 lock_buffer(data_bh); 1137 error = ext4_journal_get_create_access(handle, inode->i_sb, data_bh, 1138 EXT4_JTR_NONE); 1139 if (error) { 1140 unlock_buffer(data_bh); 1141 error = -EIO; 1142 goto out_restore; 1143 } 1144 memset(data_bh->b_data, 0, inode->i_sb->s_blocksize); 1145 1146 if (!S_ISDIR(inode->i_mode)) { 1147 memcpy(data_bh->b_data, buf, inline_size); 1148 set_buffer_uptodate(data_bh); 1149 unlock_buffer(data_bh); 1150 error = ext4_handle_dirty_metadata(handle, 1151 inode, data_bh); 1152 } else { 1153 unlock_buffer(data_bh); 1154 inode->i_size = inode->i_sb->s_blocksize; 1155 i_size_write(inode, inode->i_sb->s_blocksize); 1156 EXT4_I(inode)->i_disksize = inode->i_sb->s_blocksize; 1157 1158 error = ext4_init_dirblock(handle, inode, data_bh, 1159 le32_to_cpu(((struct ext4_dir_entry_2 *)buf)->inode), 1160 buf + EXT4_INLINE_DOTDOT_SIZE, 1161 inline_size - EXT4_INLINE_DOTDOT_SIZE); 1162 if (!error) 1163 error = ext4_mark_inode_dirty(handle, inode); 1164 } 1165 1166 out_restore: 1167 if (error) 1168 ext4_restore_inline_data(handle, inode, iloc, buf, inline_size); 1169 1170 out: 1171 brelse(data_bh); 1172 kfree(buf); 1173 return error; 1174 } 1175 1176 /* 1177 * Try to add the new entry to the inline data. 1178 * If succeeds, return 0. If not, extended the inline dir and copied data to 1179 * the new created block. 1180 */ 1181 int ext4_try_add_inline_entry(handle_t *handle, struct ext4_filename *fname, 1182 struct inode *dir, struct inode *inode) 1183 { 1184 int ret, ret2, inline_size, no_expand; 1185 void *inline_start; 1186 struct ext4_iloc iloc; 1187 1188 ret = ext4_get_inode_loc(dir, &iloc); 1189 if (ret) 1190 return ret; 1191 1192 ext4_write_lock_xattr(dir, &no_expand); 1193 if (!ext4_has_inline_data(dir)) 1194 goto out; 1195 1196 inline_start = (void *)ext4_raw_inode(&iloc)->i_block + 1197 EXT4_INLINE_DOTDOT_SIZE; 1198 inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE; 1199 1200 ret = ext4_add_dirent_to_inline(handle, fname, dir, inode, &iloc, 1201 inline_start, inline_size); 1202 if (ret != -ENOSPC) 1203 goto out; 1204 1205 /* check whether it can be inserted to inline xattr space. */ 1206 inline_size = EXT4_I(dir)->i_inline_size - 1207 EXT4_MIN_INLINE_DATA_SIZE; 1208 if (!inline_size) { 1209 /* Try to use the xattr space.*/ 1210 ret = ext4_update_inline_dir(handle, dir, &iloc); 1211 if (ret && ret != -ENOSPC) 1212 goto out; 1213 1214 inline_size = EXT4_I(dir)->i_inline_size - 1215 EXT4_MIN_INLINE_DATA_SIZE; 1216 } 1217 1218 if (inline_size) { 1219 inline_start = ext4_get_inline_xattr_pos(dir, &iloc); 1220 1221 ret = ext4_add_dirent_to_inline(handle, fname, dir, 1222 inode, &iloc, inline_start, 1223 inline_size); 1224 1225 if (ret != -ENOSPC) 1226 goto out; 1227 } 1228 1229 /* 1230 * The inline space is filled up, so create a new block for it. 1231 * As the extent tree will be created, we have to save the inline 1232 * dir first. 1233 */ 1234 ret = ext4_convert_inline_data_nolock(handle, dir, &iloc); 1235 1236 out: 1237 ext4_write_unlock_xattr(dir, &no_expand); 1238 ret2 = ext4_mark_inode_dirty(handle, dir); 1239 if (unlikely(ret2 && !ret)) 1240 ret = ret2; 1241 brelse(iloc.bh); 1242 return ret; 1243 } 1244 1245 /* 1246 * This function fills a red-black tree with information from an 1247 * inlined dir. It returns the number directory entries loaded 1248 * into the tree. If there is an error it is returned in err. 1249 */ 1250 int ext4_inlinedir_to_tree(struct file *dir_file, 1251 struct inode *dir, ext4_lblk_t block, 1252 struct dx_hash_info *hinfo, 1253 __u32 start_hash, __u32 start_minor_hash, 1254 int *has_inline_data) 1255 { 1256 int err = 0, count = 0; 1257 unsigned int parent_ino; 1258 int pos; 1259 struct ext4_dir_entry_2 *de; 1260 struct inode *inode = file_inode(dir_file); 1261 int ret, inline_size = 0; 1262 struct ext4_iloc iloc; 1263 void *dir_buf = NULL; 1264 struct ext4_dir_entry_2 fake; 1265 struct fscrypt_str tmp_str; 1266 1267 ret = ext4_get_inode_loc(inode, &iloc); 1268 if (ret) 1269 return ret; 1270 1271 down_read(&EXT4_I(inode)->xattr_sem); 1272 if (!ext4_has_inline_data(inode)) { 1273 up_read(&EXT4_I(inode)->xattr_sem); 1274 *has_inline_data = 0; 1275 goto out; 1276 } 1277 1278 inline_size = ext4_get_inline_size(inode); 1279 dir_buf = kmalloc(inline_size, GFP_NOFS); 1280 if (!dir_buf) { 1281 ret = -ENOMEM; 1282 up_read(&EXT4_I(inode)->xattr_sem); 1283 goto out; 1284 } 1285 1286 ret = ext4_read_inline_data(inode, dir_buf, inline_size, &iloc); 1287 up_read(&EXT4_I(inode)->xattr_sem); 1288 if (ret < 0) 1289 goto out; 1290 1291 pos = 0; 1292 parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode); 1293 while (pos < inline_size) { 1294 /* 1295 * As inlined dir doesn't store any information about '.' and 1296 * only the inode number of '..' is stored, we have to handle 1297 * them differently. 1298 */ 1299 if (pos == 0) { 1300 fake.inode = cpu_to_le32(inode->i_ino); 1301 fake.name_len = 1; 1302 memcpy(fake.name, ".", 2); 1303 fake.rec_len = ext4_rec_len_to_disk( 1304 ext4_dir_rec_len(fake.name_len, NULL), 1305 inline_size); 1306 ext4_set_de_type(inode->i_sb, &fake, S_IFDIR); 1307 de = &fake; 1308 pos = EXT4_INLINE_DOTDOT_OFFSET; 1309 } else if (pos == EXT4_INLINE_DOTDOT_OFFSET) { 1310 fake.inode = cpu_to_le32(parent_ino); 1311 fake.name_len = 2; 1312 memcpy(fake.name, "..", 3); 1313 fake.rec_len = ext4_rec_len_to_disk( 1314 ext4_dir_rec_len(fake.name_len, NULL), 1315 inline_size); 1316 ext4_set_de_type(inode->i_sb, &fake, S_IFDIR); 1317 de = &fake; 1318 pos = EXT4_INLINE_DOTDOT_SIZE; 1319 } else { 1320 de = (struct ext4_dir_entry_2 *)(dir_buf + pos); 1321 pos += ext4_rec_len_from_disk(de->rec_len, inline_size); 1322 if (ext4_check_dir_entry(inode, dir_file, de, 1323 iloc.bh, dir_buf, 1324 inline_size, pos)) { 1325 ret = count; 1326 goto out; 1327 } 1328 } 1329 1330 if (ext4_hash_in_dirent(dir)) { 1331 hinfo->hash = EXT4_DIRENT_HASH(de); 1332 hinfo->minor_hash = EXT4_DIRENT_MINOR_HASH(de); 1333 } else { 1334 err = ext4fs_dirhash(dir, de->name, de->name_len, hinfo); 1335 if (err) { 1336 ret = err; 1337 goto out; 1338 } 1339 } 1340 if ((hinfo->hash < start_hash) || 1341 ((hinfo->hash == start_hash) && 1342 (hinfo->minor_hash < start_minor_hash))) 1343 continue; 1344 if (de->inode == 0) 1345 continue; 1346 tmp_str.name = de->name; 1347 tmp_str.len = de->name_len; 1348 err = ext4_htree_store_dirent(dir_file, hinfo->hash, 1349 hinfo->minor_hash, de, &tmp_str); 1350 if (err) { 1351 ret = err; 1352 goto out; 1353 } 1354 count++; 1355 } 1356 ret = count; 1357 out: 1358 kfree(dir_buf); 1359 brelse(iloc.bh); 1360 return ret; 1361 } 1362 1363 /* 1364 * So this function is called when the volume is mkfsed with 1365 * dir_index disabled. In order to keep f_pos persistent 1366 * after we convert from an inlined dir to a blocked based, 1367 * we just pretend that we are a normal dir and return the 1368 * offset as if '.' and '..' really take place. 1369 * 1370 */ 1371 int ext4_read_inline_dir(struct file *file, 1372 struct dir_context *ctx, 1373 int *has_inline_data) 1374 { 1375 unsigned int offset, parent_ino; 1376 int i; 1377 struct ext4_dir_entry_2 *de; 1378 struct super_block *sb; 1379 struct inode *inode = file_inode(file); 1380 int ret, inline_size = 0; 1381 struct ext4_iloc iloc; 1382 void *dir_buf = NULL; 1383 int dotdot_offset, dotdot_size, extra_offset, extra_size; 1384 struct dir_private_info *info = file->private_data; 1385 1386 ret = ext4_get_inode_loc(inode, &iloc); 1387 if (ret) 1388 return ret; 1389 1390 down_read(&EXT4_I(inode)->xattr_sem); 1391 if (!ext4_has_inline_data(inode)) { 1392 up_read(&EXT4_I(inode)->xattr_sem); 1393 *has_inline_data = 0; 1394 goto out; 1395 } 1396 1397 inline_size = ext4_get_inline_size(inode); 1398 dir_buf = kmalloc(inline_size, GFP_NOFS); 1399 if (!dir_buf) { 1400 ret = -ENOMEM; 1401 up_read(&EXT4_I(inode)->xattr_sem); 1402 goto out; 1403 } 1404 1405 ret = ext4_read_inline_data(inode, dir_buf, inline_size, &iloc); 1406 up_read(&EXT4_I(inode)->xattr_sem); 1407 if (ret < 0) 1408 goto out; 1409 1410 ret = 0; 1411 sb = inode->i_sb; 1412 parent_ino = le32_to_cpu(((struct ext4_dir_entry_2 *)dir_buf)->inode); 1413 offset = ctx->pos; 1414 1415 /* 1416 * dotdot_offset and dotdot_size is the real offset and 1417 * size for ".." and "." if the dir is block based while 1418 * the real size for them are only EXT4_INLINE_DOTDOT_SIZE. 1419 * So we will use extra_offset and extra_size to indicate them 1420 * during the inline dir iteration. 1421 */ 1422 dotdot_offset = ext4_dir_rec_len(1, NULL); 1423 dotdot_size = dotdot_offset + ext4_dir_rec_len(2, NULL); 1424 extra_offset = dotdot_size - EXT4_INLINE_DOTDOT_SIZE; 1425 extra_size = extra_offset + inline_size; 1426 1427 /* 1428 * If the cookie has changed since the last call to 1429 * readdir(2), then we might be pointing to an invalid 1430 * dirent right now. Scan from the start of the inline 1431 * dir to make sure. 1432 */ 1433 if (!inode_eq_iversion(inode, info->cookie)) { 1434 for (i = 0; i < extra_size && i < offset;) { 1435 /* 1436 * "." is with offset 0 and 1437 * ".." is dotdot_offset. 1438 */ 1439 if (!i) { 1440 i = dotdot_offset; 1441 continue; 1442 } else if (i == dotdot_offset) { 1443 i = dotdot_size; 1444 continue; 1445 } 1446 /* for other entry, the real offset in 1447 * the buf has to be tuned accordingly. 1448 */ 1449 de = (struct ext4_dir_entry_2 *) 1450 (dir_buf + i - extra_offset); 1451 /* It's too expensive to do a full 1452 * dirent test each time round this 1453 * loop, but we do have to test at 1454 * least that it is non-zero. A 1455 * failure will be detected in the 1456 * dirent test below. */ 1457 if (ext4_rec_len_from_disk(de->rec_len, extra_size) 1458 < ext4_dir_rec_len(1, NULL)) 1459 break; 1460 i += ext4_rec_len_from_disk(de->rec_len, 1461 extra_size); 1462 } 1463 offset = i; 1464 ctx->pos = offset; 1465 info->cookie = inode_query_iversion(inode); 1466 } 1467 1468 while (ctx->pos < extra_size) { 1469 if (ctx->pos == 0) { 1470 if (!dir_emit(ctx, ".", 1, inode->i_ino, DT_DIR)) 1471 goto out; 1472 ctx->pos = dotdot_offset; 1473 continue; 1474 } 1475 1476 if (ctx->pos == dotdot_offset) { 1477 if (!dir_emit(ctx, "..", 2, parent_ino, DT_DIR)) 1478 goto out; 1479 ctx->pos = dotdot_size; 1480 continue; 1481 } 1482 1483 de = (struct ext4_dir_entry_2 *) 1484 (dir_buf + ctx->pos - extra_offset); 1485 if (ext4_check_dir_entry(inode, file, de, iloc.bh, dir_buf, 1486 extra_size, ctx->pos)) 1487 goto out; 1488 if (le32_to_cpu(de->inode)) { 1489 if (!dir_emit(ctx, de->name, de->name_len, 1490 le32_to_cpu(de->inode), 1491 get_dtype(sb, de->file_type))) 1492 goto out; 1493 } 1494 ctx->pos += ext4_rec_len_from_disk(de->rec_len, extra_size); 1495 } 1496 out: 1497 kfree(dir_buf); 1498 brelse(iloc.bh); 1499 return ret; 1500 } 1501 1502 void *ext4_read_inline_link(struct inode *inode) 1503 { 1504 struct ext4_iloc iloc; 1505 int ret, inline_size; 1506 void *link; 1507 1508 ret = ext4_get_inode_loc(inode, &iloc); 1509 if (ret) 1510 return ERR_PTR(ret); 1511 1512 ret = -ENOMEM; 1513 inline_size = ext4_get_inline_size(inode); 1514 link = kmalloc(inline_size + 1, GFP_NOFS); 1515 if (!link) 1516 goto out; 1517 1518 ret = ext4_read_inline_data(inode, link, inline_size, &iloc); 1519 if (ret < 0) { 1520 kfree(link); 1521 goto out; 1522 } 1523 nd_terminate_link(link, inode->i_size, ret); 1524 out: 1525 if (ret < 0) 1526 link = ERR_PTR(ret); 1527 brelse(iloc.bh); 1528 return link; 1529 } 1530 1531 struct buffer_head *ext4_get_first_inline_block(struct inode *inode, 1532 struct ext4_dir_entry_2 **parent_de, 1533 int *retval) 1534 { 1535 struct ext4_iloc iloc; 1536 1537 *retval = ext4_get_inode_loc(inode, &iloc); 1538 if (*retval) 1539 return NULL; 1540 1541 *parent_de = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block; 1542 1543 return iloc.bh; 1544 } 1545 1546 /* 1547 * Try to create the inline data for the new dir. 1548 * If it succeeds, return 0, otherwise return the error. 1549 * In case of ENOSPC, the caller should create the normal disk layout dir. 1550 */ 1551 int ext4_try_create_inline_dir(handle_t *handle, struct inode *parent, 1552 struct inode *inode) 1553 { 1554 int ret, inline_size = EXT4_MIN_INLINE_DATA_SIZE; 1555 struct ext4_iloc iloc; 1556 struct ext4_dir_entry_2 *de; 1557 1558 ret = ext4_get_inode_loc(inode, &iloc); 1559 if (ret) 1560 return ret; 1561 1562 ret = ext4_prepare_inline_data(handle, inode, inline_size); 1563 if (ret) 1564 goto out; 1565 1566 /* 1567 * For inline dir, we only save the inode information for the ".." 1568 * and create a fake dentry to cover the left space. 1569 */ 1570 de = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block; 1571 de->inode = cpu_to_le32(parent->i_ino); 1572 de = (struct ext4_dir_entry_2 *)((void *)de + EXT4_INLINE_DOTDOT_SIZE); 1573 de->inode = 0; 1574 de->rec_len = ext4_rec_len_to_disk( 1575 inline_size - EXT4_INLINE_DOTDOT_SIZE, 1576 inline_size); 1577 set_nlink(inode, 2); 1578 inode->i_size = EXT4_I(inode)->i_disksize = inline_size; 1579 out: 1580 brelse(iloc.bh); 1581 return ret; 1582 } 1583 1584 struct buffer_head *ext4_find_inline_entry(struct inode *dir, 1585 struct ext4_filename *fname, 1586 struct ext4_dir_entry_2 **res_dir, 1587 int *has_inline_data) 1588 { 1589 struct ext4_xattr_ibody_find is = { 1590 .s = { .not_found = -ENODATA, }, 1591 }; 1592 struct ext4_xattr_info i = { 1593 .name_index = EXT4_XATTR_INDEX_SYSTEM, 1594 .name = EXT4_XATTR_SYSTEM_DATA, 1595 }; 1596 int ret; 1597 void *inline_start; 1598 int inline_size; 1599 1600 ret = ext4_get_inode_loc(dir, &is.iloc); 1601 if (ret) 1602 return ERR_PTR(ret); 1603 1604 down_read(&EXT4_I(dir)->xattr_sem); 1605 1606 ret = ext4_xattr_ibody_find(dir, &i, &is); 1607 if (ret) 1608 goto out; 1609 1610 if (!ext4_has_inline_data(dir)) { 1611 *has_inline_data = 0; 1612 goto out; 1613 } 1614 1615 inline_start = (void *)ext4_raw_inode(&is.iloc)->i_block + 1616 EXT4_INLINE_DOTDOT_SIZE; 1617 inline_size = EXT4_MIN_INLINE_DATA_SIZE - EXT4_INLINE_DOTDOT_SIZE; 1618 ret = ext4_search_dir(is.iloc.bh, inline_start, inline_size, 1619 dir, fname, 0, res_dir); 1620 if (ret == 1) 1621 goto out_find; 1622 if (ret < 0) 1623 goto out; 1624 1625 if (ext4_get_inline_size(dir) == EXT4_MIN_INLINE_DATA_SIZE) 1626 goto out; 1627 1628 inline_start = ext4_get_inline_xattr_pos(dir, &is.iloc); 1629 inline_size = ext4_get_inline_size(dir) - EXT4_MIN_INLINE_DATA_SIZE; 1630 1631 ret = ext4_search_dir(is.iloc.bh, inline_start, inline_size, 1632 dir, fname, 0, res_dir); 1633 if (ret == 1) 1634 goto out_find; 1635 1636 out: 1637 brelse(is.iloc.bh); 1638 if (ret < 0) 1639 is.iloc.bh = ERR_PTR(ret); 1640 else 1641 is.iloc.bh = NULL; 1642 out_find: 1643 up_read(&EXT4_I(dir)->xattr_sem); 1644 return is.iloc.bh; 1645 } 1646 1647 int ext4_delete_inline_entry(handle_t *handle, 1648 struct inode *dir, 1649 struct ext4_dir_entry_2 *de_del, 1650 struct buffer_head *bh, 1651 int *has_inline_data) 1652 { 1653 int err, inline_size, no_expand; 1654 struct ext4_iloc iloc; 1655 void *inline_start; 1656 1657 err = ext4_get_inode_loc(dir, &iloc); 1658 if (err) 1659 return err; 1660 1661 ext4_write_lock_xattr(dir, &no_expand); 1662 if (!ext4_has_inline_data(dir)) { 1663 *has_inline_data = 0; 1664 goto out; 1665 } 1666 1667 if ((void *)de_del - ((void *)ext4_raw_inode(&iloc)->i_block) < 1668 EXT4_MIN_INLINE_DATA_SIZE) { 1669 inline_start = (void *)ext4_raw_inode(&iloc)->i_block + 1670 EXT4_INLINE_DOTDOT_SIZE; 1671 inline_size = EXT4_MIN_INLINE_DATA_SIZE - 1672 EXT4_INLINE_DOTDOT_SIZE; 1673 } else { 1674 inline_start = ext4_get_inline_xattr_pos(dir, &iloc); 1675 inline_size = ext4_get_inline_size(dir) - 1676 EXT4_MIN_INLINE_DATA_SIZE; 1677 } 1678 1679 BUFFER_TRACE(bh, "get_write_access"); 1680 err = ext4_journal_get_write_access(handle, dir->i_sb, bh, 1681 EXT4_JTR_NONE); 1682 if (err) 1683 goto out; 1684 1685 err = ext4_generic_delete_entry(dir, de_del, bh, 1686 inline_start, inline_size, 0); 1687 if (err) 1688 goto out; 1689 1690 ext4_show_inline_dir(dir, iloc.bh, inline_start, inline_size); 1691 out: 1692 ext4_write_unlock_xattr(dir, &no_expand); 1693 if (likely(err == 0)) 1694 err = ext4_mark_inode_dirty(handle, dir); 1695 brelse(iloc.bh); 1696 if (err != -ENOENT) 1697 ext4_std_error(dir->i_sb, err); 1698 return err; 1699 } 1700 1701 /* 1702 * Get the inline dentry at offset. 1703 */ 1704 static inline struct ext4_dir_entry_2 * 1705 ext4_get_inline_entry(struct inode *inode, 1706 struct ext4_iloc *iloc, 1707 unsigned int offset, 1708 void **inline_start, 1709 int *inline_size) 1710 { 1711 void *inline_pos; 1712 1713 BUG_ON(offset > ext4_get_inline_size(inode)); 1714 1715 if (offset < EXT4_MIN_INLINE_DATA_SIZE) { 1716 inline_pos = (void *)ext4_raw_inode(iloc)->i_block; 1717 *inline_size = EXT4_MIN_INLINE_DATA_SIZE; 1718 } else { 1719 inline_pos = ext4_get_inline_xattr_pos(inode, iloc); 1720 offset -= EXT4_MIN_INLINE_DATA_SIZE; 1721 *inline_size = ext4_get_inline_size(inode) - 1722 EXT4_MIN_INLINE_DATA_SIZE; 1723 } 1724 1725 if (inline_start) 1726 *inline_start = inline_pos; 1727 return (struct ext4_dir_entry_2 *)(inline_pos + offset); 1728 } 1729 1730 bool empty_inline_dir(struct inode *dir, int *has_inline_data) 1731 { 1732 int err, inline_size; 1733 struct ext4_iloc iloc; 1734 size_t inline_len; 1735 void *inline_pos; 1736 unsigned int offset; 1737 struct ext4_dir_entry_2 *de; 1738 bool ret = false; 1739 1740 err = ext4_get_inode_loc(dir, &iloc); 1741 if (err) { 1742 EXT4_ERROR_INODE_ERR(dir, -err, 1743 "error %d getting inode %lu block", 1744 err, dir->i_ino); 1745 return false; 1746 } 1747 1748 down_read(&EXT4_I(dir)->xattr_sem); 1749 if (!ext4_has_inline_data(dir)) { 1750 *has_inline_data = 0; 1751 ret = true; 1752 goto out; 1753 } 1754 1755 de = (struct ext4_dir_entry_2 *)ext4_raw_inode(&iloc)->i_block; 1756 if (!le32_to_cpu(de->inode)) { 1757 ext4_warning(dir->i_sb, 1758 "bad inline directory (dir #%lu) - no `..'", 1759 dir->i_ino); 1760 goto out; 1761 } 1762 1763 inline_len = ext4_get_inline_size(dir); 1764 offset = EXT4_INLINE_DOTDOT_SIZE; 1765 while (offset < inline_len) { 1766 de = ext4_get_inline_entry(dir, &iloc, offset, 1767 &inline_pos, &inline_size); 1768 if (ext4_check_dir_entry(dir, NULL, de, 1769 iloc.bh, inline_pos, 1770 inline_size, offset)) { 1771 ext4_warning(dir->i_sb, 1772 "bad inline directory (dir #%lu) - " 1773 "inode %u, rec_len %u, name_len %d" 1774 "inline size %d", 1775 dir->i_ino, le32_to_cpu(de->inode), 1776 le16_to_cpu(de->rec_len), de->name_len, 1777 inline_size); 1778 goto out; 1779 } 1780 if (le32_to_cpu(de->inode)) { 1781 goto out; 1782 } 1783 offset += ext4_rec_len_from_disk(de->rec_len, inline_size); 1784 } 1785 1786 ret = true; 1787 out: 1788 up_read(&EXT4_I(dir)->xattr_sem); 1789 brelse(iloc.bh); 1790 return ret; 1791 } 1792 1793 int ext4_destroy_inline_data(handle_t *handle, struct inode *inode) 1794 { 1795 int ret, no_expand; 1796 1797 ext4_write_lock_xattr(inode, &no_expand); 1798 ret = ext4_destroy_inline_data_nolock(handle, inode); 1799 ext4_write_unlock_xattr(inode, &no_expand); 1800 1801 return ret; 1802 } 1803 1804 int ext4_inline_data_iomap(struct inode *inode, struct iomap *iomap) 1805 { 1806 __u64 addr; 1807 int error = -EAGAIN; 1808 struct ext4_iloc iloc; 1809 1810 down_read(&EXT4_I(inode)->xattr_sem); 1811 if (!ext4_has_inline_data(inode)) 1812 goto out; 1813 1814 error = ext4_get_inode_loc(inode, &iloc); 1815 if (error) 1816 goto out; 1817 1818 addr = (__u64)iloc.bh->b_blocknr << inode->i_sb->s_blocksize_bits; 1819 addr += (char *)ext4_raw_inode(&iloc) - iloc.bh->b_data; 1820 addr += offsetof(struct ext4_inode, i_block); 1821 1822 brelse(iloc.bh); 1823 1824 iomap->addr = addr; 1825 iomap->offset = 0; 1826 iomap->length = min_t(loff_t, ext4_get_inline_size(inode), 1827 i_size_read(inode)); 1828 iomap->type = IOMAP_INLINE; 1829 iomap->flags = 0; 1830 1831 out: 1832 up_read(&EXT4_I(inode)->xattr_sem); 1833 return error; 1834 } 1835 1836 int ext4_inline_data_truncate(struct inode *inode, int *has_inline) 1837 { 1838 handle_t *handle; 1839 int inline_size, value_len, needed_blocks, no_expand, err = 0; 1840 size_t i_size; 1841 void *value = NULL; 1842 struct ext4_xattr_ibody_find is = { 1843 .s = { .not_found = -ENODATA, }, 1844 }; 1845 struct ext4_xattr_info i = { 1846 .name_index = EXT4_XATTR_INDEX_SYSTEM, 1847 .name = EXT4_XATTR_SYSTEM_DATA, 1848 }; 1849 1850 1851 needed_blocks = ext4_chunk_trans_extent(inode, 1); 1852 handle = ext4_journal_start(inode, EXT4_HT_INODE, needed_blocks); 1853 if (IS_ERR(handle)) 1854 return PTR_ERR(handle); 1855 1856 ext4_write_lock_xattr(inode, &no_expand); 1857 if (!ext4_has_inline_data(inode)) { 1858 ext4_write_unlock_xattr(inode, &no_expand); 1859 *has_inline = 0; 1860 ext4_journal_stop(handle); 1861 return 0; 1862 } 1863 1864 if ((err = ext4_orphan_add(handle, inode)) != 0) 1865 goto out; 1866 1867 if ((err = ext4_get_inode_loc(inode, &is.iloc)) != 0) 1868 goto out; 1869 1870 down_write(&EXT4_I(inode)->i_data_sem); 1871 i_size = inode->i_size; 1872 inline_size = ext4_get_inline_size(inode); 1873 EXT4_I(inode)->i_disksize = i_size; 1874 1875 if (i_size < inline_size) { 1876 /* 1877 * if there's inline data to truncate and this file was 1878 * converted to extents after that inline data was written, 1879 * the extent status cache must be cleared to avoid leaving 1880 * behind stale delayed allocated extent entries 1881 */ 1882 if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) 1883 ext4_es_remove_extent(inode, 0, EXT_MAX_BLOCKS); 1884 1885 /* Clear the content in the xattr space. */ 1886 if (inline_size > EXT4_MIN_INLINE_DATA_SIZE) { 1887 if ((err = ext4_xattr_ibody_find(inode, &i, &is)) != 0) 1888 goto out_error; 1889 1890 if (is.s.not_found) { 1891 EXT4_ERROR_INODE(inode, 1892 "missing inline data xattr"); 1893 err = -EFSCORRUPTED; 1894 goto out_error; 1895 } 1896 1897 value_len = le32_to_cpu(is.s.here->e_value_size); 1898 value = kmalloc(value_len, GFP_NOFS); 1899 if (!value) { 1900 err = -ENOMEM; 1901 goto out_error; 1902 } 1903 1904 err = ext4_xattr_ibody_get(inode, i.name_index, 1905 i.name, value, value_len); 1906 if (err <= 0) 1907 goto out_error; 1908 1909 i.value = value; 1910 i.value_len = i_size > EXT4_MIN_INLINE_DATA_SIZE ? 1911 i_size - EXT4_MIN_INLINE_DATA_SIZE : 0; 1912 err = ext4_xattr_ibody_set(handle, inode, &i, &is); 1913 if (err) 1914 goto out_error; 1915 } 1916 1917 /* Clear the content within i_blocks. */ 1918 if (i_size < EXT4_MIN_INLINE_DATA_SIZE) { 1919 void *p = (void *) ext4_raw_inode(&is.iloc)->i_block; 1920 memset(p + i_size, 0, 1921 EXT4_MIN_INLINE_DATA_SIZE - i_size); 1922 } 1923 1924 EXT4_I(inode)->i_inline_size = i_size < 1925 EXT4_MIN_INLINE_DATA_SIZE ? 1926 EXT4_MIN_INLINE_DATA_SIZE : i_size; 1927 } 1928 1929 out_error: 1930 up_write(&EXT4_I(inode)->i_data_sem); 1931 out: 1932 brelse(is.iloc.bh); 1933 ext4_write_unlock_xattr(inode, &no_expand); 1934 kfree(value); 1935 if (inode->i_nlink) 1936 ext4_orphan_del(handle, inode); 1937 1938 if (err == 0) { 1939 inode_set_mtime_to_ts(inode, inode_set_ctime_current(inode)); 1940 err = ext4_mark_inode_dirty(handle, inode); 1941 if (IS_SYNC(inode)) 1942 ext4_handle_sync(handle); 1943 } 1944 ext4_journal_stop(handle); 1945 return err; 1946 } 1947 1948 int ext4_convert_inline_data(struct inode *inode) 1949 { 1950 int error, needed_blocks, no_expand; 1951 handle_t *handle; 1952 struct ext4_iloc iloc; 1953 1954 if (!ext4_has_inline_data(inode)) { 1955 ext4_clear_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA); 1956 return 0; 1957 } else if (!ext4_test_inode_state(inode, EXT4_STATE_MAY_INLINE_DATA)) { 1958 /* 1959 * Inode has inline data but EXT4_STATE_MAY_INLINE_DATA is 1960 * cleared. This means we are in the middle of moving of 1961 * inline data to delay allocated block. Just force writeout 1962 * here to finish conversion. 1963 */ 1964 error = filemap_flush(inode->i_mapping); 1965 if (error) 1966 return error; 1967 if (!ext4_has_inline_data(inode)) 1968 return 0; 1969 } 1970 1971 needed_blocks = ext4_chunk_trans_extent(inode, 1); 1972 1973 iloc.bh = NULL; 1974 error = ext4_get_inode_loc(inode, &iloc); 1975 if (error) 1976 return error; 1977 1978 handle = ext4_journal_start(inode, EXT4_HT_WRITE_PAGE, needed_blocks); 1979 if (IS_ERR(handle)) { 1980 error = PTR_ERR(handle); 1981 goto out_free; 1982 } 1983 1984 ext4_write_lock_xattr(inode, &no_expand); 1985 if (ext4_has_inline_data(inode)) 1986 error = ext4_convert_inline_data_nolock(handle, inode, &iloc); 1987 ext4_write_unlock_xattr(inode, &no_expand); 1988 ext4_journal_stop(handle); 1989 out_free: 1990 brelse(iloc.bh); 1991 return error; 1992 } 1993