1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * NTFS kernel directory operations. 4 * 5 * Copyright (c) 2001-2007 Anton Altaparmakov 6 * Copyright (c) 2002 Richard Russon 7 * Copyright (c) 2025 LG Electronics Co., Ltd. 8 */ 9 10 #include <linux/blkdev.h> 11 12 #include "dir.h" 13 #include "mft.h" 14 #include "ntfs.h" 15 #include "index.h" 16 #include "reparse.h" 17 18 #include <linux/filelock.h> 19 20 /* 21 * The little endian Unicode string $I30 as a global constant. 22 */ 23 __le16 I30[5] = { cpu_to_le16('$'), cpu_to_le16('I'), 24 cpu_to_le16('3'), cpu_to_le16('0'), 0 }; 25 26 /* 27 * ntfs_lookup_inode_by_name - find an inode in a directory given its name 28 * @dir_ni: ntfs inode of the directory in which to search for the name 29 * @uname: Unicode name for which to search in the directory 30 * @uname_len: length of the name @uname in Unicode characters 31 * @res: return the found file name if necessary (see below) 32 * 33 * Look for an inode with name @uname in the directory with inode @dir_ni. 34 * ntfs_lookup_inode_by_name() walks the contents of the directory looking for 35 * the Unicode name. If the name is found in the directory, the corresponding 36 * inode number (>= 0) is returned as a mft reference in cpu format, i.e. it 37 * is a 64-bit number containing the sequence number. 38 * 39 * On error, a negative value is returned corresponding to the error code. In 40 * particular if the inode is not found -ENOENT is returned. Note that you 41 * can't just check the return value for being negative, you have to check the 42 * inode number for being negative which you can extract using MREC(return 43 * value). 44 * 45 * Note, @uname_len does not include the (optional) terminating NULL character. 46 * 47 * Note, we look for a case sensitive match first but we also look for a case 48 * insensitive match at the same time. If we find a case insensitive match, we 49 * save that for the case that we don't find an exact match, where we return 50 * the case insensitive match and setup @res (which we allocate!) with the mft 51 * reference, the file name type, length and with a copy of the little endian 52 * Unicode file name itself. If we match a file name which is in the DOS name 53 * space, we only return the mft reference and file name type in @res. 54 * ntfs_lookup() then uses this to find the long file name in the inode itself. 55 * This is to avoid polluting the dcache with short file names. We want them to 56 * work but we don't care for how quickly one can access them. This also fixes 57 * the dcache aliasing issues. 58 * 59 * Locking: - Caller must hold i_mutex on the directory. 60 * - Each page cache page in the index allocation mapping must be 61 * locked whilst being accessed otherwise we may find a corrupt 62 * page due to it being under ->writepage at the moment which 63 * applies the mst protection fixups before writing out and then 64 * removes them again after the write is complete after which it 65 * unlocks the page. 66 */ 67 u64 ntfs_lookup_inode_by_name(struct ntfs_inode *dir_ni, const __le16 *uname, 68 const int uname_len, struct ntfs_name **res) 69 { 70 struct ntfs_volume *vol = dir_ni->vol; 71 struct super_block *sb = vol->sb; 72 struct inode *ia_vi = NULL; 73 struct mft_record *m; 74 struct index_root *ir; 75 struct index_entry *ie; 76 struct index_block *ia; 77 u8 *index_end; 78 u64 mref; 79 struct ntfs_attr_search_ctx *ctx; 80 int err, rc; 81 s64 vcn, old_vcn; 82 struct address_space *ia_mapping; 83 struct folio *folio; 84 u8 *kaddr = NULL; 85 struct ntfs_name *name = NULL; 86 87 /* Get hold of the mft record for the directory. */ 88 m = map_mft_record(dir_ni); 89 if (IS_ERR(m)) { 90 ntfs_error(sb, "map_mft_record() failed with error code %ld.", 91 -PTR_ERR(m)); 92 return ERR_MREF(PTR_ERR(m)); 93 } 94 ctx = ntfs_attr_get_search_ctx(dir_ni, m); 95 if (unlikely(!ctx)) { 96 err = -ENOMEM; 97 goto err_out; 98 } 99 /* Find the index root attribute in the mft record. */ 100 err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL, 101 0, ctx); 102 if (unlikely(err)) { 103 if (err == -ENOENT) { 104 ntfs_error(sb, 105 "Index root attribute missing in directory inode 0x%llx.", 106 dir_ni->mft_no); 107 err = -EIO; 108 } 109 goto err_out; 110 } 111 /* Get to the index root value (it's been verified in read_inode). */ 112 ir = (struct index_root *)((u8 *)ctx->attr + 113 le16_to_cpu(ctx->attr->data.resident.value_offset)); 114 index_end = (u8 *)&ir->index + le32_to_cpu(ir->index.index_length); 115 /* The first index entry. */ 116 ie = (struct index_entry *)((u8 *)&ir->index + 117 le32_to_cpu(ir->index.entries_offset)); 118 /* 119 * Loop until we exceed valid memory (corruption case) or until we 120 * reach the last entry. 121 */ 122 for (;; ie = (struct index_entry *)((u8 *)ie + le16_to_cpu(ie->length))) { 123 /* Bounds checks. */ 124 if ((u8 *)ie < (u8 *)ctx->mrec || 125 (u8 *)ie + sizeof(struct index_entry_header) > index_end || 126 (u8 *)ie + sizeof(struct index_entry_header) + le16_to_cpu(ie->key_length) > 127 index_end || (u8 *)ie + le16_to_cpu(ie->length) > index_end) 128 goto dir_err_out; 129 /* 130 * The last entry cannot contain a name. It can however contain 131 * a pointer to a child node in the B+tree so we just break out. 132 */ 133 if (ie->flags & INDEX_ENTRY_END) 134 break; 135 /* Key length should not be zero if it is not last entry. */ 136 if (!ie->key_length) 137 goto dir_err_out; 138 /* 139 * We perform a case sensitive comparison and if that matches 140 * we are done and return the mft reference of the inode (i.e. 141 * the inode number together with the sequence number for 142 * consistency checking). We convert it to cpu format before 143 * returning. 144 */ 145 if (ntfs_are_names_equal(uname, uname_len, 146 (__le16 *)&ie->key.file_name.file_name, 147 ie->key.file_name.file_name_length, 148 CASE_SENSITIVE, vol->upcase, vol->upcase_len)) { 149 found_it: 150 /* 151 * We have a perfect match, so we don't need to care 152 * about having matched imperfectly before, so we can 153 * free name and set *res to NULL. 154 * However, if the perfect match is a short file name, 155 * we need to signal this through *res, so that 156 * ntfs_lookup() can fix dcache aliasing issues. 157 * As an optimization we just reuse an existing 158 * allocation of *res. 159 */ 160 if (ie->key.file_name.file_name_type == FILE_NAME_DOS) { 161 if (!name) { 162 name = kmalloc(sizeof(struct ntfs_name), 163 GFP_NOFS); 164 if (!name) { 165 err = -ENOMEM; 166 goto err_out; 167 } 168 } 169 name->mref = le64_to_cpu( 170 ie->data.dir.indexed_file); 171 name->type = FILE_NAME_DOS; 172 name->len = 0; 173 *res = name; 174 } else { 175 kfree(name); 176 *res = NULL; 177 } 178 mref = le64_to_cpu(ie->data.dir.indexed_file); 179 ntfs_attr_put_search_ctx(ctx); 180 unmap_mft_record(dir_ni); 181 return mref; 182 } 183 /* 184 * For a case insensitive mount, we also perform a case 185 * insensitive comparison (provided the file name is not in the 186 * POSIX namespace). If the comparison matches, and the name is 187 * in the WIN32 namespace, we cache the filename in *res so 188 * that the caller, ntfs_lookup(), can work on it. If the 189 * comparison matches, and the name is in the DOS namespace, we 190 * only cache the mft reference and the file name type (we set 191 * the name length to zero for simplicity). 192 */ 193 if ((!NVolCaseSensitive(vol) || 194 ie->key.file_name.file_name_type == FILE_NAME_DOS) && 195 ntfs_are_names_equal(uname, uname_len, 196 (__le16 *)&ie->key.file_name.file_name, 197 ie->key.file_name.file_name_length, 198 IGNORE_CASE, vol->upcase, 199 vol->upcase_len)) { 200 int name_size = sizeof(struct ntfs_name); 201 u8 type = ie->key.file_name.file_name_type; 202 u8 len = ie->key.file_name.file_name_length; 203 204 /* Only one case insensitive matching name allowed. */ 205 if (name) { 206 ntfs_error(sb, 207 "Found already allocated name in phase 1. Please run chkdsk"); 208 goto dir_err_out; 209 } 210 211 if (type != FILE_NAME_DOS) 212 name_size += len * sizeof(__le16); 213 name = kmalloc(name_size, GFP_NOFS); 214 if (!name) { 215 err = -ENOMEM; 216 goto err_out; 217 } 218 name->mref = le64_to_cpu(ie->data.dir.indexed_file); 219 name->type = type; 220 if (type != FILE_NAME_DOS) { 221 name->len = len; 222 memcpy(name->name, ie->key.file_name.file_name, 223 len * sizeof(__le16)); 224 } else 225 name->len = 0; 226 *res = name; 227 } 228 /* 229 * Not a perfect match, need to do full blown collation so we 230 * know which way in the B+tree we have to go. 231 */ 232 rc = ntfs_collate_names(uname, uname_len, 233 (__le16 *)&ie->key.file_name.file_name, 234 ie->key.file_name.file_name_length, 1, 235 IGNORE_CASE, vol->upcase, vol->upcase_len); 236 /* 237 * If uname collates before the name of the current entry, there 238 * is definitely no such name in this index but we might need to 239 * descend into the B+tree so we just break out of the loop. 240 */ 241 if (rc == -1) 242 break; 243 /* The names are not equal, continue the search. */ 244 if (rc) 245 continue; 246 /* 247 * Names match with case insensitive comparison, now try the 248 * case sensitive comparison, which is required for proper 249 * collation. 250 */ 251 rc = ntfs_collate_names(uname, uname_len, 252 (__le16 *)&ie->key.file_name.file_name, 253 ie->key.file_name.file_name_length, 1, 254 CASE_SENSITIVE, vol->upcase, vol->upcase_len); 255 if (rc == -1) 256 break; 257 if (rc) 258 continue; 259 /* 260 * Perfect match, this will never happen as the 261 * ntfs_are_names_equal() call will have gotten a match but we 262 * still treat it correctly. 263 */ 264 goto found_it; 265 } 266 /* 267 * We have finished with this index without success. Check for the 268 * presence of a child node and if not present return -ENOENT, unless 269 * we have got a matching name cached in name in which case return the 270 * mft reference associated with it. 271 */ 272 if (!(ie->flags & INDEX_ENTRY_NODE)) { 273 if (name) { 274 ntfs_attr_put_search_ctx(ctx); 275 unmap_mft_record(dir_ni); 276 return name->mref; 277 } 278 ntfs_debug("Entry not found."); 279 err = -ENOENT; 280 goto err_out; 281 } /* Child node present, descend into it. */ 282 283 /* Get the starting vcn of the index_block holding the child node. */ 284 vcn = le64_to_cpup((__le64 *)((u8 *)ie + le16_to_cpu(ie->length) - 8)); 285 286 /* 287 * We are done with the index root and the mft record. Release them, 288 * otherwise we deadlock with read_mapping_folio(). 289 */ 290 ntfs_attr_put_search_ctx(ctx); 291 unmap_mft_record(dir_ni); 292 m = NULL; 293 ctx = NULL; 294 295 ia_vi = ntfs_index_iget(VFS_I(dir_ni), I30, 4); 296 if (IS_ERR(ia_vi)) { 297 err = PTR_ERR(ia_vi); 298 goto err_out; 299 } 300 301 ia_mapping = ia_vi->i_mapping; 302 descend_into_child_node: 303 /* 304 * Convert vcn to index into the index allocation attribute in units 305 * of PAGE_SIZE and map the page cache page, reading it from 306 * disk if necessary. 307 */ 308 folio = read_mapping_folio(ia_mapping, vcn << 309 dir_ni->itype.index.vcn_size_bits >> PAGE_SHIFT, NULL); 310 if (IS_ERR(folio)) { 311 ntfs_error(sb, "Failed to map directory index page, error %ld.", 312 -PTR_ERR(folio)); 313 err = PTR_ERR(folio); 314 goto err_out; 315 } 316 317 folio_lock(folio); 318 kaddr = kmalloc(PAGE_SIZE, GFP_NOFS); 319 if (!kaddr) { 320 err = -ENOMEM; 321 folio_unlock(folio); 322 folio_put(folio); 323 goto unm_err_out; 324 } 325 326 memcpy_from_folio(kaddr, folio, 0, PAGE_SIZE); 327 post_read_mst_fixup((struct ntfs_record *)kaddr, PAGE_SIZE); 328 folio_unlock(folio); 329 folio_put(folio); 330 fast_descend_into_child_node: 331 /* Get to the index allocation block. */ 332 ia = (struct index_block *)(kaddr + ((vcn << 333 dir_ni->itype.index.vcn_size_bits) & ~PAGE_MASK)); 334 /* Bounds checks. */ 335 if ((u8 *)ia < kaddr || (u8 *)ia > kaddr + PAGE_SIZE) { 336 ntfs_error(sb, 337 "Out of bounds check failed. Corrupt directory inode 0x%llx or driver bug.", 338 dir_ni->mft_no); 339 goto unm_err_out; 340 } 341 index_end = (u8 *)ia + dir_ni->itype.index.block_size; 342 if (index_end > kaddr + PAGE_SIZE) { 343 ntfs_error(sb, 344 "Index buffer (VCN 0x%llx) of directory inode 0x%llx crosses page boundary. Impossible! Cannot access! This is probably a bug in the driver.", 345 vcn, dir_ni->mft_no); 346 goto unm_err_out; 347 } 348 err = ntfs_index_block_inconsistent(vol, ia, 349 dir_ni->itype.index.block_size, 350 vcn, COLLATION_FILE_NAME, 351 dir_ni->mft_no); 352 if (err) 353 goto unm_err_out; 354 index_end = (u8 *)&ia->index + le32_to_cpu(ia->index.index_length); 355 /* The first index entry. */ 356 ie = (struct index_entry *)((u8 *)&ia->index + 357 le32_to_cpu(ia->index.entries_offset)); 358 /* 359 * Iterate similar to above big loop but applied to index buffer, thus 360 * loop until we exceed valid memory (corruption case) or until we 361 * reach the last entry. 362 */ 363 for (;; ie = (struct index_entry *)((u8 *)ie + le16_to_cpu(ie->length))) { 364 /* 365 * The last entry cannot contain a name. It can however contain 366 * a pointer to a child node in the B+tree so we just break out. 367 */ 368 if (ie->flags & INDEX_ENTRY_END) 369 break; 370 /* Key length should not be zero if it is not last entry. */ 371 if (!ie->key_length) 372 goto unm_err_out; 373 /* 374 * We perform a case sensitive comparison and if that matches 375 * we are done and return the mft reference of the inode (i.e. 376 * the inode number together with the sequence number for 377 * consistency checking). We convert it to cpu format before 378 * returning. 379 */ 380 if (ntfs_are_names_equal(uname, uname_len, 381 (__le16 *)&ie->key.file_name.file_name, 382 ie->key.file_name.file_name_length, 383 CASE_SENSITIVE, vol->upcase, vol->upcase_len)) { 384 found_it2: 385 /* 386 * We have a perfect match, so we don't need to care 387 * about having matched imperfectly before, so we can 388 * free name and set *res to NULL. 389 * However, if the perfect match is a short file name, 390 * we need to signal this through *res, so that 391 * ntfs_lookup() can fix dcache aliasing issues. 392 * As an optimization we just reuse an existing 393 * allocation of *res. 394 */ 395 if (ie->key.file_name.file_name_type == FILE_NAME_DOS) { 396 if (!name) { 397 name = kmalloc(sizeof(struct ntfs_name), 398 GFP_NOFS); 399 if (!name) { 400 err = -ENOMEM; 401 goto unm_err_out; 402 } 403 } 404 name->mref = le64_to_cpu( 405 ie->data.dir.indexed_file); 406 name->type = FILE_NAME_DOS; 407 name->len = 0; 408 *res = name; 409 } else { 410 kfree(name); 411 *res = NULL; 412 } 413 mref = le64_to_cpu(ie->data.dir.indexed_file); 414 kfree(kaddr); 415 iput(ia_vi); 416 return mref; 417 } 418 /* 419 * For a case insensitive mount, we also perform a case 420 * insensitive comparison (provided the file name is not in the 421 * POSIX namespace). If the comparison matches, and the name is 422 * in the WIN32 namespace, we cache the filename in *res so 423 * that the caller, ntfs_lookup(), can work on it. If the 424 * comparison matches, and the name is in the DOS namespace, we 425 * only cache the mft reference and the file name type (we set 426 * the name length to zero for simplicity). 427 */ 428 if ((!NVolCaseSensitive(vol) || 429 ie->key.file_name.file_name_type == FILE_NAME_DOS) && 430 ntfs_are_names_equal(uname, uname_len, 431 (__le16 *)&ie->key.file_name.file_name, 432 ie->key.file_name.file_name_length, 433 IGNORE_CASE, vol->upcase, 434 vol->upcase_len)) { 435 int name_size = sizeof(struct ntfs_name); 436 u8 type = ie->key.file_name.file_name_type; 437 u8 len = ie->key.file_name.file_name_length; 438 439 /* Only one case insensitive matching name allowed. */ 440 if (name) { 441 ntfs_error(sb, 442 "Found already allocated name in phase 2. Please run chkdsk"); 443 kfree(kaddr); 444 goto dir_err_out; 445 } 446 447 if (type != FILE_NAME_DOS) 448 name_size += len * sizeof(__le16); 449 name = kmalloc(name_size, GFP_NOFS); 450 if (!name) { 451 err = -ENOMEM; 452 goto unm_err_out; 453 } 454 name->mref = le64_to_cpu(ie->data.dir.indexed_file); 455 name->type = type; 456 if (type != FILE_NAME_DOS) { 457 name->len = len; 458 memcpy(name->name, ie->key.file_name.file_name, 459 len * sizeof(__le16)); 460 } else 461 name->len = 0; 462 *res = name; 463 } 464 /* 465 * Not a perfect match, need to do full blown collation so we 466 * know which way in the B+tree we have to go. 467 */ 468 rc = ntfs_collate_names(uname, uname_len, 469 (__le16 *)&ie->key.file_name.file_name, 470 ie->key.file_name.file_name_length, 1, 471 IGNORE_CASE, vol->upcase, vol->upcase_len); 472 /* 473 * If uname collates before the name of the current entry, there 474 * is definitely no such name in this index but we might need to 475 * descend into the B+tree so we just break out of the loop. 476 */ 477 if (rc == -1) 478 break; 479 /* The names are not equal, continue the search. */ 480 if (rc) 481 continue; 482 /* 483 * Names match with case insensitive comparison, now try the 484 * case sensitive comparison, which is required for proper 485 * collation. 486 */ 487 rc = ntfs_collate_names(uname, uname_len, 488 (__le16 *)&ie->key.file_name.file_name, 489 ie->key.file_name.file_name_length, 1, 490 CASE_SENSITIVE, vol->upcase, vol->upcase_len); 491 if (rc == -1) 492 break; 493 if (rc) 494 continue; 495 /* 496 * Perfect match, this will never happen as the 497 * ntfs_are_names_equal() call will have gotten a match but we 498 * still treat it correctly. 499 */ 500 goto found_it2; 501 } 502 /* 503 * We have finished with this index buffer without success. Check for 504 * the presence of a child node. 505 */ 506 if (ie->flags & INDEX_ENTRY_NODE) { 507 if ((ia->index.flags & NODE_MASK) == LEAF_NODE) { 508 ntfs_error(sb, 509 "Index entry with child node found in a leaf node in directory inode 0x%llx.", 510 dir_ni->mft_no); 511 goto unm_err_out; 512 } 513 /* Child node present, descend into it. */ 514 old_vcn = vcn; 515 vcn = le64_to_cpup((__le64 *)((u8 *)ie + 516 le16_to_cpu(ie->length) - 8)); 517 if (vcn >= 0) { 518 /* 519 * If vcn is in the same page cache page as old_vcn we 520 * recycle the mapped page. 521 */ 522 if (ntfs_cluster_to_pidx(vol, old_vcn) == 523 ntfs_cluster_to_pidx(vol, vcn)) 524 goto fast_descend_into_child_node; 525 kfree(kaddr); 526 kaddr = NULL; 527 goto descend_into_child_node; 528 } 529 ntfs_error(sb, "Negative child node vcn in directory inode 0x%llx.", 530 dir_ni->mft_no); 531 goto unm_err_out; 532 } 533 /* 534 * No child node present, return -ENOENT, unless we have got a matching 535 * name cached in name in which case return the mft reference 536 * associated with it. 537 */ 538 if (name) { 539 kfree(kaddr); 540 iput(ia_vi); 541 return name->mref; 542 } 543 ntfs_debug("Entry not found."); 544 err = -ENOENT; 545 unm_err_out: 546 kfree(kaddr); 547 err_out: 548 if (!err) 549 err = -EIO; 550 if (ctx) 551 ntfs_attr_put_search_ctx(ctx); 552 if (m) 553 unmap_mft_record(dir_ni); 554 kfree(name); 555 *res = NULL; 556 if (!IS_ERR_OR_NULL(ia_vi)) 557 iput(ia_vi); 558 return ERR_MREF(err); 559 dir_err_out: 560 ntfs_error(sb, "Corrupt directory. Aborting lookup."); 561 goto err_out; 562 } 563 564 /* 565 * ntfs_filldir - ntfs specific filldir method 566 * @vol: current ntfs volume 567 * @ndir: ntfs inode of current directory 568 * @ia_page: page in which the index allocation buffer @ie is in resides 569 * @ie: current index entry 570 * @name: buffer to use for the converted name 571 * @actor: what to feed the entries to 572 * 573 * Convert the Unicode @name to the loaded NLS and pass it to the @filldir 574 * callback. 575 * 576 * If @ia_page is not NULL it is the locked page containing the index 577 * allocation block containing the index entry @ie. 578 * 579 * Note, we drop (and then reacquire) the page lock on @ia_page across the 580 * @filldir() call otherwise we would deadlock with NFSd when it calls ->lookup 581 * since ntfs_lookup() will lock the same page. As an optimization, we do not 582 * retake the lock if we are returning a non-zero value as ntfs_readdir() 583 * would need to drop the lock immediately anyway. 584 */ 585 static inline int ntfs_filldir(struct ntfs_volume *vol, 586 struct ntfs_inode *ndir, struct page *ia_page, struct index_entry *ie, 587 u8 *name, struct dir_context *actor) 588 { 589 unsigned long mref; 590 int name_len; 591 unsigned int dt_type; 592 u8 name_type; 593 594 name_type = ie->key.file_name.file_name_type; 595 if (name_type == FILE_NAME_DOS) { 596 ntfs_debug("Skipping DOS name space entry."); 597 return 0; 598 } 599 if (MREF_LE(ie->data.dir.indexed_file) == FILE_root) { 600 ntfs_debug("Skipping root directory self reference entry."); 601 return 0; 602 } 603 if (MREF_LE(ie->data.dir.indexed_file) < FILE_first_user && 604 !NVolShowSystemFiles(vol)) { 605 ntfs_debug("Skipping system file."); 606 return 0; 607 } 608 if (!NVolShowHiddenFiles(vol) && 609 (ie->key.file_name.file_attributes & FILE_ATTR_HIDDEN)) { 610 ntfs_debug("Skipping hidden file."); 611 return 0; 612 } 613 614 name_len = ntfs_ucstonls(vol, (__le16 *)&ie->key.file_name.file_name, 615 ie->key.file_name.file_name_length, &name, 616 NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1); 617 if (name_len <= 0) { 618 ntfs_warning(vol->sb, "Skipping unrepresentable inode 0x%llx.", 619 (long long)MREF_LE(ie->data.dir.indexed_file)); 620 return 0; 621 } 622 623 mref = MREF_LE(ie->data.dir.indexed_file); 624 if (ie->key.file_name.file_attributes & 625 FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT) 626 dt_type = DT_DIR; 627 else if (ie->key.file_name.file_attributes & FILE_ATTR_REPARSE_POINT) 628 dt_type = ntfs_reparse_tag_dt_types(vol, mref); 629 else 630 dt_type = DT_REG; 631 632 /* 633 * Drop the page lock otherwise we deadlock with NFS when it calls 634 * ->lookup since ntfs_lookup() will lock the same page. 635 */ 636 if (ia_page) 637 unlock_page(ia_page); 638 ntfs_debug("Calling filldir for %s with len %i, fpos 0x%llx, inode 0x%lx, DT_%s.", 639 name, name_len, actor->pos, mref, dt_type == DT_DIR ? "DIR" : "REG"); 640 if (!dir_emit(actor, name, name_len, mref, dt_type)) 641 return 1; 642 /* Relock the page but not if we are aborting ->readdir. */ 643 if (ia_page) 644 lock_page(ia_page); 645 return 0; 646 } 647 648 struct ntfs_file_private { 649 void *key; 650 __le16 key_length; 651 bool end_in_iterate; 652 loff_t curr_pos; 653 }; 654 655 struct ntfs_index_ra { 656 unsigned long start_index; 657 unsigned int count; 658 struct rb_node rb_node; 659 }; 660 661 static void ntfs_insert_rb(struct ntfs_index_ra *nir, struct rb_root *root) 662 { 663 struct rb_node **new = &root->rb_node, *parent = NULL; 664 struct ntfs_index_ra *cnir; 665 666 while (*new) { 667 parent = *new; 668 cnir = rb_entry(parent, struct ntfs_index_ra, rb_node); 669 if (nir->start_index < cnir->start_index) 670 new = &parent->rb_left; 671 else if (nir->start_index >= cnir->start_index + cnir->count) 672 new = &parent->rb_right; 673 else { 674 pr_err("nir start index : %ld, count : %d, cnir start_index : %ld, count : %d\n", 675 nir->start_index, nir->count, cnir->start_index, cnir->count); 676 return; 677 } 678 } 679 680 rb_link_node(&nir->rb_node, parent, new); 681 rb_insert_color(&nir->rb_node, root); 682 } 683 684 static int ntfs_ia_blocks_readahead(struct ntfs_inode *ia_ni, loff_t pos) 685 { 686 unsigned long dir_start_index, dir_end_index; 687 struct inode *ia_vi = VFS_I(ia_ni); 688 struct file_ra_state *dir_ra; 689 690 dir_end_index = (i_size_read(ia_vi) + PAGE_SIZE - 1) >> PAGE_SHIFT; 691 dir_start_index = (pos + PAGE_SIZE - 1) >> PAGE_SHIFT; 692 693 if (dir_start_index >= dir_end_index) 694 return 0; 695 696 dir_ra = kzalloc(sizeof(*dir_ra), GFP_NOFS); 697 if (!dir_ra) 698 return -ENOMEM; 699 700 file_ra_state_init(dir_ra, ia_vi->i_mapping); 701 dir_end_index = (i_size_read(ia_vi) + PAGE_SIZE - 1) >> PAGE_SHIFT; 702 dir_start_index = (pos + PAGE_SIZE - 1) >> PAGE_SHIFT; 703 dir_ra->ra_pages = dir_end_index - dir_start_index; 704 page_cache_sync_readahead(ia_vi->i_mapping, dir_ra, NULL, 705 dir_start_index, dir_end_index - dir_start_index); 706 kfree(dir_ra); 707 708 return 0; 709 } 710 711 static int ntfs_readdir(struct file *file, struct dir_context *actor) 712 { 713 struct inode *vdir = file_inode(file); 714 struct super_block *sb = vdir->i_sb; 715 struct ntfs_inode *ndir = NTFS_I(vdir); 716 struct ntfs_volume *vol = NTFS_SB(sb); 717 struct ntfs_attr_search_ctx *ctx = NULL; 718 struct ntfs_index_context *ictx = NULL; 719 u8 *name; 720 struct index_root *ir; 721 struct index_entry *next = NULL; 722 struct ntfs_file_private *private = NULL; 723 int err = 0; 724 loff_t ie_pos = 2; /* initialize it with dot and dotdot size */ 725 struct ntfs_index_ra *nir = NULL; 726 unsigned long index; 727 struct rb_root ra_root = RB_ROOT; 728 struct file_ra_state *ra; 729 730 ntfs_debug("Entering for inode 0x%llx, fpos 0x%llx.", 731 ndir->mft_no, actor->pos); 732 733 if (file->private_data) { 734 private = file->private_data; 735 736 if (actor->pos != private->curr_pos) { 737 /* 738 * If actor->pos is different from the previous passed 739 * one, Discard the private->key and fill dirent buffer 740 * with linear lookup. 741 */ 742 kfree(private->key); 743 private->key = NULL; 744 private->end_in_iterate = false; 745 } else if (private->end_in_iterate) { 746 kfree(private->key); 747 kfree(file->private_data); 748 file->private_data = NULL; 749 return 0; 750 } 751 } 752 753 /* Emulate . and .. for all directories. */ 754 if (!dir_emit_dots(file, actor)) 755 return 0; 756 757 /* 758 * Allocate a buffer to store the current name being processed 759 * converted to format determined by current NLS. 760 */ 761 name = kmalloc(NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1, GFP_NOFS); 762 if (unlikely(!name)) 763 return -ENOMEM; 764 765 mutex_lock_nested(&ndir->mrec_lock, NTFS_INODE_MUTEX_PARENT); 766 ictx = ntfs_index_ctx_get(ndir, I30, 4); 767 if (!ictx) { 768 kfree(name); 769 mutex_unlock(&ndir->mrec_lock); 770 return -ENOMEM; 771 } 772 773 ra = kzalloc(sizeof(struct file_ra_state), GFP_NOFS); 774 if (!ra) { 775 kfree(name); 776 ntfs_index_ctx_put(ictx); 777 mutex_unlock(&ndir->mrec_lock); 778 return -ENOMEM; 779 } 780 file_ra_state_init(ra, vol->mft_ino->i_mapping); 781 782 if (private && private->key) { 783 /* 784 * Find index witk private->key using ntfs_index_lookup() 785 * instead of linear index lookup. 786 */ 787 err = ntfs_index_lookup(private->key, 788 le16_to_cpu(private->key_length), 789 ictx); 790 if (!err) { 791 next = ictx->entry; 792 /* 793 * Update ie_pos with private->curr_pos 794 * to make next d_off of dirent correct. 795 */ 796 ie_pos = private->curr_pos; 797 798 if (actor->pos > vol->mft_record_size && ictx->ia_ni) { 799 err = ntfs_ia_blocks_readahead(ictx->ia_ni, actor->pos); 800 if (err) 801 goto out; 802 } 803 804 goto nextdir; 805 } else { 806 goto out; 807 } 808 } else if (!private) { 809 private = kzalloc(sizeof(struct ntfs_file_private), GFP_KERNEL); 810 if (!private) { 811 err = -ENOMEM; 812 goto out; 813 } 814 file->private_data = private; 815 } 816 817 ctx = ntfs_attr_get_search_ctx(ndir, NULL); 818 if (!ctx) { 819 err = -ENOMEM; 820 goto out; 821 } 822 823 /* Find the index root attribute in the mft record. */ 824 if (ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL, 0, 825 ctx)) { 826 ntfs_error(sb, "Index root attribute missing in directory inode %llu", 827 ndir->mft_no); 828 ntfs_attr_put_search_ctx(ctx); 829 err = -ENOMEM; 830 goto out; 831 } 832 833 /* Get to the index root value. */ 834 ir = (struct index_root *)((u8 *)ctx->attr + 835 le16_to_cpu(ctx->attr->data.resident.value_offset)); 836 837 ictx->ir = ir; 838 ictx->actx = ctx; 839 ictx->parent_vcn[ictx->pindex] = VCN_INDEX_ROOT_PARENT; 840 ictx->is_in_root = true; 841 ictx->parent_pos[ictx->pindex] = 0; 842 843 ictx->block_size = le32_to_cpu(ir->index_block_size); 844 if (ictx->block_size < NTFS_BLOCK_SIZE) { 845 ntfs_error(sb, "Index block size (%d) is smaller than the sector size (%d)", 846 ictx->block_size, NTFS_BLOCK_SIZE); 847 err = -EIO; 848 goto out; 849 } 850 851 if (vol->cluster_size <= ictx->block_size) 852 ictx->vcn_size_bits = vol->cluster_size_bits; 853 else 854 ictx->vcn_size_bits = NTFS_BLOCK_SIZE_BITS; 855 ictx->cr = ir->collation_rule; 856 857 /* The first index entry. */ 858 next = (struct index_entry *)((u8 *)&ir->index + 859 le32_to_cpu(ir->index.entries_offset)); 860 861 if (next->flags & INDEX_ENTRY_NODE) { 862 ictx->ia_ni = ntfs_ia_open(ictx, ictx->idx_ni); 863 if (!ictx->ia_ni) { 864 err = -EINVAL; 865 goto out; 866 } 867 868 err = ntfs_ia_blocks_readahead(ictx->ia_ni, actor->pos); 869 if (err) 870 goto out; 871 } 872 873 if (next->flags & INDEX_ENTRY_NODE) { 874 next = ntfs_index_walk_down(next, ictx); 875 if (IS_ERR(next)) { 876 err = PTR_ERR(next); 877 goto out; 878 } 879 } 880 881 if (next && !(next->flags & INDEX_ENTRY_END)) 882 goto nextdir; 883 884 while (1) { 885 next = ntfs_index_next(next, ictx); 886 if (IS_ERR(next)) { 887 err = PTR_ERR(next); 888 goto out; 889 } 890 if (!next) 891 break; 892 nextdir: 893 if (ie_pos < actor->pos) { 894 ie_pos += le16_to_cpu(next->length); 895 continue; 896 } 897 898 actor->pos = ie_pos; 899 900 index = ntfs_mft_no_to_pidx(vol, 901 MREF_LE(next->data.dir.indexed_file)); 902 if (nir) { 903 struct ntfs_index_ra *cnir; 904 struct rb_node *node = ra_root.rb_node; 905 906 if (nir->start_index <= index && 907 index < nir->start_index + nir->count) { 908 /* No behavior */ 909 goto filldir; 910 } 911 912 while (node) { 913 cnir = rb_entry(node, struct ntfs_index_ra, rb_node); 914 if (cnir->start_index <= index && 915 index < cnir->start_index + cnir->count) { 916 goto filldir; 917 } else if (cnir->start_index + cnir->count == index) { 918 cnir->count++; 919 goto filldir; 920 } else if (!cnir->start_index && cnir->start_index - 1 == index) { 921 cnir->start_index = index; 922 goto filldir; 923 } 924 925 if (index < cnir->start_index) 926 node = node->rb_left; 927 else if (index >= cnir->start_index + cnir->count) 928 node = node->rb_right; 929 } 930 931 if (nir->start_index + nir->count == index) { 932 nir->count++; 933 } else if (!nir->start_index && nir->start_index - 1 == index) { 934 nir->start_index = index; 935 } else if (nir->count > 2) { 936 ntfs_insert_rb(nir, &ra_root); 937 nir = NULL; 938 } else { 939 nir->start_index = index; 940 nir->count = 1; 941 } 942 } 943 944 if (!nir) { 945 nir = kzalloc(sizeof(struct ntfs_index_ra), GFP_KERNEL); 946 if (nir) { 947 nir->start_index = index; 948 nir->count = 1; 949 } 950 } 951 952 filldir: 953 /* Submit the name to the filldir callback. */ 954 err = ntfs_filldir(vol, ndir, NULL, next, name, actor); 955 if (err) { 956 /* 957 * Store index key value to file private_data to start 958 * from current index offset on next round. 959 */ 960 private = file->private_data; 961 kfree(private->key); 962 private->key = kmalloc(le16_to_cpu(next->key_length), GFP_KERNEL); 963 if (!private->key) { 964 err = -ENOMEM; 965 goto out; 966 } 967 968 memcpy(private->key, &next->key.file_name, le16_to_cpu(next->key_length)); 969 private->key_length = next->key_length; 970 break; 971 } 972 ie_pos += le16_to_cpu(next->length); 973 } 974 975 if (!err) 976 private->end_in_iterate = true; 977 else 978 err = 0; 979 980 private->curr_pos = actor->pos = ie_pos; 981 out: 982 while (!RB_EMPTY_ROOT(&ra_root)) { 983 struct ntfs_index_ra *cnir; 984 struct rb_node *node; 985 986 node = rb_first(&ra_root); 987 cnir = rb_entry(node, struct ntfs_index_ra, rb_node); 988 ra->ra_pages = cnir->count; 989 page_cache_sync_readahead(vol->mft_ino->i_mapping, ra, NULL, 990 cnir->start_index, cnir->count); 991 rb_erase(node, &ra_root); 992 kfree(cnir); 993 } 994 995 if (err) { 996 if (private) { 997 private->curr_pos = actor->pos; 998 private->end_in_iterate = true; 999 } 1000 err = 0; 1001 } 1002 ntfs_index_ctx_put(ictx); 1003 kfree(name); 1004 kfree(nir); 1005 kfree(ra); 1006 mutex_unlock(&ndir->mrec_lock); 1007 return err; 1008 } 1009 1010 int ntfs_check_empty_dir(struct ntfs_inode *ni, struct mft_record *ni_mrec) 1011 { 1012 struct ntfs_attr_search_ctx *ctx; 1013 int ret = 0; 1014 1015 if (!(ni_mrec->flags & MFT_RECORD_IS_DIRECTORY)) 1016 return 0; 1017 1018 ctx = ntfs_attr_get_search_ctx(ni, NULL); 1019 if (!ctx) { 1020 ntfs_error(ni->vol->sb, "Failed to get search context"); 1021 return -ENOMEM; 1022 } 1023 1024 /* Find the index root attribute in the mft record. */ 1025 ret = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL, 1026 0, ctx); 1027 if (ret) { 1028 ntfs_error(ni->vol->sb, "Index root attribute missing in directory inode %llu", 1029 ni->mft_no); 1030 ntfs_attr_put_search_ctx(ctx); 1031 return ret; 1032 } 1033 1034 /* Non-empty directory? */ 1035 if (le32_to_cpu(ctx->attr->data.resident.value_length) != 1036 sizeof(struct index_root) + sizeof(struct index_entry_header)) { 1037 /* Both ENOTEMPTY and EEXIST are ok. We use the more common. */ 1038 ret = -ENOTEMPTY; 1039 ntfs_debug("Directory is not empty\n"); 1040 } 1041 1042 ntfs_attr_put_search_ctx(ctx); 1043 1044 return ret; 1045 } 1046 1047 /* 1048 * ntfs_dir_open - called when an inode is about to be opened 1049 * @vi: inode to be opened 1050 * @filp: file structure describing the inode 1051 * 1052 * Limit directory size to the page cache limit on architectures where unsigned 1053 * long is 32-bits. This is the most we can do for now without overflowing the 1054 * page cache page index. Doing it this way means we don't run into problems 1055 * because of existing too large directories. It would be better to allow the 1056 * user to read the accessible part of the directory but I doubt very much 1057 * anyone is going to hit this check on a 32-bit architecture, so there is no 1058 * point in adding the extra complexity required to support this. 1059 * 1060 * On 64-bit architectures, the check is hopefully optimized away by the 1061 * compiler. 1062 */ 1063 static int ntfs_dir_open(struct inode *vi, struct file *filp) 1064 { 1065 if (sizeof(unsigned long) < 8) { 1066 if (i_size_read(vi) > MAX_LFS_FILESIZE) 1067 return -EFBIG; 1068 } 1069 return 0; 1070 } 1071 1072 static int ntfs_dir_release(struct inode *vi, struct file *filp) 1073 { 1074 if (filp->private_data) { 1075 kfree(((struct ntfs_file_private *)filp->private_data)->key); 1076 kfree(filp->private_data); 1077 filp->private_data = NULL; 1078 } 1079 return 0; 1080 } 1081 1082 /* 1083 * ntfs_dir_fsync - sync a directory to disk 1084 * @filp: file describing the directory to be synced 1085 * @start: start offset to be synced 1086 * @end: end offset to be synced 1087 * @datasync: if non-zero only flush user data and not metadata 1088 * 1089 * Data integrity sync of a directory to disk. Used for fsync, fdatasync, and 1090 * msync system calls. This function is based on file.c::ntfs_file_fsync(). 1091 * 1092 * Write the mft record and all associated extent mft records as well as the 1093 * $INDEX_ALLOCATION and $BITMAP attributes and then sync the block device. 1094 * 1095 * If @datasync is true, we do not wait on the inode(s) to be written out 1096 * but we always wait on the page cache pages to be written out. 1097 * 1098 * Note: In the past @filp could be NULL so we ignore it as we don't need it 1099 * anyway. 1100 * 1101 * Locking: Caller must hold i_mutex on the inode. 1102 */ 1103 static int ntfs_dir_fsync(struct file *filp, loff_t start, loff_t end, 1104 int datasync) 1105 { 1106 struct inode *bmp_vi, *vi = filp->f_mapping->host; 1107 struct ntfs_volume *vol = NTFS_I(vi)->vol; 1108 struct ntfs_inode *ni = NTFS_I(vi); 1109 struct ntfs_attr_search_ctx *ctx; 1110 struct inode *parent_vi, *ia_vi; 1111 int err, ret; 1112 struct ntfs_attr na; 1113 1114 ntfs_debug("Entering for inode 0x%llx.", ni->mft_no); 1115 1116 if (NVolShutdown(vol)) 1117 return -EIO; 1118 1119 ctx = ntfs_attr_get_search_ctx(ni, NULL); 1120 if (!ctx) 1121 return -ENOMEM; 1122 1123 mutex_lock_nested(&ni->mrec_lock, NTFS_INODE_MUTEX_NORMAL_CHILD); 1124 while (!(err = ntfs_attr_lookup(AT_FILE_NAME, NULL, 0, 0, 0, NULL, 0, ctx))) { 1125 struct file_name_attr *fn = (struct file_name_attr *)((u8 *)ctx->attr + 1126 le16_to_cpu(ctx->attr->data.resident.value_offset)); 1127 1128 if (MREF_LE(fn->parent_directory) == ni->mft_no) 1129 continue; 1130 1131 parent_vi = ntfs_iget(vi->i_sb, MREF_LE(fn->parent_directory)); 1132 if (IS_ERR(parent_vi)) 1133 continue; 1134 mutex_lock_nested(&NTFS_I(parent_vi)->mrec_lock, NTFS_INODE_MUTEX_NORMAL); 1135 ia_vi = ntfs_index_iget(parent_vi, I30, 4); 1136 mutex_unlock(&NTFS_I(parent_vi)->mrec_lock); 1137 if (IS_ERR(ia_vi)) { 1138 iput(parent_vi); 1139 continue; 1140 } 1141 write_inode_now(ia_vi, 1); 1142 iput(ia_vi); 1143 write_inode_now(parent_vi, 1); 1144 iput(parent_vi); 1145 } 1146 mutex_unlock(&ni->mrec_lock); 1147 ntfs_attr_put_search_ctx(ctx); 1148 1149 err = file_write_and_wait_range(filp, start, end); 1150 if (err) 1151 return err; 1152 inode_lock(vi); 1153 1154 /* If the bitmap attribute inode is in memory sync it, too. */ 1155 na.mft_no = vi->i_ino; 1156 na.type = AT_BITMAP; 1157 na.name = I30; 1158 na.name_len = 4; 1159 bmp_vi = ilookup5(vi->i_sb, vi->i_ino, ntfs_test_inode, &na); 1160 if (bmp_vi) { 1161 write_inode_now(bmp_vi, !datasync); 1162 iput(bmp_vi); 1163 } 1164 ret = __ntfs_write_inode(vi, 1); 1165 1166 write_inode_now(vi, !datasync); 1167 1168 write_inode_now(vol->mftbmp_ino, 1); 1169 down_write(&vol->lcnbmp_lock); 1170 write_inode_now(vol->lcnbmp_ino, 1); 1171 up_write(&vol->lcnbmp_lock); 1172 write_inode_now(vol->mft_ino, 1); 1173 1174 err = sync_blockdev(vi->i_sb->s_bdev); 1175 if (unlikely(err && !ret)) 1176 ret = err; 1177 if (likely(!ret)) 1178 ntfs_debug("Done."); 1179 else 1180 ntfs_warning(vi->i_sb, 1181 "Failed to f%ssync inode 0x%llx. Error %u.", 1182 datasync ? "data" : "", ni->mft_no, -ret); 1183 inode_unlock(vi); 1184 return ret; 1185 } 1186 1187 const struct file_operations ntfs_dir_ops = { 1188 .llseek = generic_file_llseek, /* Seek inside directory. */ 1189 .read = generic_read_dir, /* Return -EISDIR. */ 1190 .iterate_shared = ntfs_readdir, /* Read directory contents. */ 1191 .fsync = ntfs_dir_fsync, /* Sync a directory to disk. */ 1192 .open = ntfs_dir_open, /* Open directory. */ 1193 .release = ntfs_dir_release, 1194 .unlocked_ioctl = ntfs_ioctl, 1195 #ifdef CONFIG_COMPAT 1196 .compat_ioctl = ntfs_compat_ioctl, 1197 #endif 1198 .setlease = generic_setlease, 1199 }; 1200