1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* 3 * NTFS kernel directory operations. 4 * 5 * Copyright (c) 2001-2007 Anton Altaparmakov 6 * Copyright (c) 2002 Richard Russon 7 * Copyright (c) 2025 LG Electronics Co., Ltd. 8 */ 9 10 #include <linux/blkdev.h> 11 12 #include "dir.h" 13 #include "mft.h" 14 #include "ntfs.h" 15 #include "index.h" 16 #include "reparse.h" 17 18 #include <linux/filelock.h> 19 20 /* 21 * The little endian Unicode string $I30 as a global constant. 22 */ 23 __le16 I30[5] = { cpu_to_le16('$'), cpu_to_le16('I'), 24 cpu_to_le16('3'), cpu_to_le16('0'), 0 }; 25 26 /* 27 * ntfs_lookup_inode_by_name - find an inode in a directory given its name 28 * @dir_ni: ntfs inode of the directory in which to search for the name 29 * @uname: Unicode name for which to search in the directory 30 * @uname_len: length of the name @uname in Unicode characters 31 * @res: return the found file name if necessary (see below) 32 * 33 * Look for an inode with name @uname in the directory with inode @dir_ni. 34 * ntfs_lookup_inode_by_name() walks the contents of the directory looking for 35 * the Unicode name. If the name is found in the directory, the corresponding 36 * inode number (>= 0) is returned as a mft reference in cpu format, i.e. it 37 * is a 64-bit number containing the sequence number. 38 * 39 * On error, a negative value is returned corresponding to the error code. In 40 * particular if the inode is not found -ENOENT is returned. Note that you 41 * can't just check the return value for being negative, you have to check the 42 * inode number for being negative which you can extract using MREC(return 43 * value). 44 * 45 * Note, @uname_len does not include the (optional) terminating NULL character. 46 * 47 * Note, we look for a case sensitive match first but we also look for a case 48 * insensitive match at the same time. If we find a case insensitive match, we 49 * save that for the case that we don't find an exact match, where we return 50 * the case insensitive match and setup @res (which we allocate!) with the mft 51 * reference, the file name type, length and with a copy of the little endian 52 * Unicode file name itself. If we match a file name which is in the DOS name 53 * space, we only return the mft reference and file name type in @res. 54 * ntfs_lookup() then uses this to find the long file name in the inode itself. 55 * This is to avoid polluting the dcache with short file names. We want them to 56 * work but we don't care for how quickly one can access them. This also fixes 57 * the dcache aliasing issues. 58 * 59 * Locking: - Caller must hold i_mutex on the directory. 60 * - Each page cache page in the index allocation mapping must be 61 * locked whilst being accessed otherwise we may find a corrupt 62 * page due to it being under ->writepage at the moment which 63 * applies the mst protection fixups before writing out and then 64 * removes them again after the write is complete after which it 65 * unlocks the page. 66 */ 67 u64 ntfs_lookup_inode_by_name(struct ntfs_inode *dir_ni, const __le16 *uname, 68 const int uname_len, struct ntfs_name **res) 69 { 70 struct ntfs_volume *vol = dir_ni->vol; 71 struct super_block *sb = vol->sb; 72 struct inode *ia_vi = NULL; 73 struct mft_record *m; 74 struct index_root *ir; 75 struct index_entry *ie; 76 struct index_block *ia; 77 u8 *index_end; 78 u64 mref; 79 struct ntfs_attr_search_ctx *ctx; 80 int err, rc; 81 s64 vcn, old_vcn; 82 struct address_space *ia_mapping; 83 struct folio *folio; 84 u8 *kaddr = NULL; 85 struct ntfs_name *name = NULL; 86 87 /* Get hold of the mft record for the directory. */ 88 m = map_mft_record(dir_ni); 89 if (IS_ERR(m)) { 90 ntfs_error(sb, "map_mft_record() failed with error code %ld.", 91 -PTR_ERR(m)); 92 return ERR_MREF(PTR_ERR(m)); 93 } 94 ctx = ntfs_attr_get_search_ctx(dir_ni, m); 95 if (unlikely(!ctx)) { 96 err = -ENOMEM; 97 goto err_out; 98 } 99 /* Find the index root attribute in the mft record. */ 100 err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL, 101 0, ctx); 102 if (unlikely(err)) { 103 if (err == -ENOENT) { 104 ntfs_error(sb, 105 "Index root attribute missing in directory inode 0x%llx.", 106 dir_ni->mft_no); 107 err = -EIO; 108 } 109 goto err_out; 110 } 111 /* Get to the index root value (it's been verified in read_inode). */ 112 ir = (struct index_root *)((u8 *)ctx->attr + 113 le16_to_cpu(ctx->attr->data.resident.value_offset)); 114 index_end = (u8 *)&ir->index + le32_to_cpu(ir->index.index_length); 115 /* The first index entry. */ 116 ie = (struct index_entry *)((u8 *)&ir->index + 117 le32_to_cpu(ir->index.entries_offset)); 118 /* 119 * Loop until we exceed valid memory (corruption case) or until we 120 * reach the last entry. 121 */ 122 for (;; ie = (struct index_entry *)((u8 *)ie + le16_to_cpu(ie->length))) { 123 /* Bounds checks. */ 124 if ((u8 *)ie < (u8 *)ctx->mrec || 125 (u8 *)ie + sizeof(struct index_entry_header) > index_end || 126 (u8 *)ie + sizeof(struct index_entry_header) + le16_to_cpu(ie->key_length) > 127 index_end || (u8 *)ie + le16_to_cpu(ie->length) > index_end) 128 goto dir_err_out; 129 /* 130 * The last entry cannot contain a name. It can however contain 131 * a pointer to a child node in the B+tree so we just break out. 132 */ 133 if (ie->flags & INDEX_ENTRY_END) 134 break; 135 /* Key length should not be zero if it is not last entry. */ 136 if (!ie->key_length) 137 goto dir_err_out; 138 /* Check the consistency of an index entry */ 139 if (ntfs_index_entry_inconsistent(NULL, vol, ie, COLLATION_FILE_NAME, 140 dir_ni->mft_no)) 141 goto dir_err_out; 142 /* 143 * We perform a case sensitive comparison and if that matches 144 * we are done and return the mft reference of the inode (i.e. 145 * the inode number together with the sequence number for 146 * consistency checking). We convert it to cpu format before 147 * returning. 148 */ 149 if (ntfs_are_names_equal(uname, uname_len, 150 (__le16 *)&ie->key.file_name.file_name, 151 ie->key.file_name.file_name_length, 152 CASE_SENSITIVE, vol->upcase, vol->upcase_len)) { 153 found_it: 154 /* 155 * We have a perfect match, so we don't need to care 156 * about having matched imperfectly before, so we can 157 * free name and set *res to NULL. 158 * However, if the perfect match is a short file name, 159 * we need to signal this through *res, so that 160 * ntfs_lookup() can fix dcache aliasing issues. 161 * As an optimization we just reuse an existing 162 * allocation of *res. 163 */ 164 if (ie->key.file_name.file_name_type == FILE_NAME_DOS) { 165 if (!name) { 166 name = kmalloc(sizeof(struct ntfs_name), 167 GFP_NOFS); 168 if (!name) { 169 err = -ENOMEM; 170 goto err_out; 171 } 172 } 173 name->mref = le64_to_cpu( 174 ie->data.dir.indexed_file); 175 name->type = FILE_NAME_DOS; 176 name->len = 0; 177 *res = name; 178 } else { 179 kfree(name); 180 *res = NULL; 181 } 182 mref = le64_to_cpu(ie->data.dir.indexed_file); 183 ntfs_attr_put_search_ctx(ctx); 184 unmap_mft_record(dir_ni); 185 return mref; 186 } 187 /* 188 * For a case insensitive mount, we also perform a case 189 * insensitive comparison (provided the file name is not in the 190 * POSIX namespace). If the comparison matches, and the name is 191 * in the WIN32 namespace, we cache the filename in *res so 192 * that the caller, ntfs_lookup(), can work on it. If the 193 * comparison matches, and the name is in the DOS namespace, we 194 * only cache the mft reference and the file name type (we set 195 * the name length to zero for simplicity). 196 */ 197 if ((!NVolCaseSensitive(vol) || 198 ie->key.file_name.file_name_type == FILE_NAME_DOS) && 199 ntfs_are_names_equal(uname, uname_len, 200 (__le16 *)&ie->key.file_name.file_name, 201 ie->key.file_name.file_name_length, 202 IGNORE_CASE, vol->upcase, 203 vol->upcase_len)) { 204 int name_size = sizeof(struct ntfs_name); 205 u8 type = ie->key.file_name.file_name_type; 206 u8 len = ie->key.file_name.file_name_length; 207 208 /* Only one case insensitive matching name allowed. */ 209 if (name) { 210 ntfs_error(sb, 211 "Found already allocated name in phase 1. Please run chkdsk"); 212 goto dir_err_out; 213 } 214 215 if (type != FILE_NAME_DOS) 216 name_size += len * sizeof(__le16); 217 name = kmalloc(name_size, GFP_NOFS); 218 if (!name) { 219 err = -ENOMEM; 220 goto err_out; 221 } 222 name->mref = le64_to_cpu(ie->data.dir.indexed_file); 223 name->type = type; 224 if (type != FILE_NAME_DOS) { 225 name->len = len; 226 memcpy(name->name, ie->key.file_name.file_name, 227 len * sizeof(__le16)); 228 } else 229 name->len = 0; 230 *res = name; 231 } 232 /* 233 * Not a perfect match, need to do full blown collation so we 234 * know which way in the B+tree we have to go. 235 */ 236 rc = ntfs_collate_names(uname, uname_len, 237 (__le16 *)&ie->key.file_name.file_name, 238 ie->key.file_name.file_name_length, 1, 239 IGNORE_CASE, vol->upcase, vol->upcase_len); 240 /* 241 * If uname collates before the name of the current entry, there 242 * is definitely no such name in this index but we might need to 243 * descend into the B+tree so we just break out of the loop. 244 */ 245 if (rc == -1) 246 break; 247 /* The names are not equal, continue the search. */ 248 if (rc) 249 continue; 250 /* 251 * Names match with case insensitive comparison, now try the 252 * case sensitive comparison, which is required for proper 253 * collation. 254 */ 255 rc = ntfs_collate_names(uname, uname_len, 256 (__le16 *)&ie->key.file_name.file_name, 257 ie->key.file_name.file_name_length, 1, 258 CASE_SENSITIVE, vol->upcase, vol->upcase_len); 259 if (rc == -1) 260 break; 261 if (rc) 262 continue; 263 /* 264 * Perfect match, this will never happen as the 265 * ntfs_are_names_equal() call will have gotten a match but we 266 * still treat it correctly. 267 */ 268 goto found_it; 269 } 270 /* 271 * We have finished with this index without success. Check for the 272 * presence of a child node and if not present return -ENOENT, unless 273 * we have got a matching name cached in name in which case return the 274 * mft reference associated with it. 275 */ 276 if (!(ie->flags & INDEX_ENTRY_NODE)) { 277 if (name) { 278 ntfs_attr_put_search_ctx(ctx); 279 unmap_mft_record(dir_ni); 280 return name->mref; 281 } 282 ntfs_debug("Entry not found."); 283 err = -ENOENT; 284 goto err_out; 285 } /* Child node present, descend into it. */ 286 287 /* Get the starting vcn of the index_block holding the child node. */ 288 vcn = le64_to_cpup((__le64 *)((u8 *)ie + le16_to_cpu(ie->length) - 8)); 289 290 /* 291 * We are done with the index root and the mft record. Release them, 292 * otherwise we deadlock with read_mapping_folio(). 293 */ 294 ntfs_attr_put_search_ctx(ctx); 295 unmap_mft_record(dir_ni); 296 m = NULL; 297 ctx = NULL; 298 299 ia_vi = ntfs_index_iget(VFS_I(dir_ni), I30, 4); 300 if (IS_ERR(ia_vi)) { 301 err = PTR_ERR(ia_vi); 302 goto err_out; 303 } 304 305 ia_mapping = ia_vi->i_mapping; 306 descend_into_child_node: 307 /* 308 * Convert vcn to index into the index allocation attribute in units 309 * of PAGE_SIZE and map the page cache page, reading it from 310 * disk if necessary. 311 */ 312 folio = read_mapping_folio(ia_mapping, vcn << 313 dir_ni->itype.index.vcn_size_bits >> PAGE_SHIFT, NULL); 314 if (IS_ERR(folio)) { 315 ntfs_error(sb, "Failed to map directory index page, error %ld.", 316 -PTR_ERR(folio)); 317 err = PTR_ERR(folio); 318 goto err_out; 319 } 320 321 folio_lock(folio); 322 kaddr = kmalloc(PAGE_SIZE, GFP_NOFS); 323 if (!kaddr) { 324 err = -ENOMEM; 325 folio_unlock(folio); 326 folio_put(folio); 327 goto unm_err_out; 328 } 329 330 memcpy_from_folio(kaddr, folio, 0, PAGE_SIZE); 331 post_read_mst_fixup((struct ntfs_record *)kaddr, PAGE_SIZE); 332 folio_unlock(folio); 333 folio_put(folio); 334 fast_descend_into_child_node: 335 /* Get to the index allocation block. */ 336 ia = (struct index_block *)(kaddr + ((vcn << 337 dir_ni->itype.index.vcn_size_bits) & ~PAGE_MASK)); 338 /* Bounds checks. */ 339 if ((u8 *)ia < kaddr || (u8 *)ia > kaddr + PAGE_SIZE) { 340 ntfs_error(sb, 341 "Out of bounds check failed. Corrupt directory inode 0x%llx or driver bug.", 342 dir_ni->mft_no); 343 goto unm_err_out; 344 } 345 /* Catch multi sector transfer fixup errors. */ 346 if (unlikely(!ntfs_is_indx_record(ia->magic))) { 347 ntfs_error(sb, 348 "Directory index record with vcn 0x%llx is corrupt. Corrupt inode 0x%llx. Run chkdsk.", 349 vcn, dir_ni->mft_no); 350 goto unm_err_out; 351 } 352 if (le64_to_cpu(ia->index_block_vcn) != vcn) { 353 ntfs_error(sb, 354 "Actual VCN (0x%llx) of index buffer is different from expected VCN (0x%llx). Directory inode 0x%llx is corrupt or driver bug.", 355 le64_to_cpu(ia->index_block_vcn), 356 vcn, dir_ni->mft_no); 357 goto unm_err_out; 358 } 359 if (le32_to_cpu(ia->index.allocated_size) + 0x18 != 360 dir_ni->itype.index.block_size) { 361 ntfs_error(sb, 362 "Index buffer (VCN 0x%llx) of directory inode 0x%llx has a size (%u) differing from the directory specified size (%u). Directory inode is corrupt or driver bug.", 363 vcn, dir_ni->mft_no, 364 le32_to_cpu(ia->index.allocated_size) + 0x18, 365 dir_ni->itype.index.block_size); 366 goto unm_err_out; 367 } 368 index_end = (u8 *)ia + dir_ni->itype.index.block_size; 369 if (index_end > kaddr + PAGE_SIZE) { 370 ntfs_error(sb, 371 "Index buffer (VCN 0x%llx) of directory inode 0x%llx crosses page boundary. Impossible! Cannot access! This is probably a bug in the driver.", 372 vcn, dir_ni->mft_no); 373 goto unm_err_out; 374 } 375 index_end = (u8 *)&ia->index + le32_to_cpu(ia->index.index_length); 376 if (index_end > (u8 *)ia + dir_ni->itype.index.block_size) { 377 ntfs_error(sb, 378 "Size of index buffer (VCN 0x%llx) of directory inode 0x%llx exceeds maximum size.", 379 vcn, dir_ni->mft_no); 380 goto unm_err_out; 381 } 382 /* The first index entry. */ 383 ie = (struct index_entry *)((u8 *)&ia->index + 384 le32_to_cpu(ia->index.entries_offset)); 385 /* 386 * Iterate similar to above big loop but applied to index buffer, thus 387 * loop until we exceed valid memory (corruption case) or until we 388 * reach the last entry. 389 */ 390 for (;; ie = (struct index_entry *)((u8 *)ie + le16_to_cpu(ie->length))) { 391 /* Bounds checks. */ 392 if ((u8 *)ie < (u8 *)ia || 393 (u8 *)ie + sizeof(struct index_entry_header) > index_end || 394 (u8 *)ie + sizeof(struct index_entry_header) + le16_to_cpu(ie->key_length) > 395 index_end || (u8 *)ie + le16_to_cpu(ie->length) > index_end) { 396 ntfs_error(sb, "Index entry out of bounds in directory inode 0x%llx.", 397 dir_ni->mft_no); 398 goto unm_err_out; 399 } 400 /* 401 * The last entry cannot contain a name. It can however contain 402 * a pointer to a child node in the B+tree so we just break out. 403 */ 404 if (ie->flags & INDEX_ENTRY_END) 405 break; 406 /* Key length should not be zero if it is not last entry. */ 407 if (!ie->key_length) 408 goto unm_err_out; 409 /* Check the consistency of an index entry */ 410 if (ntfs_index_entry_inconsistent(NULL, vol, ie, COLLATION_FILE_NAME, 411 dir_ni->mft_no)) 412 goto unm_err_out; 413 /* 414 * We perform a case sensitive comparison and if that matches 415 * we are done and return the mft reference of the inode (i.e. 416 * the inode number together with the sequence number for 417 * consistency checking). We convert it to cpu format before 418 * returning. 419 */ 420 if (ntfs_are_names_equal(uname, uname_len, 421 (__le16 *)&ie->key.file_name.file_name, 422 ie->key.file_name.file_name_length, 423 CASE_SENSITIVE, vol->upcase, vol->upcase_len)) { 424 found_it2: 425 /* 426 * We have a perfect match, so we don't need to care 427 * about having matched imperfectly before, so we can 428 * free name and set *res to NULL. 429 * However, if the perfect match is a short file name, 430 * we need to signal this through *res, so that 431 * ntfs_lookup() can fix dcache aliasing issues. 432 * As an optimization we just reuse an existing 433 * allocation of *res. 434 */ 435 if (ie->key.file_name.file_name_type == FILE_NAME_DOS) { 436 if (!name) { 437 name = kmalloc(sizeof(struct ntfs_name), 438 GFP_NOFS); 439 if (!name) { 440 err = -ENOMEM; 441 goto unm_err_out; 442 } 443 } 444 name->mref = le64_to_cpu( 445 ie->data.dir.indexed_file); 446 name->type = FILE_NAME_DOS; 447 name->len = 0; 448 *res = name; 449 } else { 450 kfree(name); 451 *res = NULL; 452 } 453 mref = le64_to_cpu(ie->data.dir.indexed_file); 454 kfree(kaddr); 455 iput(ia_vi); 456 return mref; 457 } 458 /* 459 * For a case insensitive mount, we also perform a case 460 * insensitive comparison (provided the file name is not in the 461 * POSIX namespace). If the comparison matches, and the name is 462 * in the WIN32 namespace, we cache the filename in *res so 463 * that the caller, ntfs_lookup(), can work on it. If the 464 * comparison matches, and the name is in the DOS namespace, we 465 * only cache the mft reference and the file name type (we set 466 * the name length to zero for simplicity). 467 */ 468 if ((!NVolCaseSensitive(vol) || 469 ie->key.file_name.file_name_type == FILE_NAME_DOS) && 470 ntfs_are_names_equal(uname, uname_len, 471 (__le16 *)&ie->key.file_name.file_name, 472 ie->key.file_name.file_name_length, 473 IGNORE_CASE, vol->upcase, 474 vol->upcase_len)) { 475 int name_size = sizeof(struct ntfs_name); 476 u8 type = ie->key.file_name.file_name_type; 477 u8 len = ie->key.file_name.file_name_length; 478 479 /* Only one case insensitive matching name allowed. */ 480 if (name) { 481 ntfs_error(sb, 482 "Found already allocated name in phase 2. Please run chkdsk"); 483 kfree(kaddr); 484 goto dir_err_out; 485 } 486 487 if (type != FILE_NAME_DOS) 488 name_size += len * sizeof(__le16); 489 name = kmalloc(name_size, GFP_NOFS); 490 if (!name) { 491 err = -ENOMEM; 492 goto unm_err_out; 493 } 494 name->mref = le64_to_cpu(ie->data.dir.indexed_file); 495 name->type = type; 496 if (type != FILE_NAME_DOS) { 497 name->len = len; 498 memcpy(name->name, ie->key.file_name.file_name, 499 len * sizeof(__le16)); 500 } else 501 name->len = 0; 502 *res = name; 503 } 504 /* 505 * Not a perfect match, need to do full blown collation so we 506 * know which way in the B+tree we have to go. 507 */ 508 rc = ntfs_collate_names(uname, uname_len, 509 (__le16 *)&ie->key.file_name.file_name, 510 ie->key.file_name.file_name_length, 1, 511 IGNORE_CASE, vol->upcase, vol->upcase_len); 512 /* 513 * If uname collates before the name of the current entry, there 514 * is definitely no such name in this index but we might need to 515 * descend into the B+tree so we just break out of the loop. 516 */ 517 if (rc == -1) 518 break; 519 /* The names are not equal, continue the search. */ 520 if (rc) 521 continue; 522 /* 523 * Names match with case insensitive comparison, now try the 524 * case sensitive comparison, which is required for proper 525 * collation. 526 */ 527 rc = ntfs_collate_names(uname, uname_len, 528 (__le16 *)&ie->key.file_name.file_name, 529 ie->key.file_name.file_name_length, 1, 530 CASE_SENSITIVE, vol->upcase, vol->upcase_len); 531 if (rc == -1) 532 break; 533 if (rc) 534 continue; 535 /* 536 * Perfect match, this will never happen as the 537 * ntfs_are_names_equal() call will have gotten a match but we 538 * still treat it correctly. 539 */ 540 goto found_it2; 541 } 542 /* 543 * We have finished with this index buffer without success. Check for 544 * the presence of a child node. 545 */ 546 if (ie->flags & INDEX_ENTRY_NODE) { 547 if ((ia->index.flags & NODE_MASK) == LEAF_NODE) { 548 ntfs_error(sb, 549 "Index entry with child node found in a leaf node in directory inode 0x%llx.", 550 dir_ni->mft_no); 551 goto unm_err_out; 552 } 553 /* Child node present, descend into it. */ 554 old_vcn = vcn; 555 vcn = le64_to_cpup((__le64 *)((u8 *)ie + 556 le16_to_cpu(ie->length) - 8)); 557 if (vcn >= 0) { 558 /* 559 * If vcn is in the same page cache page as old_vcn we 560 * recycle the mapped page. 561 */ 562 if (ntfs_cluster_to_pidx(vol, old_vcn) == 563 ntfs_cluster_to_pidx(vol, vcn)) 564 goto fast_descend_into_child_node; 565 kfree(kaddr); 566 kaddr = NULL; 567 goto descend_into_child_node; 568 } 569 ntfs_error(sb, "Negative child node vcn in directory inode 0x%llx.", 570 dir_ni->mft_no); 571 goto unm_err_out; 572 } 573 /* 574 * No child node present, return -ENOENT, unless we have got a matching 575 * name cached in name in which case return the mft reference 576 * associated with it. 577 */ 578 if (name) { 579 kfree(kaddr); 580 iput(ia_vi); 581 return name->mref; 582 } 583 ntfs_debug("Entry not found."); 584 err = -ENOENT; 585 unm_err_out: 586 kfree(kaddr); 587 err_out: 588 if (!err) 589 err = -EIO; 590 if (ctx) 591 ntfs_attr_put_search_ctx(ctx); 592 if (m) 593 unmap_mft_record(dir_ni); 594 kfree(name); 595 *res = NULL; 596 if (!IS_ERR_OR_NULL(ia_vi)) 597 iput(ia_vi); 598 return ERR_MREF(err); 599 dir_err_out: 600 ntfs_error(sb, "Corrupt directory. Aborting lookup."); 601 goto err_out; 602 } 603 604 /* 605 * ntfs_filldir - ntfs specific filldir method 606 * @vol: current ntfs volume 607 * @ndir: ntfs inode of current directory 608 * @ia_page: page in which the index allocation buffer @ie is in resides 609 * @ie: current index entry 610 * @name: buffer to use for the converted name 611 * @actor: what to feed the entries to 612 * 613 * Convert the Unicode @name to the loaded NLS and pass it to the @filldir 614 * callback. 615 * 616 * If @ia_page is not NULL it is the locked page containing the index 617 * allocation block containing the index entry @ie. 618 * 619 * Note, we drop (and then reacquire) the page lock on @ia_page across the 620 * @filldir() call otherwise we would deadlock with NFSd when it calls ->lookup 621 * since ntfs_lookup() will lock the same page. As an optimization, we do not 622 * retake the lock if we are returning a non-zero value as ntfs_readdir() 623 * would need to drop the lock immediately anyway. 624 */ 625 static inline int ntfs_filldir(struct ntfs_volume *vol, 626 struct ntfs_inode *ndir, struct page *ia_page, struct index_entry *ie, 627 u8 *name, struct dir_context *actor) 628 { 629 unsigned long mref; 630 int name_len; 631 unsigned int dt_type; 632 u8 name_type; 633 634 name_type = ie->key.file_name.file_name_type; 635 if (name_type == FILE_NAME_DOS) { 636 ntfs_debug("Skipping DOS name space entry."); 637 return 0; 638 } 639 if (MREF_LE(ie->data.dir.indexed_file) == FILE_root) { 640 ntfs_debug("Skipping root directory self reference entry."); 641 return 0; 642 } 643 if (MREF_LE(ie->data.dir.indexed_file) < FILE_first_user && 644 !NVolShowSystemFiles(vol)) { 645 ntfs_debug("Skipping system file."); 646 return 0; 647 } 648 if (!NVolShowHiddenFiles(vol) && 649 (ie->key.file_name.file_attributes & FILE_ATTR_HIDDEN)) { 650 ntfs_debug("Skipping hidden file."); 651 return 0; 652 } 653 654 name_len = ntfs_ucstonls(vol, (__le16 *)&ie->key.file_name.file_name, 655 ie->key.file_name.file_name_length, &name, 656 NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1); 657 if (name_len <= 0) { 658 ntfs_warning(vol->sb, "Skipping unrepresentable inode 0x%llx.", 659 (long long)MREF_LE(ie->data.dir.indexed_file)); 660 return 0; 661 } 662 663 mref = MREF_LE(ie->data.dir.indexed_file); 664 if (ie->key.file_name.file_attributes & 665 FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT) 666 dt_type = DT_DIR; 667 else if (ie->key.file_name.file_attributes & FILE_ATTR_REPARSE_POINT) 668 dt_type = ntfs_reparse_tag_dt_types(vol, mref); 669 else 670 dt_type = DT_REG; 671 672 /* 673 * Drop the page lock otherwise we deadlock with NFS when it calls 674 * ->lookup since ntfs_lookup() will lock the same page. 675 */ 676 if (ia_page) 677 unlock_page(ia_page); 678 ntfs_debug("Calling filldir for %s with len %i, fpos 0x%llx, inode 0x%lx, DT_%s.", 679 name, name_len, actor->pos, mref, dt_type == DT_DIR ? "DIR" : "REG"); 680 if (!dir_emit(actor, name, name_len, mref, dt_type)) 681 return 1; 682 /* Relock the page but not if we are aborting ->readdir. */ 683 if (ia_page) 684 lock_page(ia_page); 685 return 0; 686 } 687 688 struct ntfs_file_private { 689 void *key; 690 __le16 key_length; 691 bool end_in_iterate; 692 loff_t curr_pos; 693 }; 694 695 struct ntfs_index_ra { 696 unsigned long start_index; 697 unsigned int count; 698 struct rb_node rb_node; 699 }; 700 701 static void ntfs_insert_rb(struct ntfs_index_ra *nir, struct rb_root *root) 702 { 703 struct rb_node **new = &root->rb_node, *parent = NULL; 704 struct ntfs_index_ra *cnir; 705 706 while (*new) { 707 parent = *new; 708 cnir = rb_entry(parent, struct ntfs_index_ra, rb_node); 709 if (nir->start_index < cnir->start_index) 710 new = &parent->rb_left; 711 else if (nir->start_index >= cnir->start_index + cnir->count) 712 new = &parent->rb_right; 713 else { 714 pr_err("nir start index : %ld, count : %d, cnir start_index : %ld, count : %d\n", 715 nir->start_index, nir->count, cnir->start_index, cnir->count); 716 return; 717 } 718 } 719 720 rb_link_node(&nir->rb_node, parent, new); 721 rb_insert_color(&nir->rb_node, root); 722 } 723 724 static int ntfs_ia_blocks_readahead(struct ntfs_inode *ia_ni, loff_t pos) 725 { 726 unsigned long dir_start_index, dir_end_index; 727 struct inode *ia_vi = VFS_I(ia_ni); 728 struct file_ra_state *dir_ra; 729 730 dir_end_index = (i_size_read(ia_vi) + PAGE_SIZE - 1) >> PAGE_SHIFT; 731 dir_start_index = (pos + PAGE_SIZE - 1) >> PAGE_SHIFT; 732 733 if (dir_start_index >= dir_end_index) 734 return 0; 735 736 dir_ra = kzalloc(sizeof(*dir_ra), GFP_NOFS); 737 if (!dir_ra) 738 return -ENOMEM; 739 740 file_ra_state_init(dir_ra, ia_vi->i_mapping); 741 dir_end_index = (i_size_read(ia_vi) + PAGE_SIZE - 1) >> PAGE_SHIFT; 742 dir_start_index = (pos + PAGE_SIZE - 1) >> PAGE_SHIFT; 743 dir_ra->ra_pages = dir_end_index - dir_start_index; 744 page_cache_sync_readahead(ia_vi->i_mapping, dir_ra, NULL, 745 dir_start_index, dir_end_index - dir_start_index); 746 kfree(dir_ra); 747 748 return 0; 749 } 750 751 static int ntfs_readdir(struct file *file, struct dir_context *actor) 752 { 753 struct inode *vdir = file_inode(file); 754 struct super_block *sb = vdir->i_sb; 755 struct ntfs_inode *ndir = NTFS_I(vdir); 756 struct ntfs_volume *vol = NTFS_SB(sb); 757 struct ntfs_attr_search_ctx *ctx = NULL; 758 struct ntfs_index_context *ictx = NULL; 759 u8 *name; 760 struct index_root *ir; 761 struct index_entry *next = NULL; 762 struct ntfs_file_private *private = NULL; 763 int err = 0; 764 loff_t ie_pos = 2; /* initialize it with dot and dotdot size */ 765 struct ntfs_index_ra *nir = NULL; 766 unsigned long index; 767 struct rb_root ra_root = RB_ROOT; 768 struct file_ra_state *ra; 769 770 ntfs_debug("Entering for inode 0x%llx, fpos 0x%llx.", 771 ndir->mft_no, actor->pos); 772 773 if (file->private_data) { 774 private = file->private_data; 775 776 if (actor->pos != private->curr_pos) { 777 /* 778 * If actor->pos is different from the previous passed 779 * one, Discard the private->key and fill dirent buffer 780 * with linear lookup. 781 */ 782 kfree(private->key); 783 private->key = NULL; 784 private->end_in_iterate = false; 785 } else if (private->end_in_iterate) { 786 kfree(private->key); 787 kfree(file->private_data); 788 file->private_data = NULL; 789 return 0; 790 } 791 } 792 793 /* Emulate . and .. for all directories. */ 794 if (!dir_emit_dots(file, actor)) 795 return 0; 796 797 /* 798 * Allocate a buffer to store the current name being processed 799 * converted to format determined by current NLS. 800 */ 801 name = kmalloc(NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1, GFP_NOFS); 802 if (unlikely(!name)) 803 return -ENOMEM; 804 805 mutex_lock_nested(&ndir->mrec_lock, NTFS_INODE_MUTEX_PARENT); 806 ictx = ntfs_index_ctx_get(ndir, I30, 4); 807 if (!ictx) { 808 kfree(name); 809 mutex_unlock(&ndir->mrec_lock); 810 return -ENOMEM; 811 } 812 813 ra = kzalloc(sizeof(struct file_ra_state), GFP_NOFS); 814 if (!ra) { 815 kfree(name); 816 ntfs_index_ctx_put(ictx); 817 mutex_unlock(&ndir->mrec_lock); 818 return -ENOMEM; 819 } 820 file_ra_state_init(ra, vol->mft_ino->i_mapping); 821 822 if (private && private->key) { 823 /* 824 * Find index witk private->key using ntfs_index_lookup() 825 * instead of linear index lookup. 826 */ 827 err = ntfs_index_lookup(private->key, 828 le16_to_cpu(private->key_length), 829 ictx); 830 if (!err) { 831 next = ictx->entry; 832 /* 833 * Update ie_pos with private->curr_pos 834 * to make next d_off of dirent correct. 835 */ 836 ie_pos = private->curr_pos; 837 838 if (actor->pos > vol->mft_record_size && ictx->ia_ni) { 839 err = ntfs_ia_blocks_readahead(ictx->ia_ni, actor->pos); 840 if (err) 841 goto out; 842 } 843 844 goto nextdir; 845 } else { 846 goto out; 847 } 848 } else if (!private) { 849 private = kzalloc(sizeof(struct ntfs_file_private), GFP_KERNEL); 850 if (!private) { 851 err = -ENOMEM; 852 goto out; 853 } 854 file->private_data = private; 855 } 856 857 ctx = ntfs_attr_get_search_ctx(ndir, NULL); 858 if (!ctx) { 859 err = -ENOMEM; 860 goto out; 861 } 862 863 /* Find the index root attribute in the mft record. */ 864 if (ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL, 0, 865 ctx)) { 866 ntfs_error(sb, "Index root attribute missing in directory inode %llu", 867 ndir->mft_no); 868 ntfs_attr_put_search_ctx(ctx); 869 err = -ENOMEM; 870 goto out; 871 } 872 873 /* Get to the index root value. */ 874 ir = (struct index_root *)((u8 *)ctx->attr + 875 le16_to_cpu(ctx->attr->data.resident.value_offset)); 876 877 ictx->ir = ir; 878 ictx->actx = ctx; 879 ictx->parent_vcn[ictx->pindex] = VCN_INDEX_ROOT_PARENT; 880 ictx->is_in_root = true; 881 ictx->parent_pos[ictx->pindex] = 0; 882 883 ictx->block_size = le32_to_cpu(ir->index_block_size); 884 if (ictx->block_size < NTFS_BLOCK_SIZE) { 885 ntfs_error(sb, "Index block size (%d) is smaller than the sector size (%d)", 886 ictx->block_size, NTFS_BLOCK_SIZE); 887 err = -EIO; 888 goto out; 889 } 890 891 if (vol->cluster_size <= ictx->block_size) 892 ictx->vcn_size_bits = vol->cluster_size_bits; 893 else 894 ictx->vcn_size_bits = NTFS_BLOCK_SIZE_BITS; 895 896 /* The first index entry. */ 897 next = (struct index_entry *)((u8 *)&ir->index + 898 le32_to_cpu(ir->index.entries_offset)); 899 900 if (next->flags & INDEX_ENTRY_NODE) { 901 ictx->ia_ni = ntfs_ia_open(ictx, ictx->idx_ni); 902 if (!ictx->ia_ni) { 903 err = -EINVAL; 904 goto out; 905 } 906 907 err = ntfs_ia_blocks_readahead(ictx->ia_ni, actor->pos); 908 if (err) 909 goto out; 910 } 911 912 if (next->flags & INDEX_ENTRY_NODE) { 913 next = ntfs_index_walk_down(next, ictx); 914 if (!next) { 915 err = -EIO; 916 goto out; 917 } 918 } 919 920 if (next && !(next->flags & INDEX_ENTRY_END)) 921 goto nextdir; 922 923 while ((next = ntfs_index_next(next, ictx)) != NULL) { 924 nextdir: 925 /* Check the consistency of an index entry */ 926 if (ntfs_index_entry_inconsistent(ictx, vol, next, COLLATION_FILE_NAME, 927 ndir->mft_no)) { 928 err = -EIO; 929 goto out; 930 } 931 932 if (ie_pos < actor->pos) { 933 ie_pos += le16_to_cpu(next->length); 934 continue; 935 } 936 937 actor->pos = ie_pos; 938 939 index = ntfs_mft_no_to_pidx(vol, 940 MREF_LE(next->data.dir.indexed_file)); 941 if (nir) { 942 struct ntfs_index_ra *cnir; 943 struct rb_node *node = ra_root.rb_node; 944 945 if (nir->start_index <= index && 946 index < nir->start_index + nir->count) { 947 /* No behavior */ 948 goto filldir; 949 } 950 951 while (node) { 952 cnir = rb_entry(node, struct ntfs_index_ra, rb_node); 953 if (cnir->start_index <= index && 954 index < cnir->start_index + cnir->count) { 955 goto filldir; 956 } else if (cnir->start_index + cnir->count == index) { 957 cnir->count++; 958 goto filldir; 959 } else if (!cnir->start_index && cnir->start_index - 1 == index) { 960 cnir->start_index = index; 961 goto filldir; 962 } 963 964 if (index < cnir->start_index) 965 node = node->rb_left; 966 else if (index >= cnir->start_index + cnir->count) 967 node = node->rb_right; 968 } 969 970 if (nir->start_index + nir->count == index) { 971 nir->count++; 972 } else if (!nir->start_index && nir->start_index - 1 == index) { 973 nir->start_index = index; 974 } else if (nir->count > 2) { 975 ntfs_insert_rb(nir, &ra_root); 976 nir = NULL; 977 } else { 978 nir->start_index = index; 979 nir->count = 1; 980 } 981 } 982 983 if (!nir) { 984 nir = kzalloc(sizeof(struct ntfs_index_ra), GFP_KERNEL); 985 if (nir) { 986 nir->start_index = index; 987 nir->count = 1; 988 } 989 } 990 991 filldir: 992 /* Submit the name to the filldir callback. */ 993 err = ntfs_filldir(vol, ndir, NULL, next, name, actor); 994 if (err) { 995 /* 996 * Store index key value to file private_data to start 997 * from current index offset on next round. 998 */ 999 private = file->private_data; 1000 kfree(private->key); 1001 private->key = kmalloc(le16_to_cpu(next->key_length), GFP_KERNEL); 1002 if (!private->key) { 1003 err = -ENOMEM; 1004 goto out; 1005 } 1006 1007 memcpy(private->key, &next->key.file_name, le16_to_cpu(next->key_length)); 1008 private->key_length = next->key_length; 1009 break; 1010 } 1011 ie_pos += le16_to_cpu(next->length); 1012 } 1013 1014 if (!err) 1015 private->end_in_iterate = true; 1016 else 1017 err = 0; 1018 1019 private->curr_pos = actor->pos = ie_pos; 1020 out: 1021 while (!RB_EMPTY_ROOT(&ra_root)) { 1022 struct ntfs_index_ra *cnir; 1023 struct rb_node *node; 1024 1025 node = rb_first(&ra_root); 1026 cnir = rb_entry(node, struct ntfs_index_ra, rb_node); 1027 ra->ra_pages = cnir->count; 1028 page_cache_sync_readahead(vol->mft_ino->i_mapping, ra, NULL, 1029 cnir->start_index, cnir->count); 1030 rb_erase(node, &ra_root); 1031 kfree(cnir); 1032 } 1033 1034 if (err) { 1035 if (private) { 1036 private->curr_pos = actor->pos; 1037 private->end_in_iterate = true; 1038 } 1039 err = 0; 1040 } 1041 ntfs_index_ctx_put(ictx); 1042 kfree(name); 1043 kfree(nir); 1044 kfree(ra); 1045 mutex_unlock(&ndir->mrec_lock); 1046 return err; 1047 } 1048 1049 int ntfs_check_empty_dir(struct ntfs_inode *ni, struct mft_record *ni_mrec) 1050 { 1051 struct ntfs_attr_search_ctx *ctx; 1052 int ret = 0; 1053 1054 if (!(ni_mrec->flags & MFT_RECORD_IS_DIRECTORY)) 1055 return 0; 1056 1057 ctx = ntfs_attr_get_search_ctx(ni, NULL); 1058 if (!ctx) { 1059 ntfs_error(ni->vol->sb, "Failed to get search context"); 1060 return -ENOMEM; 1061 } 1062 1063 /* Find the index root attribute in the mft record. */ 1064 ret = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL, 1065 0, ctx); 1066 if (ret) { 1067 ntfs_error(ni->vol->sb, "Index root attribute missing in directory inode %llu", 1068 ni->mft_no); 1069 ntfs_attr_put_search_ctx(ctx); 1070 return ret; 1071 } 1072 1073 /* Non-empty directory? */ 1074 if (le32_to_cpu(ctx->attr->data.resident.value_length) != 1075 sizeof(struct index_root) + sizeof(struct index_entry_header)) { 1076 /* Both ENOTEMPTY and EEXIST are ok. We use the more common. */ 1077 ret = -ENOTEMPTY; 1078 ntfs_debug("Directory is not empty\n"); 1079 } 1080 1081 ntfs_attr_put_search_ctx(ctx); 1082 1083 return ret; 1084 } 1085 1086 /* 1087 * ntfs_dir_open - called when an inode is about to be opened 1088 * @vi: inode to be opened 1089 * @filp: file structure describing the inode 1090 * 1091 * Limit directory size to the page cache limit on architectures where unsigned 1092 * long is 32-bits. This is the most we can do for now without overflowing the 1093 * page cache page index. Doing it this way means we don't run into problems 1094 * because of existing too large directories. It would be better to allow the 1095 * user to read the accessible part of the directory but I doubt very much 1096 * anyone is going to hit this check on a 32-bit architecture, so there is no 1097 * point in adding the extra complexity required to support this. 1098 * 1099 * On 64-bit architectures, the check is hopefully optimized away by the 1100 * compiler. 1101 */ 1102 static int ntfs_dir_open(struct inode *vi, struct file *filp) 1103 { 1104 if (sizeof(unsigned long) < 8) { 1105 if (i_size_read(vi) > MAX_LFS_FILESIZE) 1106 return -EFBIG; 1107 } 1108 return 0; 1109 } 1110 1111 static int ntfs_dir_release(struct inode *vi, struct file *filp) 1112 { 1113 if (filp->private_data) { 1114 kfree(((struct ntfs_file_private *)filp->private_data)->key); 1115 kfree(filp->private_data); 1116 filp->private_data = NULL; 1117 } 1118 return 0; 1119 } 1120 1121 /* 1122 * ntfs_dir_fsync - sync a directory to disk 1123 * @filp: file describing the directory to be synced 1124 * @start: start offset to be synced 1125 * @end: end offset to be synced 1126 * @datasync: if non-zero only flush user data and not metadata 1127 * 1128 * Data integrity sync of a directory to disk. Used for fsync, fdatasync, and 1129 * msync system calls. This function is based on file.c::ntfs_file_fsync(). 1130 * 1131 * Write the mft record and all associated extent mft records as well as the 1132 * $INDEX_ALLOCATION and $BITMAP attributes and then sync the block device. 1133 * 1134 * If @datasync is true, we do not wait on the inode(s) to be written out 1135 * but we always wait on the page cache pages to be written out. 1136 * 1137 * Note: In the past @filp could be NULL so we ignore it as we don't need it 1138 * anyway. 1139 * 1140 * Locking: Caller must hold i_mutex on the inode. 1141 */ 1142 static int ntfs_dir_fsync(struct file *filp, loff_t start, loff_t end, 1143 int datasync) 1144 { 1145 struct inode *bmp_vi, *vi = filp->f_mapping->host; 1146 struct ntfs_volume *vol = NTFS_I(vi)->vol; 1147 struct ntfs_inode *ni = NTFS_I(vi); 1148 struct ntfs_attr_search_ctx *ctx; 1149 struct inode *parent_vi, *ia_vi; 1150 int err, ret; 1151 struct ntfs_attr na; 1152 1153 ntfs_debug("Entering for inode 0x%llx.", ni->mft_no); 1154 1155 if (NVolShutdown(vol)) 1156 return -EIO; 1157 1158 ctx = ntfs_attr_get_search_ctx(ni, NULL); 1159 if (!ctx) 1160 return -ENOMEM; 1161 1162 mutex_lock_nested(&ni->mrec_lock, NTFS_INODE_MUTEX_NORMAL_CHILD); 1163 while (!(err = ntfs_attr_lookup(AT_FILE_NAME, NULL, 0, 0, 0, NULL, 0, ctx))) { 1164 struct file_name_attr *fn = (struct file_name_attr *)((u8 *)ctx->attr + 1165 le16_to_cpu(ctx->attr->data.resident.value_offset)); 1166 1167 if (MREF_LE(fn->parent_directory) == ni->mft_no) 1168 continue; 1169 1170 parent_vi = ntfs_iget(vi->i_sb, MREF_LE(fn->parent_directory)); 1171 if (IS_ERR(parent_vi)) 1172 continue; 1173 mutex_lock_nested(&NTFS_I(parent_vi)->mrec_lock, NTFS_INODE_MUTEX_NORMAL); 1174 ia_vi = ntfs_index_iget(parent_vi, I30, 4); 1175 mutex_unlock(&NTFS_I(parent_vi)->mrec_lock); 1176 if (IS_ERR(ia_vi)) { 1177 iput(parent_vi); 1178 continue; 1179 } 1180 write_inode_now(ia_vi, 1); 1181 iput(ia_vi); 1182 write_inode_now(parent_vi, 1); 1183 iput(parent_vi); 1184 } 1185 mutex_unlock(&ni->mrec_lock); 1186 ntfs_attr_put_search_ctx(ctx); 1187 1188 err = file_write_and_wait_range(filp, start, end); 1189 if (err) 1190 return err; 1191 inode_lock(vi); 1192 1193 /* If the bitmap attribute inode is in memory sync it, too. */ 1194 na.mft_no = vi->i_ino; 1195 na.type = AT_BITMAP; 1196 na.name = I30; 1197 na.name_len = 4; 1198 bmp_vi = ilookup5(vi->i_sb, vi->i_ino, ntfs_test_inode, &na); 1199 if (bmp_vi) { 1200 write_inode_now(bmp_vi, !datasync); 1201 iput(bmp_vi); 1202 } 1203 ret = __ntfs_write_inode(vi, 1); 1204 1205 write_inode_now(vi, !datasync); 1206 1207 write_inode_now(vol->mftbmp_ino, 1); 1208 down_write(&vol->lcnbmp_lock); 1209 write_inode_now(vol->lcnbmp_ino, 1); 1210 up_write(&vol->lcnbmp_lock); 1211 write_inode_now(vol->mft_ino, 1); 1212 1213 err = sync_blockdev(vi->i_sb->s_bdev); 1214 if (unlikely(err && !ret)) 1215 ret = err; 1216 if (likely(!ret)) 1217 ntfs_debug("Done."); 1218 else 1219 ntfs_warning(vi->i_sb, 1220 "Failed to f%ssync inode 0x%llx. Error %u.", 1221 datasync ? "data" : "", ni->mft_no, -ret); 1222 inode_unlock(vi); 1223 return ret; 1224 } 1225 1226 const struct file_operations ntfs_dir_ops = { 1227 .llseek = generic_file_llseek, /* Seek inside directory. */ 1228 .read = generic_read_dir, /* Return -EISDIR. */ 1229 .iterate_shared = ntfs_readdir, /* Read directory contents. */ 1230 .fsync = ntfs_dir_fsync, /* Sync a directory to disk. */ 1231 .open = ntfs_dir_open, /* Open directory. */ 1232 .release = ntfs_dir_release, 1233 .unlocked_ioctl = ntfs_ioctl, 1234 #ifdef CONFIG_COMPAT 1235 .compat_ioctl = ntfs_compat_ioctl, 1236 #endif 1237 .setlease = generic_setlease, 1238 }; 1239