1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /*
3 * NTFS kernel directory operations.
4 *
5 * Copyright (c) 2001-2007 Anton Altaparmakov
6 * Copyright (c) 2002 Richard Russon
7 * Copyright (c) 2025 LG Electronics Co., Ltd.
8 */
9
10 #include <linux/blkdev.h>
11
12 #include "dir.h"
13 #include "mft.h"
14 #include "ntfs.h"
15 #include "index.h"
16 #include "reparse.h"
17
18 #include <linux/filelock.h>
19
/*
 * The little endian Unicode string $I30 as a global constant.
 *
 * $I30 is the name of the two attributes that together form an NTFS
 * directory index (AT_INDEX_ROOT and AT_INDEX_ALLOCATION); it is passed as
 * the attribute name to ntfs_attr_lookup() and ntfs_index_iget() below.
 * The array is NUL-terminated (fifth element) although only the first four
 * characters form the name.
 */
__le16 I30[5] = { cpu_to_le16('$'), cpu_to_le16('I'),
		cpu_to_le16('3'), cpu_to_le16('0'), 0 };
25
26 /*
27 * ntfs_lookup_inode_by_name - find an inode in a directory given its name
28 * @dir_ni: ntfs inode of the directory in which to search for the name
29 * @uname: Unicode name for which to search in the directory
30 * @uname_len: length of the name @uname in Unicode characters
31 * @res: return the found file name if necessary (see below)
32 *
33 * Look for an inode with name @uname in the directory with inode @dir_ni.
34 * ntfs_lookup_inode_by_name() walks the contents of the directory looking for
35 * the Unicode name. If the name is found in the directory, the corresponding
36 * inode number (>= 0) is returned as a mft reference in cpu format, i.e. it
37 * is a 64-bit number containing the sequence number.
38 *
39 * On error, a negative value is returned corresponding to the error code. In
40 * particular if the inode is not found -ENOENT is returned. Note that you
41 * can't just check the return value for being negative, you have to check the
42 * inode number for being negative which you can extract using MREC(return
43 * value).
44 *
45 * Note, @uname_len does not include the (optional) terminating NULL character.
46 *
47 * Note, we look for a case sensitive match first but we also look for a case
48 * insensitive match at the same time. If we find a case insensitive match, we
49 * save that for the case that we don't find an exact match, where we return
50 * the case insensitive match and setup @res (which we allocate!) with the mft
51 * reference, the file name type, length and with a copy of the little endian
52 * Unicode file name itself. If we match a file name which is in the DOS name
53 * space, we only return the mft reference and file name type in @res.
54 * ntfs_lookup() then uses this to find the long file name in the inode itself.
55 * This is to avoid polluting the dcache with short file names. We want them to
56 * work but we don't care for how quickly one can access them. This also fixes
57 * the dcache aliasing issues.
58 *
59 * Locking: - Caller must hold i_mutex on the directory.
60 * - Each page cache page in the index allocation mapping must be
61 * locked whilst being accessed otherwise we may find a corrupt
62 * page due to it being under ->writepage at the moment which
63 * applies the mst protection fixups before writing out and then
64 * removes them again after the write is complete after which it
65 * unlocks the page.
66 */
u64 ntfs_lookup_inode_by_name(struct ntfs_inode *dir_ni, const __le16 *uname,
		const int uname_len, struct ntfs_name **res)
{
	struct ntfs_volume *vol = dir_ni->vol;
	struct super_block *sb = vol->sb;
	struct inode *ia_vi = NULL;
	struct mft_record *m;
	struct index_root *ir;
	struct index_entry *ie;
	struct index_block *ia;
	u8 *index_end;
	u64 mref;
	struct ntfs_attr_search_ctx *ctx;
	int err, rc;
	s64 vcn, old_vcn;
	struct address_space *ia_mapping;
	struct folio *folio;
	u8 *kaddr = NULL;
	struct ntfs_name *name = NULL;

	/* Get hold of the mft record for the directory. */
	m = map_mft_record(dir_ni);
	if (IS_ERR(m)) {
		ntfs_error(sb, "map_mft_record() failed with error code %ld.",
				-PTR_ERR(m));
		return ERR_MREF(PTR_ERR(m));
	}
	ctx = ntfs_attr_get_search_ctx(dir_ni, m);
	if (unlikely(!ctx)) {
		err = -ENOMEM;
		goto err_out;
	}
	/* Find the index root attribute in the mft record. */
	err = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL,
			0, ctx);
	if (unlikely(err)) {
		if (err == -ENOENT) {
			/*
			 * Every directory must have an index root; its
			 * absence means the inode is corrupt, not that the
			 * looked up name is missing.
			 */
			ntfs_error(sb,
					"Index root attribute missing in directory inode 0x%llx.",
					dir_ni->mft_no);
			err = -EIO;
		}
		goto err_out;
	}
	/* Get to the index root value (it's been verified in read_inode). */
	ir = (struct index_root *)((u8 *)ctx->attr +
			le16_to_cpu(ctx->attr->data.resident.value_offset));
	index_end = (u8 *)&ir->index + le32_to_cpu(ir->index.index_length);
	/* The first index entry. */
	ie = (struct index_entry *)((u8 *)&ir->index +
			le32_to_cpu(ir->index.entries_offset));
	/*
	 * Phase 1: scan the entries resident in the index root.
	 *
	 * Loop until we exceed valid memory (corruption case) or until we
	 * reach the last entry.
	 */
	for (;; ie = (struct index_entry *)((u8 *)ie + le16_to_cpu(ie->length))) {
		/* Bounds checks. */
		if ((u8 *)ie < (u8 *)ctx->mrec ||
		    (u8 *)ie + sizeof(struct index_entry_header) > index_end ||
		    (u8 *)ie + sizeof(struct index_entry_header) + le16_to_cpu(ie->key_length) >
		    index_end || (u8 *)ie + le16_to_cpu(ie->length) > index_end)
			goto dir_err_out;
		/*
		 * The last entry cannot contain a name. It can however contain
		 * a pointer to a child node in the B+tree so we just break out.
		 */
		if (ie->flags & INDEX_ENTRY_END)
			break;
		/* Key length should not be zero if it is not last entry. */
		if (!ie->key_length)
			goto dir_err_out;
		/* Check the consistency of an index entry */
		if (ntfs_index_entry_inconsistent(NULL, vol, ie, COLLATION_FILE_NAME,
				dir_ni->mft_no))
			goto dir_err_out;
		/*
		 * We perform a case sensitive comparison and if that matches
		 * we are done and return the mft reference of the inode (i.e.
		 * the inode number together with the sequence number for
		 * consistency checking). We convert it to cpu format before
		 * returning.
		 */
		if (ntfs_are_names_equal(uname, uname_len,
				(__le16 *)&ie->key.file_name.file_name,
				ie->key.file_name.file_name_length,
				CASE_SENSITIVE, vol->upcase, vol->upcase_len)) {
found_it:
			/*
			 * We have a perfect match, so we don't need to care
			 * about having matched imperfectly before, so we can
			 * free name and set *res to NULL.
			 * However, if the perfect match is a short file name,
			 * we need to signal this through *res, so that
			 * ntfs_lookup() can fix dcache aliasing issues.
			 * As an optimization we just reuse an existing
			 * allocation of *res.
			 */
			if (ie->key.file_name.file_name_type == FILE_NAME_DOS) {
				if (!name) {
					name = kmalloc(sizeof(struct ntfs_name),
							GFP_NOFS);
					if (!name) {
						err = -ENOMEM;
						goto err_out;
					}
				}
				name->mref = le64_to_cpu(
						ie->data.dir.indexed_file);
				name->type = FILE_NAME_DOS;
				name->len = 0;
				*res = name;
			} else {
				kfree(name);
				*res = NULL;
			}
			mref = le64_to_cpu(ie->data.dir.indexed_file);
			ntfs_attr_put_search_ctx(ctx);
			unmap_mft_record(dir_ni);
			return mref;
		}
		/*
		 * For a case insensitive mount, we also perform a case
		 * insensitive comparison (provided the file name is not in the
		 * POSIX namespace). If the comparison matches, and the name is
		 * in the WIN32 namespace, we cache the filename in *res so
		 * that the caller, ntfs_lookup(), can work on it. If the
		 * comparison matches, and the name is in the DOS namespace, we
		 * only cache the mft reference and the file name type (we set
		 * the name length to zero for simplicity).
		 */
		if ((!NVolCaseSensitive(vol) ||
		     ie->key.file_name.file_name_type == FILE_NAME_DOS) &&
		    ntfs_are_names_equal(uname, uname_len,
				(__le16 *)&ie->key.file_name.file_name,
				ie->key.file_name.file_name_length,
				IGNORE_CASE, vol->upcase,
				vol->upcase_len)) {
			int name_size = sizeof(struct ntfs_name);
			u8 type = ie->key.file_name.file_name_type;
			u8 len = ie->key.file_name.file_name_length;

			/* Only one case insensitive matching name allowed. */
			if (name) {
				ntfs_error(sb,
						"Found already allocated name in phase 1. Please run chkdsk");
				goto dir_err_out;
			}

			if (type != FILE_NAME_DOS)
				name_size += len * sizeof(__le16);
			name = kmalloc(name_size, GFP_NOFS);
			if (!name) {
				err = -ENOMEM;
				goto err_out;
			}
			name->mref = le64_to_cpu(ie->data.dir.indexed_file);
			name->type = type;
			if (type != FILE_NAME_DOS) {
				name->len = len;
				memcpy(name->name, ie->key.file_name.file_name,
						len * sizeof(__le16));
			} else
				name->len = 0;
			*res = name;
		}
		/*
		 * Not a perfect match, need to do full blown collation so we
		 * know which way in the B+tree we have to go.
		 */
		rc = ntfs_collate_names(uname, uname_len,
				(__le16 *)&ie->key.file_name.file_name,
				ie->key.file_name.file_name_length, 1,
				IGNORE_CASE, vol->upcase, vol->upcase_len);
		/*
		 * If uname collates before the name of the current entry, there
		 * is definitely no such name in this index but we might need to
		 * descend into the B+tree so we just break out of the loop.
		 */
		if (rc == -1)
			break;
		/* The names are not equal, continue the search. */
		if (rc)
			continue;
		/*
		 * Names match with case insensitive comparison, now try the
		 * case sensitive comparison, which is required for proper
		 * collation.
		 */
		rc = ntfs_collate_names(uname, uname_len,
				(__le16 *)&ie->key.file_name.file_name,
				ie->key.file_name.file_name_length, 1,
				CASE_SENSITIVE, vol->upcase, vol->upcase_len);
		if (rc == -1)
			break;
		if (rc)
			continue;
		/*
		 * Perfect match, this will never happen as the
		 * ntfs_are_names_equal() call will have gotten a match but we
		 * still treat it correctly.
		 */
		goto found_it;
	}
	/*
	 * We have finished with this index without success. Check for the
	 * presence of a child node and if not present return -ENOENT, unless
	 * we have got a matching name cached in name in which case return the
	 * mft reference associated with it.
	 */
	if (!(ie->flags & INDEX_ENTRY_NODE)) {
		if (name) {
			ntfs_attr_put_search_ctx(ctx);
			unmap_mft_record(dir_ni);
			return name->mref;
		}
		ntfs_debug("Entry not found.");
		err = -ENOENT;
		goto err_out;
	} /* Child node present, descend into it. */

	/*
	 * Get the starting vcn of the index_block holding the child node:
	 * it is stored as a little endian 64-bit value in the last eight
	 * bytes of the index entry.
	 */
	vcn = le64_to_cpup((__le64 *)((u8 *)ie + le16_to_cpu(ie->length) - 8));

	/*
	 * We are done with the index root and the mft record. Release them,
	 * otherwise we deadlock with read_mapping_folio().
	 */
	ntfs_attr_put_search_ctx(ctx);
	unmap_mft_record(dir_ni);
	m = NULL;
	ctx = NULL;

	/* Phase 2: walk the non-resident index allocation blocks. */
	ia_vi = ntfs_index_iget(VFS_I(dir_ni), I30, 4);
	if (IS_ERR(ia_vi)) {
		err = PTR_ERR(ia_vi);
		goto err_out;
	}

	ia_mapping = ia_vi->i_mapping;
descend_into_child_node:
	/*
	 * Convert vcn to index into the index allocation attribute in units
	 * of PAGE_SIZE and map the page cache page, reading it from
	 * disk if necessary.
	 */
	folio = read_mapping_folio(ia_mapping, vcn <<
			dir_ni->itype.index.vcn_size_bits >> PAGE_SHIFT, NULL);
	if (IS_ERR(folio)) {
		ntfs_error(sb, "Failed to map directory index page, error %ld.",
				-PTR_ERR(folio));
		err = PTR_ERR(folio);
		goto err_out;
	}

	folio_lock(folio);
	kaddr = kmalloc(PAGE_SIZE, GFP_NOFS);
	if (!kaddr) {
		err = -ENOMEM;
		folio_unlock(folio);
		folio_put(folio);
		goto unm_err_out;
	}

	/*
	 * Work on a private copy of the page and apply the mst fixups to the
	 * copy, so the page cache contents are left untouched and the folio
	 * lock can be dropped immediately.
	 */
	memcpy_from_folio(kaddr, folio, 0, PAGE_SIZE);
	post_read_mst_fixup((struct ntfs_record *)kaddr, PAGE_SIZE);
	folio_unlock(folio);
	folio_put(folio);
fast_descend_into_child_node:
	/* Get to the index allocation block. */
	ia = (struct index_block *)(kaddr + ((vcn <<
			dir_ni->itype.index.vcn_size_bits) & ~PAGE_MASK));
	/* Bounds checks. */
	if ((u8 *)ia < kaddr || (u8 *)ia > kaddr + PAGE_SIZE) {
		ntfs_error(sb,
				"Out of bounds check failed. Corrupt directory inode 0x%llx or driver bug.",
				dir_ni->mft_no);
		goto unm_err_out;
	}
	/* Catch multi sector transfer fixup errors. */
	if (unlikely(!ntfs_is_indx_record(ia->magic))) {
		ntfs_error(sb,
				"Directory index record with vcn 0x%llx is corrupt. Corrupt inode 0x%llx. Run chkdsk.",
				vcn, dir_ni->mft_no);
		goto unm_err_out;
	}
	if (le64_to_cpu(ia->index_block_vcn) != vcn) {
		ntfs_error(sb,
				"Actual VCN (0x%llx) of index buffer is different from expected VCN (0x%llx). Directory inode 0x%llx is corrupt or driver bug.",
				le64_to_cpu(ia->index_block_vcn),
				vcn, dir_ni->mft_no);
		goto unm_err_out;
	}
	/* 0x18 is the offset of the index header inside the index block. */
	if (le32_to_cpu(ia->index.allocated_size) + 0x18 !=
			dir_ni->itype.index.block_size) {
		ntfs_error(sb,
				"Index buffer (VCN 0x%llx) of directory inode 0x%llx has a size (%u) differing from the directory specified size (%u). Directory inode is corrupt or driver bug.",
				vcn, dir_ni->mft_no,
				le32_to_cpu(ia->index.allocated_size) + 0x18,
				dir_ni->itype.index.block_size);
		goto unm_err_out;
	}
	index_end = (u8 *)ia + dir_ni->itype.index.block_size;
	if (index_end > kaddr + PAGE_SIZE) {
		ntfs_error(sb,
				"Index buffer (VCN 0x%llx) of directory inode 0x%llx crosses page boundary. Impossible! Cannot access! This is probably a bug in the driver.",
				vcn, dir_ni->mft_no);
		goto unm_err_out;
	}
	index_end = (u8 *)&ia->index + le32_to_cpu(ia->index.index_length);
	if (index_end > (u8 *)ia + dir_ni->itype.index.block_size) {
		ntfs_error(sb,
				"Size of index buffer (VCN 0x%llx) of directory inode 0x%llx exceeds maximum size.",
				vcn, dir_ni->mft_no);
		goto unm_err_out;
	}
	/* The first index entry. */
	ie = (struct index_entry *)((u8 *)&ia->index +
			le32_to_cpu(ia->index.entries_offset));
	/*
	 * Iterate similar to above big loop but applied to index buffer, thus
	 * loop until we exceed valid memory (corruption case) or until we
	 * reach the last entry.
	 */
	for (;; ie = (struct index_entry *)((u8 *)ie + le16_to_cpu(ie->length))) {
		/* Bounds checks. */
		if ((u8 *)ie < (u8 *)ia ||
		    (u8 *)ie + sizeof(struct index_entry_header) > index_end ||
		    (u8 *)ie + sizeof(struct index_entry_header) + le16_to_cpu(ie->key_length) >
		    index_end || (u8 *)ie + le16_to_cpu(ie->length) > index_end) {
			ntfs_error(sb, "Index entry out of bounds in directory inode 0x%llx.",
					dir_ni->mft_no);
			goto unm_err_out;
		}
		/*
		 * The last entry cannot contain a name. It can however contain
		 * a pointer to a child node in the B+tree so we just break out.
		 */
		if (ie->flags & INDEX_ENTRY_END)
			break;
		/* Key length should not be zero if it is not last entry. */
		if (!ie->key_length)
			goto unm_err_out;
		/* Check the consistency of an index entry */
		if (ntfs_index_entry_inconsistent(NULL, vol, ie, COLLATION_FILE_NAME,
				dir_ni->mft_no))
			goto unm_err_out;
		/*
		 * We perform a case sensitive comparison and if that matches
		 * we are done and return the mft reference of the inode (i.e.
		 * the inode number together with the sequence number for
		 * consistency checking). We convert it to cpu format before
		 * returning.
		 */
		if (ntfs_are_names_equal(uname, uname_len,
				(__le16 *)&ie->key.file_name.file_name,
				ie->key.file_name.file_name_length,
				CASE_SENSITIVE, vol->upcase, vol->upcase_len)) {
found_it2:
			/*
			 * We have a perfect match, so we don't need to care
			 * about having matched imperfectly before, so we can
			 * free name and set *res to NULL.
			 * However, if the perfect match is a short file name,
			 * we need to signal this through *res, so that
			 * ntfs_lookup() can fix dcache aliasing issues.
			 * As an optimization we just reuse an existing
			 * allocation of *res.
			 */
			if (ie->key.file_name.file_name_type == FILE_NAME_DOS) {
				if (!name) {
					name = kmalloc(sizeof(struct ntfs_name),
							GFP_NOFS);
					if (!name) {
						err = -ENOMEM;
						goto unm_err_out;
					}
				}
				name->mref = le64_to_cpu(
						ie->data.dir.indexed_file);
				name->type = FILE_NAME_DOS;
				name->len = 0;
				*res = name;
			} else {
				kfree(name);
				*res = NULL;
			}
			mref = le64_to_cpu(ie->data.dir.indexed_file);
			kfree(kaddr);
			iput(ia_vi);
			return mref;
		}
		/*
		 * For a case insensitive mount, we also perform a case
		 * insensitive comparison (provided the file name is not in the
		 * POSIX namespace). If the comparison matches, and the name is
		 * in the WIN32 namespace, we cache the filename in *res so
		 * that the caller, ntfs_lookup(), can work on it. If the
		 * comparison matches, and the name is in the DOS namespace, we
		 * only cache the mft reference and the file name type (we set
		 * the name length to zero for simplicity).
		 */
		if ((!NVolCaseSensitive(vol) ||
		     ie->key.file_name.file_name_type == FILE_NAME_DOS) &&
		    ntfs_are_names_equal(uname, uname_len,
				(__le16 *)&ie->key.file_name.file_name,
				ie->key.file_name.file_name_length,
				IGNORE_CASE, vol->upcase,
				vol->upcase_len)) {
			int name_size = sizeof(struct ntfs_name);
			u8 type = ie->key.file_name.file_name_type;
			u8 len = ie->key.file_name.file_name_length;

			/* Only one case insensitive matching name allowed. */
			if (name) {
				ntfs_error(sb,
						"Found already allocated name in phase 2. Please run chkdsk");
				/*
				 * dir_err_out does not free kaddr, so free it
				 * here before taking that exit.
				 */
				kfree(kaddr);
				goto dir_err_out;
			}

			if (type != FILE_NAME_DOS)
				name_size += len * sizeof(__le16);
			name = kmalloc(name_size, GFP_NOFS);
			if (!name) {
				err = -ENOMEM;
				goto unm_err_out;
			}
			name->mref = le64_to_cpu(ie->data.dir.indexed_file);
			name->type = type;
			if (type != FILE_NAME_DOS) {
				name->len = len;
				memcpy(name->name, ie->key.file_name.file_name,
						len * sizeof(__le16));
			} else
				name->len = 0;
			*res = name;
		}
		/*
		 * Not a perfect match, need to do full blown collation so we
		 * know which way in the B+tree we have to go.
		 */
		rc = ntfs_collate_names(uname, uname_len,
				(__le16 *)&ie->key.file_name.file_name,
				ie->key.file_name.file_name_length, 1,
				IGNORE_CASE, vol->upcase, vol->upcase_len);
		/*
		 * If uname collates before the name of the current entry, there
		 * is definitely no such name in this index but we might need to
		 * descend into the B+tree so we just break out of the loop.
		 */
		if (rc == -1)
			break;
		/* The names are not equal, continue the search. */
		if (rc)
			continue;
		/*
		 * Names match with case insensitive comparison, now try the
		 * case sensitive comparison, which is required for proper
		 * collation.
		 */
		rc = ntfs_collate_names(uname, uname_len,
				(__le16 *)&ie->key.file_name.file_name,
				ie->key.file_name.file_name_length, 1,
				CASE_SENSITIVE, vol->upcase, vol->upcase_len);
		if (rc == -1)
			break;
		if (rc)
			continue;
		/*
		 * Perfect match, this will never happen as the
		 * ntfs_are_names_equal() call will have gotten a match but we
		 * still treat it correctly.
		 */
		goto found_it2;
	}
	/*
	 * We have finished with this index buffer without success. Check for
	 * the presence of a child node.
	 */
	if (ie->flags & INDEX_ENTRY_NODE) {
		if ((ia->index.flags & NODE_MASK) == LEAF_NODE) {
			ntfs_error(sb,
					"Index entry with child node found in a leaf node in directory inode 0x%llx.",
					dir_ni->mft_no);
			goto unm_err_out;
		}
		/* Child node present, descend into it. */
		old_vcn = vcn;
		vcn = le64_to_cpup((__le64 *)((u8 *)ie +
				le16_to_cpu(ie->length) - 8));
		if (vcn >= 0) {
			/*
			 * If vcn is in the same page cache page as old_vcn we
			 * recycle the mapped page.
			 */
			if (ntfs_cluster_to_pidx(vol, old_vcn) ==
					ntfs_cluster_to_pidx(vol, vcn))
				goto fast_descend_into_child_node;
			kfree(kaddr);
			kaddr = NULL;
			goto descend_into_child_node;
		}
		ntfs_error(sb, "Negative child node vcn in directory inode 0x%llx.",
				dir_ni->mft_no);
		goto unm_err_out;
	}
	/*
	 * No child node present, return -ENOENT, unless we have got a matching
	 * name cached in name in which case return the mft reference
	 * associated with it.
	 */
	if (name) {
		kfree(kaddr);
		iput(ia_vi);
		return name->mref;
	}
	ntfs_debug("Entry not found.");
	err = -ENOENT;
unm_err_out:
	kfree(kaddr);
err_out:
	if (!err)
		err = -EIO;
	if (ctx)
		ntfs_attr_put_search_ctx(ctx);
	if (m)
		unmap_mft_record(dir_ni);
	kfree(name);
	*res = NULL;
	if (!IS_ERR_OR_NULL(ia_vi))
		iput(ia_vi);
	return ERR_MREF(err);
dir_err_out:
	ntfs_error(sb, "Corrupt directory. Aborting lookup.");
	goto err_out;
}
603
604 /*
605 * ntfs_filldir - ntfs specific filldir method
606 * @vol: current ntfs volume
607 * @ndir: ntfs inode of current directory
608 * @ia_page: page in which the index allocation buffer @ie is in resides
609 * @ie: current index entry
610 * @name: buffer to use for the converted name
611 * @actor: what to feed the entries to
612 *
613 * Convert the Unicode @name to the loaded NLS and pass it to the @filldir
614 * callback.
615 *
616 * If @ia_page is not NULL it is the locked page containing the index
617 * allocation block containing the index entry @ie.
618 *
619 * Note, we drop (and then reacquire) the page lock on @ia_page across the
620 * @filldir() call otherwise we would deadlock with NFSd when it calls ->lookup
621 * since ntfs_lookup() will lock the same page. As an optimization, we do not
622 * retake the lock if we are returning a non-zero value as ntfs_readdir()
623 * would need to drop the lock immediately anyway.
624 */
static inline int ntfs_filldir(struct ntfs_volume *vol,
		struct ntfs_inode *ndir, struct page *ia_page, struct index_entry *ie,
		u8 *name, struct dir_context *actor)
{
	const unsigned long mref = MREF_LE(ie->data.dir.indexed_file);
	const u8 fn_type = ie->key.file_name.file_name_type;
	unsigned int dt_type;
	int out_len;

	/* DOS (8.3) names are aliases of a long name; never emit them. */
	if (fn_type == FILE_NAME_DOS) {
		ntfs_debug("Skipping DOS name space entry.");
		return 0;
	}
	/* The root directory indexes itself; skip the self reference. */
	if (mref == FILE_root) {
		ntfs_debug("Skipping root directory self reference entry.");
		return 0;
	}
	/* Hide the NTFS metadata files unless the mount shows them. */
	if (mref < FILE_first_user && !NVolShowSystemFiles(vol)) {
		ntfs_debug("Skipping system file.");
		return 0;
	}
	/* Honour the hidden attribute unless hidden files are shown. */
	if ((ie->key.file_name.file_attributes & FILE_ATTR_HIDDEN) &&
	    !NVolShowHiddenFiles(vol)) {
		ntfs_debug("Skipping hidden file.");
		return 0;
	}

	/* Convert the Unicode name to the loaded NLS into @name. */
	out_len = ntfs_ucstonls(vol, (__le16 *)&ie->key.file_name.file_name,
			ie->key.file_name.file_name_length, &name,
			NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1);
	if (out_len <= 0) {
		ntfs_warning(vol->sb, "Skipping unrepresentable inode 0x%llx.",
				(long long)mref);
		return 0;
	}

	/* Derive the dirent type from the duplicated file attributes. */
	if (ie->key.file_name.file_attributes &
	    FILE_ATTR_DUP_FILE_NAME_INDEX_PRESENT)
		dt_type = DT_DIR;
	else if (ie->key.file_name.file_attributes & FILE_ATTR_REPARSE_POINT)
		dt_type = ntfs_reparse_tag_dt_types(vol, mref);
	else
		dt_type = DT_REG;

	/*
	 * Drop the page lock otherwise we deadlock with NFS when it calls
	 * ->lookup since ntfs_lookup() will lock the same page.
	 */
	if (ia_page)
		unlock_page(ia_page);
	ntfs_debug("Calling filldir for %s with len %i, fpos 0x%llx, inode 0x%lx, DT_%s.",
			name, out_len, actor->pos, mref, dt_type == DT_DIR ? "DIR" : "REG");
	if (!dir_emit(actor, name, out_len, mref, dt_type))
		return 1;
	/* Only retake the lock when we are not aborting ->readdir. */
	if (ia_page)
		lock_page(ia_page);
	return 0;
}
687
/*
 * Per-open-directory state used by ntfs_readdir() to resume iteration at
 * the index entry where the previous call stopped.
 */
struct ntfs_file_private {
	void *key;		/* copy of the last emitted entry's file name key */
	__le16 key_length;	/* byte length of @key, kept little endian */
	bool end_in_iterate;	/* set once the whole index has been emitted */
	loff_t curr_pos;	/* directory position matching @key */
};
694
/*
 * A contiguous range of mft page cache pages to read ahead, collected by
 * ntfs_readdir() in an rb tree ordered by @start_index.
 */
struct ntfs_index_ra {
	unsigned long start_index;	/* first page cache index of the range */
	unsigned int count;		/* number of pages in the range */
	struct rb_node rb_node;		/* linkage into the readahead rb tree */
};
700
ntfs_insert_rb(struct ntfs_index_ra * nir,struct rb_root * root)701 static void ntfs_insert_rb(struct ntfs_index_ra *nir, struct rb_root *root)
702 {
703 struct rb_node **new = &root->rb_node, *parent = NULL;
704 struct ntfs_index_ra *cnir;
705
706 while (*new) {
707 parent = *new;
708 cnir = rb_entry(parent, struct ntfs_index_ra, rb_node);
709 if (nir->start_index < cnir->start_index)
710 new = &parent->rb_left;
711 else if (nir->start_index >= cnir->start_index + cnir->count)
712 new = &parent->rb_right;
713 else {
714 pr_err("nir start index : %ld, count : %d, cnir start_index : %ld, count : %d\n",
715 nir->start_index, nir->count, cnir->start_index, cnir->count);
716 return;
717 }
718 }
719
720 rb_link_node(&nir->rb_node, parent, new);
721 rb_insert_color(&nir->rb_node, root);
722 }
723
/*
 * ntfs_ia_blocks_readahead - read ahead the index allocation pages from @pos
 * @ia_ni:	ntfs inode of the index allocation attribute
 * @pos:	byte offset into the attribute from which to read ahead
 *
 * Kick off synchronous readahead for the page range from @pos (rounded up
 * to a page boundary) to the end of the attribute.
 *
 * Return: 0 on success (also when there is nothing to read ahead) and
 * -ENOMEM if the temporary file_ra_state cannot be allocated.
 */
static int ntfs_ia_blocks_readahead(struct ntfs_inode *ia_ni, loff_t pos)
{
	unsigned long dir_start_index, dir_end_index;
	struct inode *ia_vi = VFS_I(ia_ni);
	struct file_ra_state *dir_ra;

	/* Compute the page range once; it was previously recomputed twice. */
	dir_end_index = (i_size_read(ia_vi) + PAGE_SIZE - 1) >> PAGE_SHIFT;
	dir_start_index = (pos + PAGE_SIZE - 1) >> PAGE_SHIFT;

	if (dir_start_index >= dir_end_index)
		return 0;

	/* Use a private ra state so the file's own readahead is untouched. */
	dir_ra = kzalloc(sizeof(*dir_ra), GFP_NOFS);
	if (!dir_ra)
		return -ENOMEM;

	file_ra_state_init(dir_ra, ia_vi->i_mapping);
	dir_ra->ra_pages = dir_end_index - dir_start_index;
	page_cache_sync_readahead(ia_vi->i_mapping, dir_ra, NULL,
			dir_start_index, dir_end_index - dir_start_index);
	kfree(dir_ra);

	return 0;
}
750
ntfs_readdir(struct file * file,struct dir_context * actor)751 static int ntfs_readdir(struct file *file, struct dir_context *actor)
752 {
753 struct inode *vdir = file_inode(file);
754 struct super_block *sb = vdir->i_sb;
755 struct ntfs_inode *ndir = NTFS_I(vdir);
756 struct ntfs_volume *vol = NTFS_SB(sb);
757 struct ntfs_attr_search_ctx *ctx = NULL;
758 struct ntfs_index_context *ictx = NULL;
759 u8 *name;
760 struct index_root *ir;
761 struct index_entry *next = NULL;
762 struct ntfs_file_private *private = NULL;
763 int err = 0;
764 loff_t ie_pos = 2; /* initialize it with dot and dotdot size */
765 struct ntfs_index_ra *nir = NULL;
766 unsigned long index;
767 struct rb_root ra_root = RB_ROOT;
768 struct file_ra_state *ra;
769
770 ntfs_debug("Entering for inode 0x%llx, fpos 0x%llx.",
771 ndir->mft_no, actor->pos);
772
773 if (file->private_data) {
774 private = file->private_data;
775
776 if (actor->pos != private->curr_pos) {
777 /*
778 * If actor->pos is different from the previous passed
779 * one, Discard the private->key and fill dirent buffer
780 * with linear lookup.
781 */
782 kfree(private->key);
783 private->key = NULL;
784 private->end_in_iterate = false;
785 } else if (private->end_in_iterate) {
786 kfree(private->key);
787 kfree(file->private_data);
788 file->private_data = NULL;
789 return 0;
790 }
791 }
792
793 /* Emulate . and .. for all directories. */
794 if (!dir_emit_dots(file, actor))
795 return 0;
796
797 /*
798 * Allocate a buffer to store the current name being processed
799 * converted to format determined by current NLS.
800 */
801 name = kmalloc(NTFS_MAX_NAME_LEN * NLS_MAX_CHARSET_SIZE + 1, GFP_NOFS);
802 if (unlikely(!name))
803 return -ENOMEM;
804
805 mutex_lock_nested(&ndir->mrec_lock, NTFS_INODE_MUTEX_PARENT);
806 ictx = ntfs_index_ctx_get(ndir, I30, 4);
807 if (!ictx) {
808 kfree(name);
809 mutex_unlock(&ndir->mrec_lock);
810 return -ENOMEM;
811 }
812
813 ra = kzalloc(sizeof(struct file_ra_state), GFP_NOFS);
814 if (!ra) {
815 kfree(name);
816 ntfs_index_ctx_put(ictx);
817 mutex_unlock(&ndir->mrec_lock);
818 return -ENOMEM;
819 }
820 file_ra_state_init(ra, vol->mft_ino->i_mapping);
821
822 if (private && private->key) {
823 /*
824 * Find index witk private->key using ntfs_index_lookup()
825 * instead of linear index lookup.
826 */
827 err = ntfs_index_lookup(private->key,
828 le16_to_cpu(private->key_length),
829 ictx);
830 if (!err) {
831 next = ictx->entry;
832 /*
833 * Update ie_pos with private->curr_pos
834 * to make next d_off of dirent correct.
835 */
836 ie_pos = private->curr_pos;
837
838 if (actor->pos > vol->mft_record_size && ictx->ia_ni) {
839 err = ntfs_ia_blocks_readahead(ictx->ia_ni, actor->pos);
840 if (err)
841 goto out;
842 }
843
844 goto nextdir;
845 } else {
846 goto out;
847 }
848 } else if (!private) {
849 private = kzalloc(sizeof(struct ntfs_file_private), GFP_KERNEL);
850 if (!private) {
851 err = -ENOMEM;
852 goto out;
853 }
854 file->private_data = private;
855 }
856
857 ctx = ntfs_attr_get_search_ctx(ndir, NULL);
858 if (!ctx) {
859 err = -ENOMEM;
860 goto out;
861 }
862
863 /* Find the index root attribute in the mft record. */
864 if (ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL, 0,
865 ctx)) {
866 ntfs_error(sb, "Index root attribute missing in directory inode %llu",
867 ndir->mft_no);
868 ntfs_attr_put_search_ctx(ctx);
869 err = -ENOMEM;
870 goto out;
871 }
872
873 /* Get to the index root value. */
874 ir = (struct index_root *)((u8 *)ctx->attr +
875 le16_to_cpu(ctx->attr->data.resident.value_offset));
876
877 ictx->ir = ir;
878 ictx->actx = ctx;
879 ictx->parent_vcn[ictx->pindex] = VCN_INDEX_ROOT_PARENT;
880 ictx->is_in_root = true;
881 ictx->parent_pos[ictx->pindex] = 0;
882
883 ictx->block_size = le32_to_cpu(ir->index_block_size);
884 if (ictx->block_size < NTFS_BLOCK_SIZE) {
885 ntfs_error(sb, "Index block size (%d) is smaller than the sector size (%d)",
886 ictx->block_size, NTFS_BLOCK_SIZE);
887 err = -EIO;
888 goto out;
889 }
890
891 if (vol->cluster_size <= ictx->block_size)
892 ictx->vcn_size_bits = vol->cluster_size_bits;
893 else
894 ictx->vcn_size_bits = NTFS_BLOCK_SIZE_BITS;
895
896 /* The first index entry. */
897 next = (struct index_entry *)((u8 *)&ir->index +
898 le32_to_cpu(ir->index.entries_offset));
899
900 if (next->flags & INDEX_ENTRY_NODE) {
901 ictx->ia_ni = ntfs_ia_open(ictx, ictx->idx_ni);
902 if (!ictx->ia_ni) {
903 err = -EINVAL;
904 goto out;
905 }
906
907 err = ntfs_ia_blocks_readahead(ictx->ia_ni, actor->pos);
908 if (err)
909 goto out;
910 }
911
912 if (next->flags & INDEX_ENTRY_NODE) {
913 next = ntfs_index_walk_down(next, ictx);
914 if (IS_ERR(next)) {
915 err = PTR_ERR(next);
916 goto out;
917 }
918 }
919
920 if (next && !(next->flags & INDEX_ENTRY_END))
921 goto nextdir;
922
923 while (1) {
924 next = ntfs_index_next(next, ictx);
925 if (IS_ERR(next)) {
926 err = PTR_ERR(next);
927 goto out;
928 }
929 if (!next)
930 break;
931 nextdir:
932 /* Check the consistency of an index entry */
933 if (ntfs_index_entry_inconsistent(ictx, vol, next, COLLATION_FILE_NAME,
934 ndir->mft_no)) {
935 err = -EIO;
936 goto out;
937 }
938
939 if (ie_pos < actor->pos) {
940 ie_pos += le16_to_cpu(next->length);
941 continue;
942 }
943
944 actor->pos = ie_pos;
945
946 index = ntfs_mft_no_to_pidx(vol,
947 MREF_LE(next->data.dir.indexed_file));
948 if (nir) {
949 struct ntfs_index_ra *cnir;
950 struct rb_node *node = ra_root.rb_node;
951
952 if (nir->start_index <= index &&
953 index < nir->start_index + nir->count) {
954 /* No behavior */
955 goto filldir;
956 }
957
958 while (node) {
959 cnir = rb_entry(node, struct ntfs_index_ra, rb_node);
960 if (cnir->start_index <= index &&
961 index < cnir->start_index + cnir->count) {
962 goto filldir;
963 } else if (cnir->start_index + cnir->count == index) {
964 cnir->count++;
965 goto filldir;
966 } else if (!cnir->start_index && cnir->start_index - 1 == index) {
967 cnir->start_index = index;
968 goto filldir;
969 }
970
971 if (index < cnir->start_index)
972 node = node->rb_left;
973 else if (index >= cnir->start_index + cnir->count)
974 node = node->rb_right;
975 }
976
977 if (nir->start_index + nir->count == index) {
978 nir->count++;
979 } else if (!nir->start_index && nir->start_index - 1 == index) {
980 nir->start_index = index;
981 } else if (nir->count > 2) {
982 ntfs_insert_rb(nir, &ra_root);
983 nir = NULL;
984 } else {
985 nir->start_index = index;
986 nir->count = 1;
987 }
988 }
989
990 if (!nir) {
991 nir = kzalloc(sizeof(struct ntfs_index_ra), GFP_KERNEL);
992 if (nir) {
993 nir->start_index = index;
994 nir->count = 1;
995 }
996 }
997
998 filldir:
999 /* Submit the name to the filldir callback. */
1000 err = ntfs_filldir(vol, ndir, NULL, next, name, actor);
1001 if (err) {
1002 /*
1003 * Store index key value to file private_data to start
1004 * from current index offset on next round.
1005 */
1006 private = file->private_data;
1007 kfree(private->key);
1008 private->key = kmalloc(le16_to_cpu(next->key_length), GFP_KERNEL);
1009 if (!private->key) {
1010 err = -ENOMEM;
1011 goto out;
1012 }
1013
1014 memcpy(private->key, &next->key.file_name, le16_to_cpu(next->key_length));
1015 private->key_length = next->key_length;
1016 break;
1017 }
1018 ie_pos += le16_to_cpu(next->length);
1019 }
1020
1021 if (!err)
1022 private->end_in_iterate = true;
1023 else
1024 err = 0;
1025
1026 private->curr_pos = actor->pos = ie_pos;
1027 out:
1028 while (!RB_EMPTY_ROOT(&ra_root)) {
1029 struct ntfs_index_ra *cnir;
1030 struct rb_node *node;
1031
1032 node = rb_first(&ra_root);
1033 cnir = rb_entry(node, struct ntfs_index_ra, rb_node);
1034 ra->ra_pages = cnir->count;
1035 page_cache_sync_readahead(vol->mft_ino->i_mapping, ra, NULL,
1036 cnir->start_index, cnir->count);
1037 rb_erase(node, &ra_root);
1038 kfree(cnir);
1039 }
1040
1041 if (err) {
1042 if (private) {
1043 private->curr_pos = actor->pos;
1044 private->end_in_iterate = true;
1045 }
1046 err = 0;
1047 }
1048 ntfs_index_ctx_put(ictx);
1049 kfree(name);
1050 kfree(nir);
1051 kfree(ra);
1052 mutex_unlock(&ndir->mrec_lock);
1053 return err;
1054 }
1055
ntfs_check_empty_dir(struct ntfs_inode * ni,struct mft_record * ni_mrec)1056 int ntfs_check_empty_dir(struct ntfs_inode *ni, struct mft_record *ni_mrec)
1057 {
1058 struct ntfs_attr_search_ctx *ctx;
1059 int ret = 0;
1060
1061 if (!(ni_mrec->flags & MFT_RECORD_IS_DIRECTORY))
1062 return 0;
1063
1064 ctx = ntfs_attr_get_search_ctx(ni, NULL);
1065 if (!ctx) {
1066 ntfs_error(ni->vol->sb, "Failed to get search context");
1067 return -ENOMEM;
1068 }
1069
1070 /* Find the index root attribute in the mft record. */
1071 ret = ntfs_attr_lookup(AT_INDEX_ROOT, I30, 4, CASE_SENSITIVE, 0, NULL,
1072 0, ctx);
1073 if (ret) {
1074 ntfs_error(ni->vol->sb, "Index root attribute missing in directory inode %llu",
1075 ni->mft_no);
1076 ntfs_attr_put_search_ctx(ctx);
1077 return ret;
1078 }
1079
1080 /* Non-empty directory? */
1081 if (le32_to_cpu(ctx->attr->data.resident.value_length) !=
1082 sizeof(struct index_root) + sizeof(struct index_entry_header)) {
1083 /* Both ENOTEMPTY and EEXIST are ok. We use the more common. */
1084 ret = -ENOTEMPTY;
1085 ntfs_debug("Directory is not empty\n");
1086 }
1087
1088 ntfs_attr_put_search_ctx(ctx);
1089
1090 return ret;
1091 }
1092
1093 /*
1094 * ntfs_dir_open - called when an inode is about to be opened
1095 * @vi: inode to be opened
1096 * @filp: file structure describing the inode
1097 *
1098 * Limit directory size to the page cache limit on architectures where unsigned
1099 * long is 32-bits. This is the most we can do for now without overflowing the
1100 * page cache page index. Doing it this way means we don't run into problems
1101 * because of existing too large directories. It would be better to allow the
1102 * user to read the accessible part of the directory but I doubt very much
1103 * anyone is going to hit this check on a 32-bit architecture, so there is no
1104 * point in adding the extra complexity required to support this.
1105 *
1106 * On 64-bit architectures, the check is hopefully optimized away by the
1107 * compiler.
1108 */
ntfs_dir_open(struct inode * vi,struct file * filp)1109 static int ntfs_dir_open(struct inode *vi, struct file *filp)
1110 {
1111 if (sizeof(unsigned long) < 8) {
1112 if (i_size_read(vi) > MAX_LFS_FILESIZE)
1113 return -EFBIG;
1114 }
1115 return 0;
1116 }
1117
ntfs_dir_release(struct inode * vi,struct file * filp)1118 static int ntfs_dir_release(struct inode *vi, struct file *filp)
1119 {
1120 if (filp->private_data) {
1121 kfree(((struct ntfs_file_private *)filp->private_data)->key);
1122 kfree(filp->private_data);
1123 filp->private_data = NULL;
1124 }
1125 return 0;
1126 }
1127
1128 /*
1129 * ntfs_dir_fsync - sync a directory to disk
1130 * @filp: file describing the directory to be synced
1131 * @start: start offset to be synced
1132 * @end: end offset to be synced
1133 * @datasync: if non-zero only flush user data and not metadata
1134 *
1135 * Data integrity sync of a directory to disk. Used for fsync, fdatasync, and
1136 * msync system calls. This function is based on file.c::ntfs_file_fsync().
1137 *
1138 * Write the mft record and all associated extent mft records as well as the
1139 * $INDEX_ALLOCATION and $BITMAP attributes and then sync the block device.
1140 *
1141 * If @datasync is true, we do not wait on the inode(s) to be written out
1142 * but we always wait on the page cache pages to be written out.
1143 *
1144 * Note: In the past @filp could be NULL so we ignore it as we don't need it
1145 * anyway.
1146 *
1147 * Locking: Caller must hold i_mutex on the inode.
1148 */
static int ntfs_dir_fsync(struct file *filp, loff_t start, loff_t end,
		int datasync)
{
	struct inode *bmp_vi, *vi = filp->f_mapping->host;
	struct ntfs_volume *vol = NTFS_I(vi)->vol;
	struct ntfs_inode *ni = NTFS_I(vi);
	struct ntfs_attr_search_ctx *ctx;
	struct inode *parent_vi, *ia_vi;
	int err, ret;
	struct ntfs_attr na;

	ntfs_debug("Entering for inode 0x%llx.", ni->mft_no);

	/* Refuse all writeback on a shut-down volume. */
	if (NVolShutdown(vol))
		return -EIO;

	ctx = ntfs_attr_get_search_ctx(ni, NULL);
	if (!ctx)
		return -ENOMEM;

	/*
	 * Walk every $FILE_NAME attribute of this inode and sync each parent
	 * directory's index allocation and inode, so all directory indexes
	 * referencing this inode reach the disk.  Best effort: failures to
	 * grab a parent or its index inode are skipped, not reported.
	 * NORMAL_CHILD lockdep class: the parent's mrec_lock is taken below
	 * while we hold ours.
	 */
	mutex_lock_nested(&ni->mrec_lock, NTFS_INODE_MUTEX_NORMAL_CHILD);
	while (!(err = ntfs_attr_lookup(AT_FILE_NAME, NULL, 0, 0, 0, NULL, 0, ctx))) {
		struct file_name_attr *fn = (struct file_name_attr *)((u8 *)ctx->attr +
				le16_to_cpu(ctx->attr->data.resident.value_offset));

		/* Skip self-referencing names (e.g. the root directory). */
		if (MREF_LE(fn->parent_directory) == ni->mft_no)
			continue;

		parent_vi = ntfs_iget(vi->i_sb, MREF_LE(fn->parent_directory));
		if (IS_ERR(parent_vi))
			continue;
		mutex_lock_nested(&NTFS_I(parent_vi)->mrec_lock, NTFS_INODE_MUTEX_NORMAL);
		ia_vi = ntfs_index_iget(parent_vi, I30, 4);
		mutex_unlock(&NTFS_I(parent_vi)->mrec_lock);
		if (IS_ERR(ia_vi)) {
			iput(parent_vi);
			continue;
		}
		/* Sync the parent's $INDEX_ALLOCATION first, then the parent. */
		write_inode_now(ia_vi, 1);
		iput(ia_vi);
		write_inode_now(parent_vi, 1);
		iput(parent_vi);
	}
	mutex_unlock(&ni->mrec_lock);
	ntfs_attr_put_search_ctx(ctx);

	/* Flush and wait on this directory's own dirty pages. */
	err = file_write_and_wait_range(filp, start, end);
	if (err)
		return err;
	inode_lock(vi);

	/* If the bitmap attribute inode is in memory sync it, too. */
	na.mft_no = vi->i_ino;
	na.type = AT_BITMAP;
	na.name = I30;
	na.name_len = 4;
	bmp_vi = ilookup5(vi->i_sb, vi->i_ino, ntfs_test_inode, &na);
	if (bmp_vi) {
		write_inode_now(bmp_vi, !datasync);
		iput(bmp_vi);
	}
	/* Write the mft record (and extents); first error is kept in ret. */
	ret = __ntfs_write_inode(vi, 1);

	write_inode_now(vi, !datasync);

	/*
	 * Sync volume-global metadata: mft bitmap, cluster bitmap (under its
	 * rwsem) and the $MFT inode itself.
	 */
	write_inode_now(vol->mftbmp_ino, 1);
	down_write(&vol->lcnbmp_lock);
	write_inode_now(vol->lcnbmp_ino, 1);
	up_write(&vol->lcnbmp_lock);
	write_inode_now(vol->mft_ino, 1);

	/* Finally flush the block device; preserve the earlier error if any. */
	err = sync_blockdev(vi->i_sb->s_bdev);
	if (unlikely(err && !ret))
		ret = err;
	if (likely(!ret))
		ntfs_debug("Done.");
	else
		ntfs_warning(vi->i_sb,
				"Failed to f%ssync inode 0x%llx. Error %u.",
				datasync ? "data" : "", ni->mft_no, -ret);
	inode_unlock(vi);
	return ret;
}
1232
/* File operations for NTFS directories. */
const struct file_operations ntfs_dir_ops = {
	.llseek = generic_file_llseek, /* Seek inside directory. */
	.read = generic_read_dir, /* Return -EISDIR. */
	.iterate_shared = ntfs_readdir, /* Read directory contents. */
	.fsync = ntfs_dir_fsync, /* Sync a directory to disk. */
	.open = ntfs_dir_open, /* Open directory. */
	.release = ntfs_dir_release, /* Free readdir private state. */
	.unlocked_ioctl = ntfs_ioctl, /* Driver ioctls (see ntfs_ioctl). */
#ifdef CONFIG_COMPAT
	.compat_ioctl = ntfs_compat_ioctl, /* 32-bit ioctls on 64-bit kernels. */
#endif
	.setlease = generic_setlease, /* Generic VFS lease handling. */
};
1246