xref: /linux/fs/nfs/dir.c (revision 4eec0b3048fcd74b504c2a6828a07f133a8ab508)
1  // SPDX-License-Identifier: GPL-2.0-only
2  /*
3   *  linux/fs/nfs/dir.c
4   *
5   *  Copyright (C) 1992  Rick Sladkey
6   *
7   *  nfs directory handling functions
8   *
9   * 10 Apr 1996	Added silly rename for unlink	--okir
10   * 28 Sep 1996	Improved directory cache --okir
11   * 23 Aug 1997  Claus Heine claus@momo.math.rwth-aachen.de
12   *              Re-implemented silly rename for unlink, newly implemented
13   *              silly rename for nfs_rename() following the suggestions
14   *              of Olaf Kirch (okir) found in this file.
15   *              Following Linus comments on my original hack, this version
16   *              depends only on the dcache stuff and doesn't touch the inode
17   *              layer (iput() and friends).
18   *  6 Jun 1999	Cache readdir lookups in the page cache. -DaveM
19   */
20  
21  #include <linux/compat.h>
22  #include <linux/module.h>
23  #include <linux/time.h>
24  #include <linux/errno.h>
25  #include <linux/stat.h>
26  #include <linux/fcntl.h>
27  #include <linux/string.h>
28  #include <linux/kernel.h>
29  #include <linux/slab.h>
30  #include <linux/mm.h>
31  #include <linux/sunrpc/clnt.h>
32  #include <linux/nfs_fs.h>
33  #include <linux/nfs_mount.h>
34  #include <linux/pagemap.h>
35  #include <linux/pagevec.h>
36  #include <linux/namei.h>
37  #include <linux/mount.h>
38  #include <linux/swap.h>
39  #include <linux/sched.h>
40  #include <linux/kmemleak.h>
41  #include <linux/xattr.h>
42  #include <linux/hash.h>
43  
44  #include "delegation.h"
45  #include "iostat.h"
46  #include "internal.h"
47  #include "fscache.h"
48  
49  #include "nfstrace.h"
50  
51  /* #define NFS_DEBUG_VERBOSE 1 */
52  
53  static int nfs_opendir(struct inode *, struct file *);
54  static int nfs_closedir(struct inode *, struct file *);
55  static int nfs_readdir(struct file *, struct dir_context *);
56  static int nfs_fsync_dir(struct file *, loff_t, loff_t, int);
57  static loff_t nfs_llseek_dir(struct file *, loff_t, int);
58  static void nfs_readdir_free_folio(struct folio *);
59  
60  const struct file_operations nfs_dir_operations = {
61  	.llseek		= nfs_llseek_dir,
62  	.read		= generic_read_dir,
63  	.iterate_shared	= nfs_readdir,
64  	.open		= nfs_opendir,
65  	.release	= nfs_closedir,
66  	.fsync		= nfs_fsync_dir,
67  };
68  
69  const struct address_space_operations nfs_dir_aops = {
70  	.free_folio = nfs_readdir_free_folio,
71  };
72  
/* Initial dtsize given to a freshly allocated open-directory context */
#define NFS_INIT_DTSIZE PAGE_SIZE
74  
75  static struct nfs_open_dir_context *
76  alloc_nfs_open_dir_context(struct inode *dir)
77  {
78  	struct nfs_inode *nfsi = NFS_I(dir);
79  	struct nfs_open_dir_context *ctx;
80  
81  	ctx = kzalloc(sizeof(*ctx), GFP_KERNEL_ACCOUNT);
82  	if (ctx != NULL) {
83  		ctx->attr_gencount = nfsi->attr_gencount;
84  		ctx->dtsize = NFS_INIT_DTSIZE;
85  		spin_lock(&dir->i_lock);
86  		if (list_empty(&nfsi->open_files) &&
87  		    (nfsi->cache_validity & NFS_INO_DATA_INVAL_DEFER))
88  			nfs_set_cache_invalid(dir,
89  					      NFS_INO_INVALID_DATA |
90  						      NFS_INO_REVAL_FORCED);
91  		list_add_tail_rcu(&ctx->list, &nfsi->open_files);
92  		memcpy(ctx->verf, nfsi->cookieverf, sizeof(ctx->verf));
93  		spin_unlock(&dir->i_lock);
94  		return ctx;
95  	}
96  	return  ERR_PTR(-ENOMEM);
97  }
98  
99  static void put_nfs_open_dir_context(struct inode *dir, struct nfs_open_dir_context *ctx)
100  {
101  	spin_lock(&dir->i_lock);
102  	list_del_rcu(&ctx->list);
103  	spin_unlock(&dir->i_lock);
104  	kfree_rcu(ctx, rcu_head);
105  }
106  
107  /*
108   * Open file
109   */
110  static int
111  nfs_opendir(struct inode *inode, struct file *filp)
112  {
113  	int res = 0;
114  	struct nfs_open_dir_context *ctx;
115  
116  	dfprintk(FILE, "NFS: open dir(%pD2)\n", filp);
117  
118  	nfs_inc_stats(inode, NFSIOS_VFSOPEN);
119  
120  	ctx = alloc_nfs_open_dir_context(inode);
121  	if (IS_ERR(ctx)) {
122  		res = PTR_ERR(ctx);
123  		goto out;
124  	}
125  	filp->private_data = ctx;
126  out:
127  	return res;
128  }
129  
130  static int
131  nfs_closedir(struct inode *inode, struct file *filp)
132  {
133  	put_nfs_open_dir_context(file_inode(filp), filp->private_data);
134  	return 0;
135  }
136  
/* One cached directory entry stored in a readdir cache page */
struct nfs_cache_array_entry {
	u64 cookie;		/* array's last_cookie when this entry was appended */
	u64 ino;		/* inode number copied from the decoded entry */
	const char *name;	/* kmemdup_nul() copy; freed by nfs_readdir_clear_array() */
	unsigned int name_len;	/* length of @name, excluding the terminating NUL */
	unsigned char d_type;	/* d_type copied from the decoded entry */
};
144  
/* Header of a readdir cache page; the entries follow in @array */
struct nfs_cache_array {
	u64 change_attr;	/* compared against the caller's change_attr when validating */
	u64 last_cookie;	/* cookie of the last appended entry (initial cookie if empty) */
	unsigned int size;	/* number of valid entries in @array */
	unsigned char page_full : 1,		/* no further entries may be appended */
		      page_is_eof : 1,		/* last entry is the end of the directory */
		      cookies_are_ordered : 1;	/* cleared once a cookie fails to increase */
	struct nfs_cache_array_entry array[];
};
154  
/* State carried through one readdir operation on an open directory */
struct nfs_readdir_descriptor {
	struct file	*file;		/* the open directory */
	struct page	*page;		/* cache page currently held, or NULL */
	struct dir_context *ctx;	/* VFS iteration context (pos, emit callback) */
	pgoff_t		page_index;	/* advanced each time a full array is passed */
	pgoff_t		page_index_max;	/* advanced for each extra page filled */
	u64		dir_cookie;	/* cookie being searched for; 0 means search by pos */
	u64		last_cookie;	/* cookie used to look up the cache page */
	loff_t		current_index;	/* number of entries in the arrays already passed */

	__be32		verf[NFS_DIR_VERIFIER_SIZE];
	unsigned long	dir_verifier;	/* nfs_save_change_attribute() sampled before READDIR */
	unsigned long	timestamp;	/* jiffies sampled before the last successful READDIR */
	unsigned long	gencount;	/* attr generation sampled before that READDIR */
	unsigned long	attr_gencount;
	unsigned int	cache_entry_index; /* index of the matching entry within the page */
	unsigned int	buffer_fills;	/* count of nfs_readdir_xdr_to_array() fills */
	unsigned int	dtsize;		/* READDIR buffer size, see nfs_set_dtsize() */
	bool clear_cache;		/* reinitialise a full cached page on lookup */
	bool plus;			/* use READDIRPLUS */
	bool eob;			/* stopped emitting before the end of the array */
	bool eof;			/* end of directory reached */
};
178  
179  static void nfs_set_dtsize(struct nfs_readdir_descriptor *desc, unsigned int sz)
180  {
181  	struct nfs_server *server = NFS_SERVER(file_inode(desc->file));
182  	unsigned int maxsize = server->dtsize;
183  
184  	if (sz > maxsize)
185  		sz = maxsize;
186  	if (sz < NFS_MIN_FILE_IO_SIZE)
187  		sz = NFS_MIN_FILE_IO_SIZE;
188  	desc->dtsize = sz;
189  }
190  
191  static void nfs_shrink_dtsize(struct nfs_readdir_descriptor *desc)
192  {
193  	nfs_set_dtsize(desc, desc->dtsize >> 1);
194  }
195  
196  static void nfs_grow_dtsize(struct nfs_readdir_descriptor *desc)
197  {
198  	nfs_set_dtsize(desc, desc->dtsize << 1);
199  }
200  
201  static void nfs_readdir_page_init_array(struct page *page, u64 last_cookie,
202  					u64 change_attr)
203  {
204  	struct nfs_cache_array *array;
205  
206  	array = kmap_local_page(page);
207  	array->change_attr = change_attr;
208  	array->last_cookie = last_cookie;
209  	array->size = 0;
210  	array->page_full = 0;
211  	array->page_is_eof = 0;
212  	array->cookies_are_ordered = 1;
213  	kunmap_local(array);
214  }
215  
216  /*
217   * we are freeing strings created by nfs_add_to_readdir_array()
218   */
219  static void nfs_readdir_clear_array(struct page *page)
220  {
221  	struct nfs_cache_array *array;
222  	unsigned int i;
223  
224  	array = kmap_local_page(page);
225  	for (i = 0; i < array->size; i++)
226  		kfree(array->array[i].name);
227  	array->size = 0;
228  	kunmap_local(array);
229  }
230  
231  static void nfs_readdir_free_folio(struct folio *folio)
232  {
233  	nfs_readdir_clear_array(&folio->page);
234  }
235  
236  static void nfs_readdir_page_reinit_array(struct page *page, u64 last_cookie,
237  					  u64 change_attr)
238  {
239  	nfs_readdir_clear_array(page);
240  	nfs_readdir_page_init_array(page, last_cookie, change_attr);
241  }
242  
243  static struct page *
244  nfs_readdir_page_array_alloc(u64 last_cookie, gfp_t gfp_flags)
245  {
246  	struct page *page = alloc_page(gfp_flags);
247  	if (page)
248  		nfs_readdir_page_init_array(page, last_cookie, 0);
249  	return page;
250  }
251  
/*
 * Free the names in @page's cache array and drop a page reference.
 * A NULL @page is ignored.
 */
static void nfs_readdir_page_array_free(struct page *page)
{
	if (!page)
		return;

	nfs_readdir_clear_array(page);
	put_page(page);
}
259  
260  static u64 nfs_readdir_array_index_cookie(struct nfs_cache_array *array)
261  {
262  	return array->size == 0 ? array->last_cookie : array->array[0].cookie;
263  }
264  
265  static void nfs_readdir_array_set_eof(struct nfs_cache_array *array)
266  {
267  	array->page_is_eof = 1;
268  	array->page_full = 1;
269  }
270  
271  static bool nfs_readdir_array_is_full(struct nfs_cache_array *array)
272  {
273  	return array->page_full;
274  }
275  
276  /*
277   * the caller is responsible for freeing qstr.name
278   * when called by nfs_readdir_add_to_array, the strings will be freed in
279   * nfs_clear_readdir_array()
280   */
281  static const char *nfs_readdir_copy_name(const char *name, unsigned int len)
282  {
283  	const char *ret = kmemdup_nul(name, len, GFP_KERNEL);
284  
285  	/*
286  	 * Avoid a kmemleak false positive. The pointer to the name is stored
287  	 * in a page cache page which kmemleak does not scan.
288  	 */
289  	if (ret != NULL)
290  		kmemleak_not_leak(ret);
291  	return ret;
292  }
293  
294  static size_t nfs_readdir_array_maxentries(void)
295  {
296  	return (PAGE_SIZE - sizeof(struct nfs_cache_array)) /
297  	       sizeof(struct nfs_cache_array_entry);
298  }
299  
300  /*
301   * Check that the next array entry lies entirely within the page bounds
302   */
303  static int nfs_readdir_array_can_expand(struct nfs_cache_array *array)
304  {
305  	if (array->page_full)
306  		return -ENOSPC;
307  	if (array->size == nfs_readdir_array_maxentries()) {
308  		array->page_full = 1;
309  		return -ENOSPC;
310  	}
311  	return 0;
312  }
313  
314  static int nfs_readdir_page_array_append(struct page *page,
315  					 const struct nfs_entry *entry,
316  					 u64 *cookie)
317  {
318  	struct nfs_cache_array *array;
319  	struct nfs_cache_array_entry *cache_entry;
320  	const char *name;
321  	int ret = -ENOMEM;
322  
323  	name = nfs_readdir_copy_name(entry->name, entry->len);
324  
325  	array = kmap_atomic(page);
326  	if (!name)
327  		goto out;
328  	ret = nfs_readdir_array_can_expand(array);
329  	if (ret) {
330  		kfree(name);
331  		goto out;
332  	}
333  
334  	cache_entry = &array->array[array->size];
335  	cache_entry->cookie = array->last_cookie;
336  	cache_entry->ino = entry->ino;
337  	cache_entry->d_type = entry->d_type;
338  	cache_entry->name_len = entry->len;
339  	cache_entry->name = name;
340  	array->last_cookie = entry->cookie;
341  	if (array->last_cookie <= cache_entry->cookie)
342  		array->cookies_are_ordered = 0;
343  	array->size++;
344  	if (entry->eof != 0)
345  		nfs_readdir_array_set_eof(array);
346  out:
347  	*cookie = array->last_cookie;
348  	kunmap_atomic(array);
349  	return ret;
350  }
351  
/* U32_MAX >> 14: a mask with the low 18 bits set */
#define NFS_READDIR_COOKIE_MASK (U32_MAX >> 14)
353  /*
354   * Hash algorithm allowing content addressible access to sequences
355   * of directory cookies. Content is addressed by the value of the
356   * cookie index of the first readdir entry in a page.
357   *
358   * We select only the first 18 bits to avoid issues with excessive
359   * memory use for the page cache XArray. 18 bits should allow the caching
360   * of 262144 pages of sequences of readdir entries. Since each page holds
361   * 127 readdir entries for a typical 64-bit system, that works out to a
362   * cache of ~ 33 million entries per directory.
363   */
364  static pgoff_t nfs_readdir_page_cookie_hash(u64 cookie)
365  {
366  	if (cookie == 0)
367  		return 0;
368  	return hash_64(cookie, 18);
369  }
370  
371  static bool nfs_readdir_page_validate(struct page *page, u64 last_cookie,
372  				      u64 change_attr)
373  {
374  	struct nfs_cache_array *array = kmap_local_page(page);
375  	int ret = true;
376  
377  	if (array->change_attr != change_attr)
378  		ret = false;
379  	if (nfs_readdir_array_index_cookie(array) != last_cookie)
380  		ret = false;
381  	kunmap_local(array);
382  	return ret;
383  }
384  
/* Unlock @page and then drop a reference to it */
static void nfs_readdir_page_unlock_and_put(struct page *page)
{
	unlock_page(page);

	put_page(page);
}
390  
391  static void nfs_readdir_page_init_and_validate(struct page *page, u64 cookie,
392  					       u64 change_attr)
393  {
394  	if (PageUptodate(page)) {
395  		if (nfs_readdir_page_validate(page, cookie, change_attr))
396  			return;
397  		nfs_readdir_clear_array(page);
398  	}
399  	nfs_readdir_page_init_array(page, cookie, change_attr);
400  	SetPageUptodate(page);
401  }
402  
403  static struct page *nfs_readdir_page_get_locked(struct address_space *mapping,
404  						u64 cookie, u64 change_attr)
405  {
406  	pgoff_t index = nfs_readdir_page_cookie_hash(cookie);
407  	struct page *page;
408  
409  	page = grab_cache_page(mapping, index);
410  	if (!page)
411  		return NULL;
412  	nfs_readdir_page_init_and_validate(page, cookie, change_attr);
413  	return page;
414  }
415  
416  static u64 nfs_readdir_page_last_cookie(struct page *page)
417  {
418  	struct nfs_cache_array *array;
419  	u64 ret;
420  
421  	array = kmap_local_page(page);
422  	ret = array->last_cookie;
423  	kunmap_local(array);
424  	return ret;
425  }
426  
427  static bool nfs_readdir_page_needs_filling(struct page *page)
428  {
429  	struct nfs_cache_array *array;
430  	bool ret;
431  
432  	array = kmap_local_page(page);
433  	ret = !nfs_readdir_array_is_full(array);
434  	kunmap_local(array);
435  	return ret;
436  }
437  
/* Mark the cache array in @page as the end of the directory */
static void nfs_readdir_page_set_eof(struct page *page)
{
	struct nfs_cache_array *array = kmap_local_page(page);

	nfs_readdir_array_set_eof(array);
	kunmap_local(array);
}
446  
447  static struct page *nfs_readdir_page_get_next(struct address_space *mapping,
448  					      u64 cookie, u64 change_attr)
449  {
450  	pgoff_t index = nfs_readdir_page_cookie_hash(cookie);
451  	struct page *page;
452  
453  	page = grab_cache_page_nowait(mapping, index);
454  	if (!page)
455  		return NULL;
456  	nfs_readdir_page_init_and_validate(page, cookie, change_attr);
457  	if (nfs_readdir_page_last_cookie(page) != cookie)
458  		nfs_readdir_page_reinit_array(page, cookie, change_attr);
459  	return page;
460  }
461  
462  static inline
463  int is_32bit_api(void)
464  {
465  #ifdef CONFIG_COMPAT
466  	return in_compat_syscall();
467  #else
468  	return (BITS_PER_LONG == 32);
469  #endif
470  }
471  
472  static
473  bool nfs_readdir_use_cookie(const struct file *filp)
474  {
475  	if ((filp->f_mode & FMODE_32BITHASH) ||
476  	    (!(filp->f_mode & FMODE_64BITHASH) && is_32bit_api()))
477  		return false;
478  	return true;
479  }
480  
481  static void nfs_readdir_seek_next_array(struct nfs_cache_array *array,
482  					struct nfs_readdir_descriptor *desc)
483  {
484  	if (array->page_full) {
485  		desc->last_cookie = array->last_cookie;
486  		desc->current_index += array->size;
487  		desc->cache_entry_index = 0;
488  		desc->page_index++;
489  	} else
490  		desc->last_cookie = nfs_readdir_array_index_cookie(array);
491  }
492  
493  static void nfs_readdir_rewind_search(struct nfs_readdir_descriptor *desc)
494  {
495  	desc->current_index = 0;
496  	desc->last_cookie = 0;
497  	desc->page_index = 0;
498  }
499  
500  static int nfs_readdir_search_for_pos(struct nfs_cache_array *array,
501  				      struct nfs_readdir_descriptor *desc)
502  {
503  	loff_t diff = desc->ctx->pos - desc->current_index;
504  	unsigned int index;
505  
506  	if (diff < 0)
507  		goto out_eof;
508  	if (diff >= array->size) {
509  		if (array->page_is_eof)
510  			goto out_eof;
511  		nfs_readdir_seek_next_array(array, desc);
512  		return -EAGAIN;
513  	}
514  
515  	index = (unsigned int)diff;
516  	desc->dir_cookie = array->array[index].cookie;
517  	desc->cache_entry_index = index;
518  	return 0;
519  out_eof:
520  	desc->eof = true;
521  	return -EBADCOOKIE;
522  }
523  
524  static bool nfs_readdir_array_cookie_in_range(struct nfs_cache_array *array,
525  					      u64 cookie)
526  {
527  	if (!array->cookies_are_ordered)
528  		return true;
529  	/* Optimisation for monotonically increasing cookies */
530  	if (cookie >= array->last_cookie)
531  		return false;
532  	if (array->size && cookie < array->array[0].cookie)
533  		return false;
534  	return true;
535  }
536  
537  static int nfs_readdir_search_for_cookie(struct nfs_cache_array *array,
538  					 struct nfs_readdir_descriptor *desc)
539  {
540  	unsigned int i;
541  	int status = -EAGAIN;
542  
543  	if (!nfs_readdir_array_cookie_in_range(array, desc->dir_cookie))
544  		goto check_eof;
545  
546  	for (i = 0; i < array->size; i++) {
547  		if (array->array[i].cookie == desc->dir_cookie) {
548  			if (nfs_readdir_use_cookie(desc->file))
549  				desc->ctx->pos = desc->dir_cookie;
550  			else
551  				desc->ctx->pos = desc->current_index + i;
552  			desc->cache_entry_index = i;
553  			return 0;
554  		}
555  	}
556  check_eof:
557  	if (array->page_is_eof) {
558  		status = -EBADCOOKIE;
559  		if (desc->dir_cookie == array->last_cookie)
560  			desc->eof = true;
561  	} else
562  		nfs_readdir_seek_next_array(array, desc);
563  	return status;
564  }
565  
566  static int nfs_readdir_search_array(struct nfs_readdir_descriptor *desc)
567  {
568  	struct nfs_cache_array *array;
569  	int status;
570  
571  	array = kmap_local_page(desc->page);
572  
573  	if (desc->dir_cookie == 0)
574  		status = nfs_readdir_search_for_pos(array, desc);
575  	else
576  		status = nfs_readdir_search_for_cookie(array, desc);
577  
578  	kunmap_local(array);
579  	return status;
580  }
581  
582  /* Fill a page with xdr information before transferring to the cache page */
583  static int nfs_readdir_xdr_filler(struct nfs_readdir_descriptor *desc,
584  				  __be32 *verf, u64 cookie,
585  				  struct page **pages, size_t bufsize,
586  				  __be32 *verf_res)
587  {
588  	struct inode *inode = file_inode(desc->file);
589  	struct nfs_readdir_arg arg = {
590  		.dentry = file_dentry(desc->file),
591  		.cred = desc->file->f_cred,
592  		.verf = verf,
593  		.cookie = cookie,
594  		.pages = pages,
595  		.page_len = bufsize,
596  		.plus = desc->plus,
597  	};
598  	struct nfs_readdir_res res = {
599  		.verf = verf_res,
600  	};
601  	unsigned long	timestamp, gencount;
602  	int		error;
603  
604   again:
605  	timestamp = jiffies;
606  	gencount = nfs_inc_attr_generation_counter();
607  	desc->dir_verifier = nfs_save_change_attribute(inode);
608  	error = NFS_PROTO(inode)->readdir(&arg, &res);
609  	if (error < 0) {
610  		/* We requested READDIRPLUS, but the server doesn't grok it */
611  		if (error == -ENOTSUPP && desc->plus) {
612  			NFS_SERVER(inode)->caps &= ~NFS_CAP_READDIRPLUS;
613  			desc->plus = arg.plus = false;
614  			goto again;
615  		}
616  		goto error;
617  	}
618  	desc->timestamp = timestamp;
619  	desc->gencount = gencount;
620  error:
621  	return error;
622  }
623  
624  static int xdr_decode(struct nfs_readdir_descriptor *desc,
625  		      struct nfs_entry *entry, struct xdr_stream *xdr)
626  {
627  	struct inode *inode = file_inode(desc->file);
628  	int error;
629  
630  	error = NFS_PROTO(inode)->decode_dirent(xdr, entry, desc->plus);
631  	if (error)
632  		return error;
633  	entry->fattr->time_start = desc->timestamp;
634  	entry->fattr->gencount = desc->gencount;
635  	return 0;
636  }
637  
638  /* Match file and dirent using either filehandle or fileid
639   * Note: caller is responsible for checking the fsid
640   */
641  static
642  int nfs_same_file(struct dentry *dentry, struct nfs_entry *entry)
643  {
644  	struct inode *inode;
645  	struct nfs_inode *nfsi;
646  
647  	if (d_really_is_negative(dentry))
648  		return 0;
649  
650  	inode = d_inode(dentry);
651  	if (is_bad_inode(inode) || NFS_STALE(inode))
652  		return 0;
653  
654  	nfsi = NFS_I(inode);
655  	if (entry->fattr->fileid != nfsi->fileid)
656  		return 0;
657  	if (entry->fh->size && nfs_compare_fh(entry->fh, &nfsi->fh) != 0)
658  		return 0;
659  	return 1;
660  }
661  
/* cache_hits + cache_misses must exceed this for nfs_use_readdirplus() */
#define NFS_READDIR_CACHE_USAGE_THRESHOLD (8UL)
663  
664  static bool nfs_use_readdirplus(struct inode *dir, struct dir_context *ctx,
665  				unsigned int cache_hits,
666  				unsigned int cache_misses)
667  {
668  	if (!nfs_server_capable(dir, NFS_CAP_READDIRPLUS))
669  		return false;
670  	if (ctx->pos == 0 ||
671  	    cache_hits + cache_misses > NFS_READDIR_CACHE_USAGE_THRESHOLD)
672  		return true;
673  	return false;
674  }
675  
676  /*
677   * This function is called by the getattr code to request the
678   * use of readdirplus to accelerate any future lookups in the same
679   * directory.
680   */
681  void nfs_readdir_record_entry_cache_hit(struct inode *dir)
682  {
683  	struct nfs_inode *nfsi = NFS_I(dir);
684  	struct nfs_open_dir_context *ctx;
685  
686  	if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
687  	    S_ISDIR(dir->i_mode)) {
688  		rcu_read_lock();
689  		list_for_each_entry_rcu (ctx, &nfsi->open_files, list)
690  			atomic_inc(&ctx->cache_hits);
691  		rcu_read_unlock();
692  	}
693  }
694  
695  /*
696   * This function is mainly for use by nfs_getattr().
697   *
698   * If this is an 'ls -l', we want to force use of readdirplus.
699   */
700  void nfs_readdir_record_entry_cache_miss(struct inode *dir)
701  {
702  	struct nfs_inode *nfsi = NFS_I(dir);
703  	struct nfs_open_dir_context *ctx;
704  
705  	if (nfs_server_capable(dir, NFS_CAP_READDIRPLUS) &&
706  	    S_ISDIR(dir->i_mode)) {
707  		rcu_read_lock();
708  		list_for_each_entry_rcu (ctx, &nfsi->open_files, list)
709  			atomic_inc(&ctx->cache_misses);
710  		rcu_read_unlock();
711  	}
712  }
713  
714  static void nfs_lookup_advise_force_readdirplus(struct inode *dir,
715  						unsigned int flags)
716  {
717  	if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
718  		return;
719  	if (flags & (LOOKUP_EXCL | LOOKUP_PARENT | LOOKUP_REVAL))
720  		return;
721  	nfs_readdir_record_entry_cache_miss(dir);
722  }
723  
/*
 * Instantiate or refresh the dcache entry for one decoded readdir entry.
 *
 * @parent:       dentry of the directory being read
 * @entry:        decoded entry (name, filehandle and attributes)
 * @dir_verifier: value recorded on the child via nfs_set_verifier()
 *
 * Entries lacking fileid or fsid attributes, with an empty name, with an
 * embedded '\0' or '/', or named "." or ".." are ignored.
 */
static
void nfs_prime_dcache(struct dentry *parent, struct nfs_entry *entry,
		unsigned long dir_verifier)
{
	struct qstr filename = QSTR_INIT(entry->name, entry->len);
	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
	struct dentry *dentry;
	struct dentry *alias;
	struct inode *inode;
	int status;

	if (!(entry->fattr->valid & NFS_ATTR_FATTR_FILEID))
		return;
	if (!(entry->fattr->valid & NFS_ATTR_FATTR_FSID))
		return;
	if (filename.len == 0)
		return;
	/* Validate that the name doesn't contain any illegal '\0' */
	if (strnlen(filename.name, filename.len) != filename.len)
		return;
	/* ...or '/' */
	if (strnchr(filename.name, filename.len, '/'))
		return;
	/* Skip "." and ".." */
	if (filename.name[0] == '.') {
		if (filename.len == 1)
			return;
		if (filename.len == 2 && filename.name[1] == '.')
			return;
	}
	filename.hash = full_name_hash(parent, filename.name, filename.len);

	dentry = d_lookup(parent, &filename);
again:
	/* Nothing found (or the old dentry was just dropped): allocate one */
	if (!dentry) {
		dentry = d_alloc_parallel(parent, &filename, &wq);
		if (IS_ERR(dentry))
			return;
	}
	if (!d_in_lookup(dentry)) {
		/* Is there a mountpoint here? If so, just exit */
		if (!nfs_fsid_equal(&NFS_SB(dentry->d_sb)->fsid,
					&entry->fattr->fsid))
			goto out;
		if (nfs_same_file(dentry, entry)) {
			/* Same file, but no filehandle to refresh it with */
			if (!entry->fh->size)
				goto out;
			nfs_set_verifier(dentry, dir_verifier);
			status = nfs_refresh_inode(d_inode(dentry), entry->fattr);
			if (!status)
				nfs_setsecurity(d_inode(dentry), entry->fattr);
			trace_nfs_readdir_lookup_revalidate(d_inode(parent),
							    dentry, 0, status);
			goto out;
		} else {
			/*
			 * The dentry does not match this entry: invalidate
			 * it, drop our reference and retry with a new dentry.
			 */
			trace_nfs_readdir_lookup_revalidate_failed(
				d_inode(parent), dentry, 0);
			d_invalidate(dentry);
			dput(dentry);
			dentry = NULL;
			goto again;
		}
	}
	/* In-lookup dentry but no filehandle: end the lookup, nothing to attach */
	if (!entry->fh->size) {
		d_lookup_done(dentry);
		goto out;
	}

	inode = nfs_fhget(dentry->d_sb, entry->fh, entry->fattr);
	alias = d_splice_alias(inode, dentry);
	d_lookup_done(dentry);
	if (alias) {
		if (IS_ERR(alias))
			goto out;
		/* Continue with the alias in place of our own dentry */
		dput(dentry);
		dentry = alias;
	}
	nfs_set_verifier(dentry, dir_verifier);
	trace_nfs_readdir_lookup(d_inode(parent), dentry, 0);
out:
	dput(dentry);
}
805  
806  static int nfs_readdir_entry_decode(struct nfs_readdir_descriptor *desc,
807  				    struct nfs_entry *entry,
808  				    struct xdr_stream *stream)
809  {
810  	int ret;
811  
812  	if (entry->fattr->label)
813  		entry->fattr->label->len = NFS4_MAXLABELLEN;
814  	ret = xdr_decode(desc, entry, stream);
815  	if (ret || !desc->plus)
816  		return ret;
817  	nfs_prime_dcache(file_dentry(desc->file), entry, desc->dir_verifier);
818  	return 0;
819  }
820  
/*
 * Perform conversion from xdr to cache array.
 *
 * @desc:        readdir state
 * @entry:       scratch entry reused for each decoded dirent
 * @xdr_pages:   pages holding the raw READDIR reply
 * @buflen:      number of valid bytes in @xdr_pages
 * @arrays:      array pages to fill; *arrays is the first page
 * @narrays:     number of slots available in @arrays
 * @change_attr: change attribute used to tag follow-on page cache pages
 *
 * Decodes entries one at a time and appends them to the current array
 * page. When that page is full, filling continues in another page: a new
 * page cache page if the current page belongs to the file's mapping,
 * otherwise a freshly allocated page stored in the next @arrays slot.
 *
 * Returns 0 on success or a negative errno.
 */
static int nfs_readdir_page_filler(struct nfs_readdir_descriptor *desc,
				   struct nfs_entry *entry,
				   struct page **xdr_pages, unsigned int buflen,
				   struct page **arrays, size_t narrays,
				   u64 change_attr)
{
	struct address_space *mapping = desc->file->f_mapping;
	struct xdr_stream stream;
	struct xdr_buf buf;
	struct page *scratch, *new, *page = *arrays;
	u64 cookie;
	int status;

	scratch = alloc_page(GFP_KERNEL);
	if (scratch == NULL)
		return -ENOMEM;

	xdr_init_decode_pages(&stream, &buf, xdr_pages, buflen);
	xdr_set_scratch_page(&stream, scratch);

	do {
		status = nfs_readdir_entry_decode(desc, entry, &stream);
		if (status != 0)
			break;

		status = nfs_readdir_page_array_append(page, entry, &cookie);
		/* Anything but "page full" goes straight to the loop condition */
		if (status != -ENOSPC)
			continue;

		if (page->mapping != mapping) {
			/* Not a page cache page: use the next @arrays slot, if any */
			if (!--narrays)
				break;
			new = nfs_readdir_page_array_alloc(cookie, GFP_KERNEL);
			if (!new)
				break;
			arrays++;
			*arrays = page = new;
		} else {
			new = nfs_readdir_page_get_next(mapping, cookie,
							change_attr);
			if (!new)
				break;
			/* Release the previous follow-on page, never the first one */
			if (page != *arrays)
				nfs_readdir_page_unlock_and_put(page);
			page = new;
		}
		desc->page_index_max++;
		/* Retry the entry that did not fit, now in the new page */
		status = nfs_readdir_page_array_append(page, entry, &cookie);
	} while (!status && !entry->eof);

	switch (status) {
	case -EBADCOOKIE:
		/* Only treated as success when the entry is flagged as EOF */
		if (!entry->eof)
			break;
		nfs_readdir_page_set_eof(page);
		fallthrough;
	case -EAGAIN:
		status = 0;
		break;
	case -ENOSPC:
		status = 0;
		if (!desc->plus)
			break;
		/*
		 * Out of array space: keep decoding the rest of the reply,
		 * which with READDIRPLUS still primes the dcache.
		 */
		while (!nfs_readdir_entry_decode(desc, entry, &stream))
			;
	}

	/* Release a follow-on page cache page; the first page stays with the caller */
	if (page != *arrays)
		nfs_readdir_page_unlock_and_put(page);

	put_page(scratch);
	return status;
}
895  
896  static void nfs_readdir_free_pages(struct page **pages, size_t npages)
897  {
898  	while (npages--)
899  		put_page(pages[npages]);
900  	kfree(pages);
901  }
902  
903  /*
904   * nfs_readdir_alloc_pages() will allocate pages that must be freed with a call
905   * to nfs_readdir_free_pages()
906   */
907  static struct page **nfs_readdir_alloc_pages(size_t npages)
908  {
909  	struct page **pages;
910  	size_t i;
911  
912  	pages = kmalloc_array(npages, sizeof(*pages), GFP_KERNEL);
913  	if (!pages)
914  		return NULL;
915  	for (i = 0; i < npages; i++) {
916  		struct page *page = alloc_page(GFP_KERNEL);
917  		if (page == NULL)
918  			goto out_freepages;
919  		pages[i] = page;
920  	}
921  	return pages;
922  
923  out_freepages:
924  	nfs_readdir_free_pages(pages, i);
925  	return NULL;
926  }
927  
928  static int nfs_readdir_xdr_to_array(struct nfs_readdir_descriptor *desc,
929  				    __be32 *verf_arg, __be32 *verf_res,
930  				    struct page **arrays, size_t narrays)
931  {
932  	u64 change_attr;
933  	struct page **pages;
934  	struct page *page = *arrays;
935  	struct nfs_entry *entry;
936  	size_t array_size;
937  	struct inode *inode = file_inode(desc->file);
938  	unsigned int dtsize = desc->dtsize;
939  	unsigned int pglen;
940  	int status = -ENOMEM;
941  
942  	entry = kzalloc(sizeof(*entry), GFP_KERNEL);
943  	if (!entry)
944  		return -ENOMEM;
945  	entry->cookie = nfs_readdir_page_last_cookie(page);
946  	entry->fh = nfs_alloc_fhandle();
947  	entry->fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode));
948  	entry->server = NFS_SERVER(inode);
949  	if (entry->fh == NULL || entry->fattr == NULL)
950  		goto out;
951  
952  	array_size = (dtsize + PAGE_SIZE - 1) >> PAGE_SHIFT;
953  	pages = nfs_readdir_alloc_pages(array_size);
954  	if (!pages)
955  		goto out;
956  
957  	change_attr = inode_peek_iversion_raw(inode);
958  	status = nfs_readdir_xdr_filler(desc, verf_arg, entry->cookie, pages,
959  					dtsize, verf_res);
960  	if (status < 0)
961  		goto free_pages;
962  
963  	pglen = status;
964  	if (pglen != 0)
965  		status = nfs_readdir_page_filler(desc, entry, pages, pglen,
966  						 arrays, narrays, change_attr);
967  	else
968  		nfs_readdir_page_set_eof(page);
969  	desc->buffer_fills++;
970  
971  free_pages:
972  	nfs_readdir_free_pages(pages, array_size);
973  out:
974  	nfs_free_fattr(entry->fattr);
975  	nfs_free_fhandle(entry->fh);
976  	kfree(entry);
977  	return status;
978  }
979  
980  static void nfs_readdir_page_put(struct nfs_readdir_descriptor *desc)
981  {
982  	put_page(desc->page);
983  	desc->page = NULL;
984  }
985  
986  static void
987  nfs_readdir_page_unlock_and_put_cached(struct nfs_readdir_descriptor *desc)
988  {
989  	unlock_page(desc->page);
990  	nfs_readdir_page_put(desc);
991  }
992  
993  static struct page *
994  nfs_readdir_page_get_cached(struct nfs_readdir_descriptor *desc)
995  {
996  	struct address_space *mapping = desc->file->f_mapping;
997  	u64 change_attr = inode_peek_iversion_raw(mapping->host);
998  	u64 cookie = desc->last_cookie;
999  	struct page *page;
1000  
1001  	page = nfs_readdir_page_get_locked(mapping, cookie, change_attr);
1002  	if (!page)
1003  		return NULL;
1004  	if (desc->clear_cache && !nfs_readdir_page_needs_filling(page))
1005  		nfs_readdir_page_reinit_array(page, cookie, change_attr);
1006  	return page;
1007  }
1008  
/*
 * Returns 0 if desc->dir_cookie was found on page desc->page_index
 * and locks the page to prevent removal from the page cache.
 *
 * On success desc->page is left set, so the caller must release it.
 * On any other return the page has been unlocked and released here.
 *
 * Returns -ENOMEM if no cache page could be obtained, -EAGAIN when the
 * search should be repeated (either the page cache was invalidated after
 * -EBADCOOKIE/-ENOTSYNC, or the search moved on to the next array), or
 * another negative errno from filling or searching the array.
 */
static int find_and_lock_cache_page(struct nfs_readdir_descriptor *desc)
{
	struct inode *inode = file_inode(desc->file);
	struct nfs_inode *nfsi = NFS_I(inode);
	__be32 verf[NFS_DIR_VERIFIER_SIZE];
	int res;

	desc->page = nfs_readdir_page_get_cached(desc);
	if (!desc->page)
		return -ENOMEM;
	if (nfs_readdir_page_needs_filling(desc->page)) {
		/* Grow the dtsize if we had to go back for more pages */
		if (desc->page_index == desc->page_index_max)
			nfs_grow_dtsize(desc);
		desc->page_index_max = desc->page_index;
		trace_nfs_readdir_cache_fill(desc->file, nfsi->cookieverf,
					     desc->last_cookie,
					     desc->page->index, desc->dtsize);
		res = nfs_readdir_xdr_to_array(desc, nfsi->cookieverf, verf,
					       &desc->page, 1);
		if (res < 0) {
			nfs_readdir_page_unlock_and_put_cached(desc);
			trace_nfs_readdir_cache_fill_done(inode, res);
			/*
			 * For these two errors, drop the whole cache and
			 * restart the search from the first array.
			 */
			if (res == -EBADCOOKIE || res == -ENOTSYNC) {
				invalidate_inode_pages2(desc->file->f_mapping);
				nfs_readdir_rewind_search(desc);
				trace_nfs_readdir_invalidate_cache_range(
					inode, 0, MAX_LFS_FILESIZE);
				return -EAGAIN;
			}
			return res;
		}
		/*
		 * Set the cookie verifier if the page cache was empty
		 */
		if (desc->last_cookie == 0 &&
		    memcmp(nfsi->cookieverf, verf, sizeof(nfsi->cookieverf))) {
			memcpy(nfsi->cookieverf, verf,
			       sizeof(nfsi->cookieverf));
			/* Keep page 0 (just filled); invalidate from index 1 on */
			invalidate_inode_pages2_range(desc->file->f_mapping, 1,
						      -1);
			trace_nfs_readdir_invalidate_cache_range(
				inode, 1, MAX_LFS_FILESIZE);
		}
		desc->clear_cache = false;
	}
	res = nfs_readdir_search_array(desc);
	if (res == 0)
		return 0;
	nfs_readdir_page_unlock_and_put_cached(desc);
	return res;
}
1065  
1066  /* Search for desc->dir_cookie from the beginning of the page cache */
1067  static int readdir_search_pagecache(struct nfs_readdir_descriptor *desc)
1068  {
1069  	int res;
1070  
1071  	do {
1072  		res = find_and_lock_cache_page(desc);
1073  	} while (res == -EAGAIN);
1074  	return res;
1075  }
1076  
1077  #define NFS_READDIR_CACHE_MISS_THRESHOLD (16UL)
1078  
1079  /*
1080   * Once we've found the start of the dirent within a page: fill 'er up...
1081   */
1082  static void nfs_do_filldir(struct nfs_readdir_descriptor *desc,
1083  			   const __be32 *verf)
1084  {
1085  	struct file	*file = desc->file;
1086  	struct nfs_cache_array *array;
1087  	unsigned int i;
1088  	bool first_emit = !desc->dir_cookie;
1089  
1090  	array = kmap_local_page(desc->page);
1091  	for (i = desc->cache_entry_index; i < array->size; i++) {
1092  		struct nfs_cache_array_entry *ent;
1093  
1094  		ent = &array->array[i];
1095  		if (!dir_emit(desc->ctx, ent->name, ent->name_len,
1096  		    nfs_compat_user_ino64(ent->ino), ent->d_type)) {
1097  			desc->eob = true;
1098  			break;
1099  		}
1100  		memcpy(desc->verf, verf, sizeof(desc->verf));
1101  		if (i == array->size - 1) {
1102  			desc->dir_cookie = array->last_cookie;
1103  			nfs_readdir_seek_next_array(array, desc);
1104  		} else {
1105  			desc->dir_cookie = array->array[i + 1].cookie;
1106  			desc->last_cookie = array->array[0].cookie;
1107  		}
1108  		if (nfs_readdir_use_cookie(file))
1109  			desc->ctx->pos = desc->dir_cookie;
1110  		else
1111  			desc->ctx->pos++;
1112  		if (first_emit && i > NFS_READDIR_CACHE_MISS_THRESHOLD + 1) {
1113  			desc->eob = true;
1114  			break;
1115  		}
1116  	}
1117  	if (array->page_is_eof)
1118  		desc->eof = !desc->eob;
1119  
1120  	kunmap_local(array);
1121  	dfprintk(DIRCACHE, "NFS: nfs_do_filldir() filling ended @ cookie %llu\n",
1122  			(unsigned long long)desc->dir_cookie);
1123  }
1124  
1125  /*
1126   * If we cannot find a cookie in our cache, we suspect that this is
1127   * because it points to a deleted file, so we ask the server to return
1128   * whatever it thinks is the next entry. We then feed this to filldir.
1129   * If all goes well, we should then be able to find our way round the
1130   * cache on the next call to readdir_search_pagecache();
1131   *
1132   * NOTE: we cannot add the anonymous page to the pagecache because
1133   *	 the data it contains might not be page aligned. Besides,
1134   *	 we should already have a complete representation of the
1135   *	 directory in the page cache by the time we get here.
1136   */
1137  static int uncached_readdir(struct nfs_readdir_descriptor *desc)
1138  {
1139  	struct page	**arrays;
1140  	size_t		i, sz = 512;
1141  	__be32		verf[NFS_DIR_VERIFIER_SIZE];
1142  	int		status = -ENOMEM;
1143  
1144  	dfprintk(DIRCACHE, "NFS: uncached_readdir() searching for cookie %llu\n",
1145  			(unsigned long long)desc->dir_cookie);
1146  
1147  	arrays = kcalloc(sz, sizeof(*arrays), GFP_KERNEL);
1148  	if (!arrays)
1149  		goto out;
1150  	arrays[0] = nfs_readdir_page_array_alloc(desc->dir_cookie, GFP_KERNEL);
1151  	if (!arrays[0])
1152  		goto out;
1153  
1154  	desc->page_index = 0;
1155  	desc->cache_entry_index = 0;
1156  	desc->last_cookie = desc->dir_cookie;
1157  	desc->page_index_max = 0;
1158  
1159  	trace_nfs_readdir_uncached(desc->file, desc->verf, desc->last_cookie,
1160  				   -1, desc->dtsize);
1161  
1162  	status = nfs_readdir_xdr_to_array(desc, desc->verf, verf, arrays, sz);
1163  	if (status < 0) {
1164  		trace_nfs_readdir_uncached_done(file_inode(desc->file), status);
1165  		goto out_free;
1166  	}
1167  
1168  	for (i = 0; !desc->eob && i < sz && arrays[i]; i++) {
1169  		desc->page = arrays[i];
1170  		nfs_do_filldir(desc, verf);
1171  	}
1172  	desc->page = NULL;
1173  
1174  	/*
1175  	 * Grow the dtsize if we have to go back for more pages,
1176  	 * or shrink it if we're reading too many.
1177  	 */
1178  	if (!desc->eof) {
1179  		if (!desc->eob)
1180  			nfs_grow_dtsize(desc);
1181  		else if (desc->buffer_fills == 1 &&
1182  			 i < (desc->page_index_max >> 1))
1183  			nfs_shrink_dtsize(desc);
1184  	}
1185  out_free:
1186  	for (i = 0; i < sz && arrays[i]; i++)
1187  		nfs_readdir_page_array_free(arrays[i]);
1188  out:
1189  	if (!nfs_readdir_use_cookie(desc->file))
1190  		nfs_readdir_rewind_search(desc);
1191  	desc->page_index_max = -1;
1192  	kfree(arrays);
1193  	dfprintk(DIRCACHE, "NFS: %s: returns %d\n", __func__, status);
1194  	return status;
1195  }
1196  
1197  static bool nfs_readdir_handle_cache_misses(struct inode *inode,
1198  					    struct nfs_readdir_descriptor *desc,
1199  					    unsigned int cache_misses,
1200  					    bool force_clear)
1201  {
1202  	if (desc->ctx->pos == 0 || !desc->plus)
1203  		return false;
1204  	if (cache_misses <= NFS_READDIR_CACHE_MISS_THRESHOLD && !force_clear)
1205  		return false;
1206  	trace_nfs_readdir_force_readdirplus(inode);
1207  	return true;
1208  }
1209  
1210  /* The file offset position represents the dirent entry number.  A
1211     last cookie cache takes care of the common case of reading the
1212     whole directory.
1213   */
1214  static int nfs_readdir(struct file *file, struct dir_context *ctx)
1215  {
1216  	struct dentry	*dentry = file_dentry(file);
1217  	struct inode	*inode = d_inode(dentry);
1218  	struct nfs_inode *nfsi = NFS_I(inode);
1219  	struct nfs_open_dir_context *dir_ctx = file->private_data;
1220  	struct nfs_readdir_descriptor *desc;
1221  	unsigned int cache_hits, cache_misses;
1222  	bool force_clear;
1223  	int res;
1224  
1225  	dfprintk(FILE, "NFS: readdir(%pD2) starting at cookie %llu\n",
1226  			file, (long long)ctx->pos);
1227  	nfs_inc_stats(inode, NFSIOS_VFSGETDENTS);
1228  
1229  	/*
1230  	 * ctx->pos points to the dirent entry number.
1231  	 * *desc->dir_cookie has the cookie for the next entry. We have
1232  	 * to either find the entry with the appropriate number or
1233  	 * revalidate the cookie.
1234  	 */
1235  	nfs_revalidate_mapping(inode, file->f_mapping);
1236  
1237  	res = -ENOMEM;
1238  	desc = kzalloc(sizeof(*desc), GFP_KERNEL);
1239  	if (!desc)
1240  		goto out;
1241  	desc->file = file;
1242  	desc->ctx = ctx;
1243  	desc->page_index_max = -1;
1244  
1245  	spin_lock(&file->f_lock);
1246  	desc->dir_cookie = dir_ctx->dir_cookie;
1247  	desc->page_index = dir_ctx->page_index;
1248  	desc->last_cookie = dir_ctx->last_cookie;
1249  	desc->attr_gencount = dir_ctx->attr_gencount;
1250  	desc->eof = dir_ctx->eof;
1251  	nfs_set_dtsize(desc, dir_ctx->dtsize);
1252  	memcpy(desc->verf, dir_ctx->verf, sizeof(desc->verf));
1253  	cache_hits = atomic_xchg(&dir_ctx->cache_hits, 0);
1254  	cache_misses = atomic_xchg(&dir_ctx->cache_misses, 0);
1255  	force_clear = dir_ctx->force_clear;
1256  	spin_unlock(&file->f_lock);
1257  
1258  	if (desc->eof) {
1259  		res = 0;
1260  		goto out_free;
1261  	}
1262  
1263  	desc->plus = nfs_use_readdirplus(inode, ctx, cache_hits, cache_misses);
1264  	force_clear = nfs_readdir_handle_cache_misses(inode, desc, cache_misses,
1265  						      force_clear);
1266  	desc->clear_cache = force_clear;
1267  
1268  	do {
1269  		res = readdir_search_pagecache(desc);
1270  
1271  		if (res == -EBADCOOKIE) {
1272  			res = 0;
1273  			/* This means either end of directory */
1274  			if (desc->dir_cookie && !desc->eof) {
1275  				/* Or that the server has 'lost' a cookie */
1276  				res = uncached_readdir(desc);
1277  				if (res == 0)
1278  					continue;
1279  				if (res == -EBADCOOKIE || res == -ENOTSYNC)
1280  					res = 0;
1281  			}
1282  			break;
1283  		}
1284  		if (res == -ETOOSMALL && desc->plus) {
1285  			nfs_zap_caches(inode);
1286  			desc->plus = false;
1287  			desc->eof = false;
1288  			continue;
1289  		}
1290  		if (res < 0)
1291  			break;
1292  
1293  		nfs_do_filldir(desc, nfsi->cookieverf);
1294  		nfs_readdir_page_unlock_and_put_cached(desc);
1295  		if (desc->page_index == desc->page_index_max)
1296  			desc->clear_cache = force_clear;
1297  	} while (!desc->eob && !desc->eof);
1298  
1299  	spin_lock(&file->f_lock);
1300  	dir_ctx->dir_cookie = desc->dir_cookie;
1301  	dir_ctx->last_cookie = desc->last_cookie;
1302  	dir_ctx->attr_gencount = desc->attr_gencount;
1303  	dir_ctx->page_index = desc->page_index;
1304  	dir_ctx->force_clear = force_clear;
1305  	dir_ctx->eof = desc->eof;
1306  	dir_ctx->dtsize = desc->dtsize;
1307  	memcpy(dir_ctx->verf, desc->verf, sizeof(dir_ctx->verf));
1308  	spin_unlock(&file->f_lock);
1309  out_free:
1310  	kfree(desc);
1311  
1312  out:
1313  	dfprintk(FILE, "NFS: readdir(%pD2) returns %d\n", file, res);
1314  	return res;
1315  }
1316  
1317  static loff_t nfs_llseek_dir(struct file *filp, loff_t offset, int whence)
1318  {
1319  	struct nfs_open_dir_context *dir_ctx = filp->private_data;
1320  
1321  	dfprintk(FILE, "NFS: llseek dir(%pD2, %lld, %d)\n",
1322  			filp, offset, whence);
1323  
1324  	switch (whence) {
1325  	default:
1326  		return -EINVAL;
1327  	case SEEK_SET:
1328  		if (offset < 0)
1329  			return -EINVAL;
1330  		spin_lock(&filp->f_lock);
1331  		break;
1332  	case SEEK_CUR:
1333  		if (offset == 0)
1334  			return filp->f_pos;
1335  		spin_lock(&filp->f_lock);
1336  		offset += filp->f_pos;
1337  		if (offset < 0) {
1338  			spin_unlock(&filp->f_lock);
1339  			return -EINVAL;
1340  		}
1341  	}
1342  	if (offset != filp->f_pos) {
1343  		filp->f_pos = offset;
1344  		dir_ctx->page_index = 0;
1345  		if (!nfs_readdir_use_cookie(filp)) {
1346  			dir_ctx->dir_cookie = 0;
1347  			dir_ctx->last_cookie = 0;
1348  		} else {
1349  			dir_ctx->dir_cookie = offset;
1350  			dir_ctx->last_cookie = offset;
1351  		}
1352  		dir_ctx->eof = false;
1353  	}
1354  	spin_unlock(&filp->f_lock);
1355  	return offset;
1356  }
1357  
1358  /*
1359   * All directory operations under NFS are synchronous, so fsync()
1360   * is a dummy operation.
1361   */
1362  static int nfs_fsync_dir(struct file *filp, loff_t start, loff_t end,
1363  			 int datasync)
1364  {
1365  	dfprintk(FILE, "NFS: fsync dir(%pD2) datasync %d\n", filp, datasync);
1366  
1367  	nfs_inc_stats(file_inode(filp), NFSIOS_VFSFSYNC);
1368  	return 0;
1369  }
1370  
1371  /**
1372   * nfs_force_lookup_revalidate - Mark the directory as having changed
1373   * @dir: pointer to directory inode
1374   *
1375   * This forces the revalidation code in nfs_lookup_revalidate() to do a
1376   * full lookup on all child dentries of 'dir' whenever a change occurs
1377   * on the server that might have invalidated our dcache.
1378   *
1379   * Note that we reserve bit '0' as a tag to let us know when a dentry
1380   * was revalidated while holding a delegation on its inode.
1381   *
1382   * The caller should be holding dir->i_lock
1383   */
1384  void nfs_force_lookup_revalidate(struct inode *dir)
1385  {
1386  	NFS_I(dir)->cache_change_attribute += 2;
1387  }
1388  EXPORT_SYMBOL_GPL(nfs_force_lookup_revalidate);
1389  
1390  /**
1391   * nfs_verify_change_attribute - Detects NFS remote directory changes
1392   * @dir: pointer to parent directory inode
1393   * @verf: previously saved change attribute
1394   *
1395   * Return "false" if the verifiers doesn't match the change attribute.
1396   * This would usually indicate that the directory contents have changed on
1397   * the server, and that any dentries need revalidating.
1398   */
1399  static bool nfs_verify_change_attribute(struct inode *dir, unsigned long verf)
1400  {
1401  	return (verf & ~1UL) == nfs_save_change_attribute(dir);
1402  }
1403  
/* Set bit 0 of *@verf, the "revalidated under a delegation" tag. */
static void nfs_set_verifier_delegated(unsigned long *verf)
{
	*verf = *verf | 1UL;
}
1408  
1409  #if IS_ENABLED(CONFIG_NFS_V4)
/* Clear bit 0 of *@verf, the "revalidated under a delegation" tag. */
static void nfs_unset_verifier_delegated(unsigned long *verf)
{
	*verf = *verf & ~1UL;
}
1414  #endif /* IS_ENABLED(CONFIG_NFS_V4) */
1415  
/* Report whether bit 0, the delegation tag, is set in @verf. */
static bool nfs_test_verifier_delegated(unsigned long verf)
{
	return (verf & 1UL) != 0;
}
1420  
1421  static bool nfs_verifier_is_delegated(struct dentry *dentry)
1422  {
1423  	return nfs_test_verifier_delegated(dentry->d_time);
1424  }
1425  
1426  static void nfs_set_verifier_locked(struct dentry *dentry, unsigned long verf)
1427  {
1428  	struct inode *inode = d_inode(dentry);
1429  	struct inode *dir = d_inode(dentry->d_parent);
1430  
1431  	if (!nfs_verify_change_attribute(dir, verf))
1432  		return;
1433  	if (inode && NFS_PROTO(inode)->have_delegation(inode, FMODE_READ))
1434  		nfs_set_verifier_delegated(&verf);
1435  	dentry->d_time = verf;
1436  }
1437  
1438  /**
1439   * nfs_set_verifier - save a parent directory verifier in the dentry
1440   * @dentry: pointer to dentry
1441   * @verf: verifier to save
1442   *
1443   * Saves the parent directory verifier in @dentry. If the inode has
1444   * a delegation, we also tag the dentry as having been revalidated
1445   * while holding a delegation so that we know we don't have to
1446   * look it up again after a directory change.
1447   */
1448  void nfs_set_verifier(struct dentry *dentry, unsigned long verf)
1449  {
1450  
1451  	spin_lock(&dentry->d_lock);
1452  	nfs_set_verifier_locked(dentry, verf);
1453  	spin_unlock(&dentry->d_lock);
1454  }
1455  EXPORT_SYMBOL_GPL(nfs_set_verifier);
1456  
1457  #if IS_ENABLED(CONFIG_NFS_V4)
1458  /**
1459   * nfs_clear_verifier_delegated - clear the dir verifier delegation tag
1460   * @inode: pointer to inode
1461   *
1462   * Iterates through the dentries in the inode alias list and clears
1463   * the tag used to indicate that the dentry has been revalidated
1464   * while holding a delegation.
1465   * This function is intended for use when the delegation is being
1466   * returned or revoked.
1467   */
1468  void nfs_clear_verifier_delegated(struct inode *inode)
1469  {
1470  	struct dentry *alias;
1471  
1472  	if (!inode)
1473  		return;
1474  	spin_lock(&inode->i_lock);
1475  	hlist_for_each_entry(alias, &inode->i_dentry, d_u.d_alias) {
1476  		spin_lock(&alias->d_lock);
1477  		nfs_unset_verifier_delegated(&alias->d_time);
1478  		spin_unlock(&alias->d_lock);
1479  	}
1480  	spin_unlock(&inode->i_lock);
1481  }
1482  EXPORT_SYMBOL_GPL(nfs_clear_verifier_delegated);
1483  #endif /* IS_ENABLED(CONFIG_NFS_V4) */
1484  
1485  static int nfs_dentry_verify_change(struct inode *dir, struct dentry *dentry)
1486  {
1487  	if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE) &&
1488  	    d_really_is_negative(dentry))
1489  		return dentry->d_time == inode_peek_iversion_raw(dir);
1490  	return nfs_verify_change_attribute(dir, dentry->d_time);
1491  }
1492  
1493  /*
1494   * A check for whether or not the parent directory has changed.
1495   * In the case it has, we assume that the dentries are untrustworthy
1496   * and may need to be looked up again.
1497   * If rcu_walk prevents us from performing a full check, return 0.
1498   */
1499  static int nfs_check_verifier(struct inode *dir, struct dentry *dentry,
1500  			      int rcu_walk)
1501  {
1502  	if (IS_ROOT(dentry))
1503  		return 1;
1504  	if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONE)
1505  		return 0;
1506  	if (!nfs_dentry_verify_change(dir, dentry))
1507  		return 0;
1508  	/* Revalidate nfsi->cache_change_attribute before we declare a match */
1509  	if (nfs_mapping_need_revalidate_inode(dir)) {
1510  		if (rcu_walk)
1511  			return 0;
1512  		if (__nfs_revalidate_inode(NFS_SERVER(dir), dir) < 0)
1513  			return 0;
1514  	}
1515  	if (!nfs_dentry_verify_change(dir, dentry))
1516  		return 0;
1517  	return 1;
1518  }
1519  
1520  /*
1521   * Use intent information to check whether or not we're going to do
1522   * an O_EXCL create using this path component.
1523   */
1524  static int nfs_is_exclusive_create(struct inode *dir, unsigned int flags)
1525  {
1526  	if (NFS_PROTO(dir)->version == 2)
1527  		return 0;
1528  	return flags & LOOKUP_EXCL;
1529  }
1530  
1531  /*
1532   * Inode and filehandle revalidation for lookups.
1533   *
1534   * We force revalidation in the cases where the VFS sets LOOKUP_REVAL,
1535   * or if the intent information indicates that we're about to open this
1536   * particular file and the "nocto" mount flag is not set.
1537   *
1538   */
1539  static
1540  int nfs_lookup_verify_inode(struct inode *inode, unsigned int flags)
1541  {
1542  	struct nfs_server *server = NFS_SERVER(inode);
1543  	int ret;
1544  
1545  	if (IS_AUTOMOUNT(inode))
1546  		return 0;
1547  
1548  	if (flags & LOOKUP_OPEN) {
1549  		switch (inode->i_mode & S_IFMT) {
1550  		case S_IFREG:
1551  			/* A NFSv4 OPEN will revalidate later */
1552  			if (server->caps & NFS_CAP_ATOMIC_OPEN)
1553  				goto out;
1554  			fallthrough;
1555  		case S_IFDIR:
1556  			if (server->flags & NFS_MOUNT_NOCTO)
1557  				break;
1558  			/* NFS close-to-open cache consistency validation */
1559  			goto out_force;
1560  		}
1561  	}
1562  
1563  	/* VFS wants an on-the-wire revalidation */
1564  	if (flags & LOOKUP_REVAL)
1565  		goto out_force;
1566  out:
1567  	if (inode->i_nlink > 0 ||
1568  	    (inode->i_nlink == 0 &&
1569  	     test_bit(NFS_INO_PRESERVE_UNLINKED, &NFS_I(inode)->flags)))
1570  		return 0;
1571  	else
1572  		return -ESTALE;
1573  out_force:
1574  	if (flags & LOOKUP_RCU)
1575  		return -ECHILD;
1576  	ret = __nfs_revalidate_inode(server, inode);
1577  	if (ret != 0)
1578  		return ret;
1579  	goto out;
1580  }
1581  
1582  static void nfs_mark_dir_for_revalidate(struct inode *inode)
1583  {
1584  	spin_lock(&inode->i_lock);
1585  	nfs_set_cache_invalid(inode, NFS_INO_INVALID_CHANGE);
1586  	spin_unlock(&inode->i_lock);
1587  }
1588  
1589  /*
1590   * We judge how long we want to trust negative
1591   * dentries by looking at the parent inode mtime.
1592   *
1593   * If parent mtime has changed, we revalidate, else we wait for a
1594   * period corresponding to the parent's attribute cache timeout value.
1595   *
1596   * If LOOKUP_RCU prevents us from performing a full check, return 1
1597   * suggesting a reval is needed.
1598   *
1599   * Note that when creating a new file, or looking up a rename target,
1600   * then it shouldn't be necessary to revalidate a negative dentry.
1601   */
1602  static inline
1603  int nfs_neg_need_reval(struct inode *dir, struct dentry *dentry,
1604  		       unsigned int flags)
1605  {
1606  	if (flags & (LOOKUP_CREATE | LOOKUP_RENAME_TARGET))
1607  		return 0;
1608  	if (NFS_SERVER(dir)->flags & NFS_MOUNT_LOOKUP_CACHE_NONEG)
1609  		return 1;
1610  	/* Case insensitive server? Revalidate negative dentries */
1611  	if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
1612  		return 1;
1613  	return !nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU);
1614  }
1615  
/*
 * Common exit path for dentry revalidation: adjusts the verdict for
 * disconnected roots, emits the exit tracepoint and returns the result.
 */
static int
nfs_lookup_revalidate_done(struct inode *dir, struct dentry *dentry,
			   struct inode *inode, int error)
{
	/*
	 * We can't d_drop the root of a disconnected tree:
	 * its d_hash is on the s_anon list and d_drop() would hide
	 * it from shrink_dcache_for_unmount(), leading to busy
	 * inodes on unmount and further oopses.
	 */
	if (error == 0 && inode && IS_ROOT(dentry))
		error = 1;

	trace_nfs_lookup_revalidate_exit(dir, dentry, 0, error);
	return error;
}
1637  
1638  static int
1639  nfs_lookup_revalidate_negative(struct inode *dir, struct dentry *dentry,
1640  			       unsigned int flags)
1641  {
1642  	int ret = 1;
1643  	if (nfs_neg_need_reval(dir, dentry, flags)) {
1644  		if (flags & LOOKUP_RCU)
1645  			return -ECHILD;
1646  		ret = 0;
1647  	}
1648  	return nfs_lookup_revalidate_done(dir, dentry, NULL, ret);
1649  }
1650  
/*
 * Revalidate a dentry whose verifier carries the delegation tag: refresh
 * the saved verifier from the directory and report the dentry as valid.
 */
static int
nfs_lookup_revalidate_delegated(struct inode *dir, struct dentry *dentry,
				struct inode *inode)
{
	unsigned long dir_verifier = nfs_save_change_attribute(dir);

	nfs_set_verifier(dentry, dir_verifier);
	return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
}
1658  
1659  static int nfs_lookup_revalidate_dentry(struct inode *dir,
1660  					struct dentry *dentry,
1661  					struct inode *inode, unsigned int flags)
1662  {
1663  	struct nfs_fh *fhandle;
1664  	struct nfs_fattr *fattr;
1665  	unsigned long dir_verifier;
1666  	int ret;
1667  
1668  	trace_nfs_lookup_revalidate_enter(dir, dentry, flags);
1669  
1670  	ret = -ENOMEM;
1671  	fhandle = nfs_alloc_fhandle();
1672  	fattr = nfs_alloc_fattr_with_label(NFS_SERVER(inode));
1673  	if (fhandle == NULL || fattr == NULL)
1674  		goto out;
1675  
1676  	dir_verifier = nfs_save_change_attribute(dir);
1677  	ret = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
1678  	if (ret < 0) {
1679  		switch (ret) {
1680  		case -ESTALE:
1681  		case -ENOENT:
1682  			ret = 0;
1683  			break;
1684  		case -ETIMEDOUT:
1685  			if (NFS_SERVER(inode)->flags & NFS_MOUNT_SOFTREVAL)
1686  				ret = 1;
1687  		}
1688  		goto out;
1689  	}
1690  
1691  	/* Request help from readdirplus */
1692  	nfs_lookup_advise_force_readdirplus(dir, flags);
1693  
1694  	ret = 0;
1695  	if (nfs_compare_fh(NFS_FH(inode), fhandle))
1696  		goto out;
1697  	if (nfs_refresh_inode(inode, fattr) < 0)
1698  		goto out;
1699  
1700  	nfs_setsecurity(inode, fattr);
1701  	nfs_set_verifier(dentry, dir_verifier);
1702  
1703  	ret = 1;
1704  out:
1705  	nfs_free_fattr(fattr);
1706  	nfs_free_fhandle(fhandle);
1707  
1708  	/*
1709  	 * If the lookup failed despite the dentry change attribute being
1710  	 * a match, then we should revalidate the directory cache.
1711  	 */
1712  	if (!ret && nfs_dentry_verify_change(dir, dentry))
1713  		nfs_mark_dir_for_revalidate(dir);
1714  	return nfs_lookup_revalidate_done(dir, dentry, inode, ret);
1715  }
1716  
1717  /*
1718   * This is called every time the dcache has a lookup hit,
1719   * and we should check whether we can really trust that
1720   * lookup.
1721   *
1722   * NOTE! The hit can be a negative hit too, don't assume
1723   * we have an inode!
1724   *
1725   * If the parent directory is seen to have changed, we throw out the
1726   * cached dentry and do a new lookup.
1727   */
1728  static int
1729  nfs_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
1730  			 unsigned int flags)
1731  {
1732  	struct inode *inode;
1733  	int error;
1734  
1735  	nfs_inc_stats(dir, NFSIOS_DENTRYREVALIDATE);
1736  	inode = d_inode(dentry);
1737  
1738  	if (!inode)
1739  		return nfs_lookup_revalidate_negative(dir, dentry, flags);
1740  
1741  	if (is_bad_inode(inode)) {
1742  		dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
1743  				__func__, dentry);
1744  		goto out_bad;
1745  	}
1746  
1747  	if ((flags & LOOKUP_RENAME_TARGET) && d_count(dentry) < 2 &&
1748  	    nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
1749  		goto out_bad;
1750  
1751  	if (nfs_verifier_is_delegated(dentry))
1752  		return nfs_lookup_revalidate_delegated(dir, dentry, inode);
1753  
1754  	/* Force a full look up iff the parent directory has changed */
1755  	if (!(flags & (LOOKUP_EXCL | LOOKUP_REVAL)) &&
1756  	    nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU)) {
1757  		error = nfs_lookup_verify_inode(inode, flags);
1758  		if (error) {
1759  			if (error == -ESTALE)
1760  				nfs_mark_dir_for_revalidate(dir);
1761  			goto out_bad;
1762  		}
1763  		goto out_valid;
1764  	}
1765  
1766  	if (flags & LOOKUP_RCU)
1767  		return -ECHILD;
1768  
1769  	if (NFS_STALE(inode))
1770  		goto out_bad;
1771  
1772  	return nfs_lookup_revalidate_dentry(dir, dentry, inode, flags);
1773  out_valid:
1774  	return nfs_lookup_revalidate_done(dir, dentry, inode, 1);
1775  out_bad:
1776  	if (flags & LOOKUP_RCU)
1777  		return -ECHILD;
1778  	return nfs_lookup_revalidate_done(dir, dentry, inode, 0);
1779  }
1780  
1781  static int
1782  __nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags,
1783  			int (*reval)(struct inode *, struct dentry *, unsigned int))
1784  {
1785  	struct dentry *parent;
1786  	struct inode *dir;
1787  	int ret;
1788  
1789  	if (flags & LOOKUP_RCU) {
1790  		if (dentry->d_fsdata == NFS_FSDATA_BLOCKED)
1791  			return -ECHILD;
1792  		parent = READ_ONCE(dentry->d_parent);
1793  		dir = d_inode_rcu(parent);
1794  		if (!dir)
1795  			return -ECHILD;
1796  		ret = reval(dir, dentry, flags);
1797  		if (parent != READ_ONCE(dentry->d_parent))
1798  			return -ECHILD;
1799  	} else {
1800  		/* Wait for unlink to complete */
1801  		wait_var_event(&dentry->d_fsdata,
1802  			       dentry->d_fsdata != NFS_FSDATA_BLOCKED);
1803  		parent = dget_parent(dentry);
1804  		ret = reval(d_inode(parent), dentry, flags);
1805  		dput(parent);
1806  	}
1807  	return ret;
1808  }
1809  
1810  static int nfs_lookup_revalidate(struct dentry *dentry, unsigned int flags)
1811  {
1812  	return __nfs_lookup_revalidate(dentry, flags, nfs_do_lookup_revalidate);
1813  }
1814  
1815  /*
1816   * A weaker form of d_revalidate for revalidating just the d_inode(dentry)
1817   * when we don't really care about the dentry name. This is called when a
1818   * pathwalk ends on a dentry that was not found via a normal lookup in the
1819   * parent dir (e.g.: ".", "..", procfs symlinks or mountpoint traversals).
1820   *
1821   * In this situation, we just want to verify that the inode itself is OK
1822   * since the dentry might have changed on the server.
1823   */
1824  static int nfs_weak_revalidate(struct dentry *dentry, unsigned int flags)
1825  {
1826  	struct inode *inode = d_inode(dentry);
1827  	int error = 0;
1828  
1829  	/*
1830  	 * I believe we can only get a negative dentry here in the case of a
1831  	 * procfs-style symlink. Just assume it's correct for now, but we may
1832  	 * eventually need to do something more here.
1833  	 */
1834  	if (!inode) {
1835  		dfprintk(LOOKUPCACHE, "%s: %pd2 has negative inode\n",
1836  				__func__, dentry);
1837  		return 1;
1838  	}
1839  
1840  	if (is_bad_inode(inode)) {
1841  		dfprintk(LOOKUPCACHE, "%s: %pd2 has dud inode\n",
1842  				__func__, dentry);
1843  		return 0;
1844  	}
1845  
1846  	error = nfs_lookup_verify_inode(inode, flags);
1847  	dfprintk(LOOKUPCACHE, "NFS: %s: inode %lu is %s\n",
1848  			__func__, inode->i_ino, error ? "invalid" : "valid");
1849  	return !error;
1850  }
1851  
1852  /*
1853   * This is called from dput() when d_count is going to 0.
1854   */
1855  static int nfs_dentry_delete(const struct dentry *dentry)
1856  {
1857  	dfprintk(VFS, "NFS: dentry_delete(%pd2, %x)\n",
1858  		dentry, dentry->d_flags);
1859  
1860  	/* Unhash any dentry with a stale inode */
1861  	if (d_really_is_positive(dentry) && NFS_STALE(d_inode(dentry)))
1862  		return 1;
1863  
1864  	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
1865  		/* Unhash it, so that ->d_iput() would be called */
1866  		return 1;
1867  	}
1868  	if (!(dentry->d_sb->s_flags & SB_ACTIVE)) {
1869  		/* Unhash it, so that ancestors of killed async unlink
1870  		 * files will be cleaned up during umount */
1871  		return 1;
1872  	}
1873  	return 0;
1874  
1875  }
1876  
1877  /* Ensure that we revalidate inode->i_nlink */
1878  static void nfs_drop_nlink(struct inode *inode)
1879  {
1880  	spin_lock(&inode->i_lock);
1881  	/* drop the inode if we're reasonably sure this is the last link */
1882  	if (inode->i_nlink > 0)
1883  		drop_nlink(inode);
1884  	NFS_I(inode)->attr_gencount = nfs_inc_attr_generation_counter();
1885  	nfs_set_cache_invalid(
1886  		inode, NFS_INO_INVALID_CHANGE | NFS_INO_INVALID_CTIME |
1887  			       NFS_INO_INVALID_NLINK);
1888  	spin_unlock(&inode->i_lock);
1889  }
1890  
1891  /*
1892   * Called when the dentry loses inode.
1893   * We use it to clean up silly-renamed files.
1894   */
1895  static void nfs_dentry_iput(struct dentry *dentry, struct inode *inode)
1896  {
1897  	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
1898  		nfs_complete_unlink(dentry, inode);
1899  		nfs_drop_nlink(inode);
1900  	}
1901  	iput(inode);
1902  }
1903  
1904  static void nfs_d_release(struct dentry *dentry)
1905  {
1906  	/* free cached devname value, if it survived that far */
1907  	if (unlikely(dentry->d_fsdata)) {
1908  		if (dentry->d_flags & DCACHE_NFSFS_RENAMED)
1909  			WARN_ON(1);
1910  		else
1911  			kfree(dentry->d_fsdata);
1912  	}
1913  }
1914  
1915  const struct dentry_operations nfs_dentry_operations = {
1916  	.d_revalidate	= nfs_lookup_revalidate,
1917  	.d_weak_revalidate	= nfs_weak_revalidate,
1918  	.d_delete	= nfs_dentry_delete,
1919  	.d_iput		= nfs_dentry_iput,
1920  	.d_automount	= nfs_d_automount,
1921  	.d_release	= nfs_d_release,
1922  };
1923  EXPORT_SYMBOL_GPL(nfs_dentry_operations);
1924  
1925  struct dentry *nfs_lookup(struct inode *dir, struct dentry * dentry, unsigned int flags)
1926  {
1927  	struct dentry *res;
1928  	struct inode *inode = NULL;
1929  	struct nfs_fh *fhandle = NULL;
1930  	struct nfs_fattr *fattr = NULL;
1931  	unsigned long dir_verifier;
1932  	int error;
1933  
1934  	dfprintk(VFS, "NFS: lookup(%pd2)\n", dentry);
1935  	nfs_inc_stats(dir, NFSIOS_VFSLOOKUP);
1936  
1937  	if (unlikely(dentry->d_name.len > NFS_SERVER(dir)->namelen))
1938  		return ERR_PTR(-ENAMETOOLONG);
1939  
1940  	/*
1941  	 * If we're doing an exclusive create, optimize away the lookup
1942  	 * but don't hash the dentry.
1943  	 */
1944  	if (nfs_is_exclusive_create(dir, flags) || flags & LOOKUP_RENAME_TARGET)
1945  		return NULL;
1946  
1947  	res = ERR_PTR(-ENOMEM);
1948  	fhandle = nfs_alloc_fhandle();
1949  	fattr = nfs_alloc_fattr_with_label(NFS_SERVER(dir));
1950  	if (fhandle == NULL || fattr == NULL)
1951  		goto out;
1952  
1953  	dir_verifier = nfs_save_change_attribute(dir);
1954  	trace_nfs_lookup_enter(dir, dentry, flags);
1955  	error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
1956  	if (error == -ENOENT) {
1957  		if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
1958  			dir_verifier = inode_peek_iversion_raw(dir);
1959  		goto no_entry;
1960  	}
1961  	if (error < 0) {
1962  		res = ERR_PTR(error);
1963  		goto out;
1964  	}
1965  	inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
1966  	res = ERR_CAST(inode);
1967  	if (IS_ERR(res))
1968  		goto out;
1969  
1970  	/* Notify readdir to use READDIRPLUS */
1971  	nfs_lookup_advise_force_readdirplus(dir, flags);
1972  
1973  no_entry:
1974  	res = d_splice_alias(inode, dentry);
1975  	if (res != NULL) {
1976  		if (IS_ERR(res))
1977  			goto out;
1978  		dentry = res;
1979  	}
1980  	nfs_set_verifier(dentry, dir_verifier);
1981  out:
1982  	trace_nfs_lookup_exit(dir, dentry, flags, PTR_ERR_OR_ZERO(res));
1983  	nfs_free_fattr(fattr);
1984  	nfs_free_fhandle(fhandle);
1985  	return res;
1986  }
1987  EXPORT_SYMBOL_GPL(nfs_lookup);
1988  
1989  void nfs_d_prune_case_insensitive_aliases(struct inode *inode)
1990  {
1991  	/* Case insensitive server? Revalidate dentries */
1992  	if (inode && nfs_server_capable(inode, NFS_CAP_CASE_INSENSITIVE))
1993  		d_prune_aliases(inode);
1994  }
1995  EXPORT_SYMBOL_GPL(nfs_d_prune_case_insensitive_aliases);
1996  
1997  #if IS_ENABLED(CONFIG_NFS_V4)
1998  static int nfs4_lookup_revalidate(struct dentry *, unsigned int);
1999  
2000  const struct dentry_operations nfs4_dentry_operations = {
2001  	.d_revalidate	= nfs4_lookup_revalidate,
2002  	.d_weak_revalidate	= nfs_weak_revalidate,
2003  	.d_delete	= nfs_dentry_delete,
2004  	.d_iput		= nfs_dentry_iput,
2005  	.d_automount	= nfs_d_automount,
2006  	.d_release	= nfs_d_release,
2007  };
2008  EXPORT_SYMBOL_GPL(nfs4_dentry_operations);
2009  
2010  static struct nfs_open_context *create_nfs_open_context(struct dentry *dentry, int open_flags, struct file *filp)
2011  {
2012  	return alloc_nfs_open_context(dentry, flags_to_mode(open_flags), filp);
2013  }
2014  
/*
 * Open callback handed to finish_open(): calls nfs_fscache_open_file()
 * for the file and always reports success.
 */
static int do_open(struct inode *inode, struct file *filp)
{
	nfs_fscache_open_file(inode, filp);

	return 0;
}
2020  
2021  static int nfs_finish_open(struct nfs_open_context *ctx,
2022  			   struct dentry *dentry,
2023  			   struct file *file, unsigned open_flags)
2024  {
2025  	int err;
2026  
2027  	err = finish_open(file, dentry, do_open);
2028  	if (err)
2029  		goto out;
2030  	if (S_ISREG(file_inode(file)->i_mode))
2031  		nfs_file_set_open_context(file, ctx);
2032  	else
2033  		err = -EOPENSTALE;
2034  out:
2035  	return err;
2036  }
2037  
2038  int nfs_atomic_open(struct inode *dir, struct dentry *dentry,
2039  		    struct file *file, unsigned open_flags,
2040  		    umode_t mode)
2041  {
2042  	DECLARE_WAIT_QUEUE_HEAD_ONSTACK(wq);
2043  	struct nfs_open_context *ctx;
2044  	struct dentry *res;
2045  	struct iattr attr = { .ia_valid = ATTR_OPEN };
2046  	struct inode *inode;
2047  	unsigned int lookup_flags = 0;
2048  	unsigned long dir_verifier;
2049  	bool switched = false;
2050  	int created = 0;
2051  	int err;
2052  
2053  	/* Expect a negative dentry */
2054  	BUG_ON(d_inode(dentry));
2055  
2056  	dfprintk(VFS, "NFS: atomic_open(%s/%lu), %pd\n",
2057  			dir->i_sb->s_id, dir->i_ino, dentry);
2058  
2059  	err = nfs_check_flags(open_flags);
2060  	if (err)
2061  		return err;
2062  
2063  	/* NFS only supports OPEN on regular files */
2064  	if ((open_flags & O_DIRECTORY)) {
2065  		if (!d_in_lookup(dentry)) {
2066  			/*
2067  			 * Hashed negative dentry with O_DIRECTORY: dentry was
2068  			 * revalidated and is fine, no need to perform lookup
2069  			 * again
2070  			 */
2071  			return -ENOENT;
2072  		}
2073  		lookup_flags = LOOKUP_OPEN|LOOKUP_DIRECTORY;
2074  		goto no_open;
2075  	}
2076  
2077  	if (dentry->d_name.len > NFS_SERVER(dir)->namelen)
2078  		return -ENAMETOOLONG;
2079  
2080  	if (open_flags & O_CREAT) {
2081  		struct nfs_server *server = NFS_SERVER(dir);
2082  
2083  		if (!(server->attr_bitmask[2] & FATTR4_WORD2_MODE_UMASK))
2084  			mode &= ~current_umask();
2085  
2086  		attr.ia_valid |= ATTR_MODE;
2087  		attr.ia_mode = mode;
2088  	}
2089  	if (open_flags & O_TRUNC) {
2090  		attr.ia_valid |= ATTR_SIZE;
2091  		attr.ia_size = 0;
2092  	}
2093  
2094  	if (!(open_flags & O_CREAT) && !d_in_lookup(dentry)) {
2095  		d_drop(dentry);
2096  		switched = true;
2097  		dentry = d_alloc_parallel(dentry->d_parent,
2098  					  &dentry->d_name, &wq);
2099  		if (IS_ERR(dentry))
2100  			return PTR_ERR(dentry);
2101  		if (unlikely(!d_in_lookup(dentry)))
2102  			return finish_no_open(file, dentry);
2103  	}
2104  
2105  	ctx = create_nfs_open_context(dentry, open_flags, file);
2106  	err = PTR_ERR(ctx);
2107  	if (IS_ERR(ctx))
2108  		goto out;
2109  
2110  	trace_nfs_atomic_open_enter(dir, ctx, open_flags);
2111  	inode = NFS_PROTO(dir)->open_context(dir, ctx, open_flags, &attr, &created);
2112  	if (created)
2113  		file->f_mode |= FMODE_CREATED;
2114  	if (IS_ERR(inode)) {
2115  		err = PTR_ERR(inode);
2116  		trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
2117  		put_nfs_open_context(ctx);
2118  		d_drop(dentry);
2119  		switch (err) {
2120  		case -ENOENT:
2121  			d_splice_alias(NULL, dentry);
2122  			if (nfs_server_capable(dir, NFS_CAP_CASE_INSENSITIVE))
2123  				dir_verifier = inode_peek_iversion_raw(dir);
2124  			else
2125  				dir_verifier = nfs_save_change_attribute(dir);
2126  			nfs_set_verifier(dentry, dir_verifier);
2127  			break;
2128  		case -EISDIR:
2129  		case -ENOTDIR:
2130  			goto no_open;
2131  		case -ELOOP:
2132  			if (!(open_flags & O_NOFOLLOW))
2133  				goto no_open;
2134  			break;
2135  			/* case -EINVAL: */
2136  		default:
2137  			break;
2138  		}
2139  		goto out;
2140  	}
2141  	file->f_mode |= FMODE_CAN_ODIRECT;
2142  
2143  	err = nfs_finish_open(ctx, ctx->dentry, file, open_flags);
2144  	trace_nfs_atomic_open_exit(dir, ctx, open_flags, err);
2145  	put_nfs_open_context(ctx);
2146  out:
2147  	if (unlikely(switched)) {
2148  		d_lookup_done(dentry);
2149  		dput(dentry);
2150  	}
2151  	return err;
2152  
2153  no_open:
2154  	res = nfs_lookup(dir, dentry, lookup_flags);
2155  	if (!res) {
2156  		inode = d_inode(dentry);
2157  		if ((lookup_flags & LOOKUP_DIRECTORY) && inode &&
2158  		    !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode)))
2159  			res = ERR_PTR(-ENOTDIR);
2160  		else if (inode && S_ISREG(inode->i_mode))
2161  			res = ERR_PTR(-EOPENSTALE);
2162  	} else if (!IS_ERR(res)) {
2163  		inode = d_inode(res);
2164  		if ((lookup_flags & LOOKUP_DIRECTORY) && inode &&
2165  		    !(S_ISDIR(inode->i_mode) || S_ISLNK(inode->i_mode))) {
2166  			dput(res);
2167  			res = ERR_PTR(-ENOTDIR);
2168  		} else if (inode && S_ISREG(inode->i_mode)) {
2169  			dput(res);
2170  			res = ERR_PTR(-EOPENSTALE);
2171  		}
2172  	}
2173  	if (switched) {
2174  		d_lookup_done(dentry);
2175  		if (!res)
2176  			res = dentry;
2177  		else
2178  			dput(dentry);
2179  	}
2180  	if (IS_ERR(res))
2181  		return PTR_ERR(res);
2182  	return finish_no_open(file, res);
2183  }
2184  EXPORT_SYMBOL_GPL(nfs_atomic_open);
2185  
2186  static int
2187  nfs4_do_lookup_revalidate(struct inode *dir, struct dentry *dentry,
2188  			  unsigned int flags)
2189  {
2190  	struct inode *inode;
2191  
2192  	if (!(flags & LOOKUP_OPEN) || (flags & LOOKUP_DIRECTORY))
2193  		goto full_reval;
2194  	if (d_mountpoint(dentry))
2195  		goto full_reval;
2196  
2197  	inode = d_inode(dentry);
2198  
2199  	/* We can't create new files in nfs_open_revalidate(), so we
2200  	 * optimize away revalidation of negative dentries.
2201  	 */
2202  	if (inode == NULL)
2203  		goto full_reval;
2204  
2205  	if (nfs_verifier_is_delegated(dentry))
2206  		return nfs_lookup_revalidate_delegated(dir, dentry, inode);
2207  
2208  	/* NFS only supports OPEN on regular files */
2209  	if (!S_ISREG(inode->i_mode))
2210  		goto full_reval;
2211  
2212  	/* We cannot do exclusive creation on a positive dentry */
2213  	if (flags & (LOOKUP_EXCL | LOOKUP_REVAL))
2214  		goto reval_dentry;
2215  
2216  	/* Check if the directory changed */
2217  	if (!nfs_check_verifier(dir, dentry, flags & LOOKUP_RCU))
2218  		goto reval_dentry;
2219  
2220  	/* Let f_op->open() actually open (and revalidate) the file */
2221  	return 1;
2222  reval_dentry:
2223  	if (flags & LOOKUP_RCU)
2224  		return -ECHILD;
2225  	return nfs_lookup_revalidate_dentry(dir, dentry, inode, flags);
2226  
2227  full_reval:
2228  	return nfs_do_lookup_revalidate(dir, dentry, flags);
2229  }
2230  
/*
 * ->d_revalidate hook of nfs4_dentry_operations: run the common
 * __nfs_lookup_revalidate() wrapper with nfs4_do_lookup_revalidate() as the
 * revalidation routine.
 */
static int nfs4_lookup_revalidate(struct dentry *dentry, unsigned int flags)
{
	return __nfs_lookup_revalidate(dentry, flags,
			nfs4_do_lookup_revalidate);
}
2236  
#endif /* CONFIG_NFS_V4 */
2238  
2239  struct dentry *
2240  nfs_add_or_obtain(struct dentry *dentry, struct nfs_fh *fhandle,
2241  				struct nfs_fattr *fattr)
2242  {
2243  	struct dentry *parent = dget_parent(dentry);
2244  	struct inode *dir = d_inode(parent);
2245  	struct inode *inode;
2246  	struct dentry *d;
2247  	int error;
2248  
2249  	d_drop(dentry);
2250  
2251  	if (fhandle->size == 0) {
2252  		error = NFS_PROTO(dir)->lookup(dir, dentry, fhandle, fattr);
2253  		if (error)
2254  			goto out_error;
2255  	}
2256  	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
2257  	if (!(fattr->valid & NFS_ATTR_FATTR)) {
2258  		struct nfs_server *server = NFS_SB(dentry->d_sb);
2259  		error = server->nfs_client->rpc_ops->getattr(server, fhandle,
2260  				fattr, NULL);
2261  		if (error < 0)
2262  			goto out_error;
2263  	}
2264  	inode = nfs_fhget(dentry->d_sb, fhandle, fattr);
2265  	d = d_splice_alias(inode, dentry);
2266  out:
2267  	dput(parent);
2268  	return d;
2269  out_error:
2270  	d = ERR_PTR(error);
2271  	goto out;
2272  }
2273  EXPORT_SYMBOL_GPL(nfs_add_or_obtain);
2274  
/*
 * Code common to create, mkdir, and mknod.
 *
 * Thin wrapper around nfs_add_or_obtain() for callers that only need the
 * status: the returned dentry reference is dropped again.
 * Returns 0 on success or the negative errno carried by the ERR_PTR().
 */
int nfs_instantiate(struct dentry *dentry, struct nfs_fh *fhandle,
				struct nfs_fattr *fattr)
{
	struct dentry *alias = nfs_add_or_obtain(dentry, fhandle, fattr);

	if (!IS_ERR(alias)) {
		/* Callers don't care about the alias itself */
		dput(alias);
		return 0;
	}
	return PTR_ERR(alias);
}
EXPORT_SYMBOL_GPL(nfs_instantiate);
2292  
2293  /*
2294   * Following a failed create operation, we drop the dentry rather
2295   * than retain a negative dentry. This avoids a problem in the event
2296   * that the operation succeeded on the server, but an error in the
2297   * reply path made it appear to have failed.
2298   */
2299  int nfs_create(struct mnt_idmap *idmap, struct inode *dir,
2300  	       struct dentry *dentry, umode_t mode, bool excl)
2301  {
2302  	struct iattr attr;
2303  	int open_flags = excl ? O_CREAT | O_EXCL : O_CREAT;
2304  	int error;
2305  
2306  	dfprintk(VFS, "NFS: create(%s/%lu), %pd\n",
2307  			dir->i_sb->s_id, dir->i_ino, dentry);
2308  
2309  	attr.ia_mode = mode;
2310  	attr.ia_valid = ATTR_MODE;
2311  
2312  	trace_nfs_create_enter(dir, dentry, open_flags);
2313  	error = NFS_PROTO(dir)->create(dir, dentry, &attr, open_flags);
2314  	trace_nfs_create_exit(dir, dentry, open_flags, error);
2315  	if (error != 0)
2316  		goto out_err;
2317  	return 0;
2318  out_err:
2319  	d_drop(dentry);
2320  	return error;
2321  }
2322  EXPORT_SYMBOL_GPL(nfs_create);
2323  
2324  /*
2325   * See comments for nfs_proc_create regarding failed operations.
2326   */
2327  int
2328  nfs_mknod(struct mnt_idmap *idmap, struct inode *dir,
2329  	  struct dentry *dentry, umode_t mode, dev_t rdev)
2330  {
2331  	struct iattr attr;
2332  	int status;
2333  
2334  	dfprintk(VFS, "NFS: mknod(%s/%lu), %pd\n",
2335  			dir->i_sb->s_id, dir->i_ino, dentry);
2336  
2337  	attr.ia_mode = mode;
2338  	attr.ia_valid = ATTR_MODE;
2339  
2340  	trace_nfs_mknod_enter(dir, dentry);
2341  	status = NFS_PROTO(dir)->mknod(dir, dentry, &attr, rdev);
2342  	trace_nfs_mknod_exit(dir, dentry, status);
2343  	if (status != 0)
2344  		goto out_err;
2345  	return 0;
2346  out_err:
2347  	d_drop(dentry);
2348  	return status;
2349  }
2350  EXPORT_SYMBOL_GPL(nfs_mknod);
2351  
2352  /*
2353   * See comments for nfs_proc_create regarding failed operations.
2354   */
2355  int nfs_mkdir(struct mnt_idmap *idmap, struct inode *dir,
2356  	      struct dentry *dentry, umode_t mode)
2357  {
2358  	struct iattr attr;
2359  	int error;
2360  
2361  	dfprintk(VFS, "NFS: mkdir(%s/%lu), %pd\n",
2362  			dir->i_sb->s_id, dir->i_ino, dentry);
2363  
2364  	attr.ia_valid = ATTR_MODE;
2365  	attr.ia_mode = mode | S_IFDIR;
2366  
2367  	trace_nfs_mkdir_enter(dir, dentry);
2368  	error = NFS_PROTO(dir)->mkdir(dir, dentry, &attr);
2369  	trace_nfs_mkdir_exit(dir, dentry, error);
2370  	if (error != 0)
2371  		goto out_err;
2372  	return 0;
2373  out_err:
2374  	d_drop(dentry);
2375  	return error;
2376  }
2377  EXPORT_SYMBOL_GPL(nfs_mkdir);
2378  
/* The server said the object is gone: d_delete() @dentry if simple_positive() says so. */
static void nfs_dentry_handle_enoent(struct dentry *dentry)
{
	if (!simple_positive(dentry))
		return;
	d_delete(dentry);
}
2384  
2385  static void nfs_dentry_remove_handle_error(struct inode *dir,
2386  					   struct dentry *dentry, int error)
2387  {
2388  	switch (error) {
2389  	case -ENOENT:
2390  		if (d_really_is_positive(dentry))
2391  			d_delete(dentry);
2392  		nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
2393  		break;
2394  	case 0:
2395  		nfs_d_prune_case_insensitive_aliases(d_inode(dentry));
2396  		nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
2397  	}
2398  }
2399  
2400  int nfs_rmdir(struct inode *dir, struct dentry *dentry)
2401  {
2402  	int error;
2403  
2404  	dfprintk(VFS, "NFS: rmdir(%s/%lu), %pd\n",
2405  			dir->i_sb->s_id, dir->i_ino, dentry);
2406  
2407  	trace_nfs_rmdir_enter(dir, dentry);
2408  	if (d_really_is_positive(dentry)) {
2409  		down_write(&NFS_I(d_inode(dentry))->rmdir_sem);
2410  		error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
2411  		/* Ensure the VFS deletes this inode */
2412  		switch (error) {
2413  		case 0:
2414  			clear_nlink(d_inode(dentry));
2415  			break;
2416  		case -ENOENT:
2417  			nfs_dentry_handle_enoent(dentry);
2418  		}
2419  		up_write(&NFS_I(d_inode(dentry))->rmdir_sem);
2420  	} else
2421  		error = NFS_PROTO(dir)->rmdir(dir, &dentry->d_name);
2422  	nfs_dentry_remove_handle_error(dir, dentry, error);
2423  	trace_nfs_rmdir_exit(dir, dentry, error);
2424  
2425  	return error;
2426  }
2427  EXPORT_SYMBOL_GPL(nfs_rmdir);
2428  
2429  /*
2430   * Remove a file after making sure there are no pending writes,
2431   * and after checking that the file has only one user.
2432   *
2433   * We invalidate the attribute cache and free the inode prior to the operation
2434   * to avoid possible races if the server reuses the inode.
2435   */
2436  static int nfs_safe_remove(struct dentry *dentry)
2437  {
2438  	struct inode *dir = d_inode(dentry->d_parent);
2439  	struct inode *inode = d_inode(dentry);
2440  	int error = -EBUSY;
2441  
2442  	dfprintk(VFS, "NFS: safe_remove(%pd2)\n", dentry);
2443  
2444  	/* If the dentry was sillyrenamed, we simply call d_delete() */
2445  	if (dentry->d_flags & DCACHE_NFSFS_RENAMED) {
2446  		error = 0;
2447  		goto out;
2448  	}
2449  
2450  	trace_nfs_remove_enter(dir, dentry);
2451  	if (inode != NULL) {
2452  		error = NFS_PROTO(dir)->remove(dir, dentry);
2453  		if (error == 0)
2454  			nfs_drop_nlink(inode);
2455  	} else
2456  		error = NFS_PROTO(dir)->remove(dir, dentry);
2457  	if (error == -ENOENT)
2458  		nfs_dentry_handle_enoent(dentry);
2459  	trace_nfs_remove_exit(dir, dentry, error);
2460  out:
2461  	return error;
2462  }
2463  
2464  /*  We do silly rename. In case sillyrename() returns -EBUSY, the inode
2465   *  belongs to an active ".nfs..." file and we return -EBUSY.
2466   *
2467   *  If sillyrename() returns 0, we do nothing, otherwise we unlink.
2468   */
2469  int nfs_unlink(struct inode *dir, struct dentry *dentry)
2470  {
2471  	int error;
2472  
2473  	dfprintk(VFS, "NFS: unlink(%s/%lu, %pd)\n", dir->i_sb->s_id,
2474  		dir->i_ino, dentry);
2475  
2476  	trace_nfs_unlink_enter(dir, dentry);
2477  	spin_lock(&dentry->d_lock);
2478  	if (d_count(dentry) > 1 && !test_bit(NFS_INO_PRESERVE_UNLINKED,
2479  					     &NFS_I(d_inode(dentry))->flags)) {
2480  		spin_unlock(&dentry->d_lock);
2481  		/* Start asynchronous writeout of the inode */
2482  		write_inode_now(d_inode(dentry), 0);
2483  		error = nfs_sillyrename(dir, dentry);
2484  		goto out;
2485  	}
2486  	/* We must prevent any concurrent open until the unlink
2487  	 * completes.  ->d_revalidate will wait for ->d_fsdata
2488  	 * to clear.  We set it here to ensure no lookup succeeds until
2489  	 * the unlink is complete on the server.
2490  	 */
2491  	error = -ETXTBSY;
2492  	if (WARN_ON(dentry->d_flags & DCACHE_NFSFS_RENAMED) ||
2493  	    WARN_ON(dentry->d_fsdata == NFS_FSDATA_BLOCKED)) {
2494  		spin_unlock(&dentry->d_lock);
2495  		goto out;
2496  	}
2497  	/* old devname */
2498  	kfree(dentry->d_fsdata);
2499  	dentry->d_fsdata = NFS_FSDATA_BLOCKED;
2500  
2501  	spin_unlock(&dentry->d_lock);
2502  	error = nfs_safe_remove(dentry);
2503  	nfs_dentry_remove_handle_error(dir, dentry, error);
2504  	dentry->d_fsdata = NULL;
2505  	wake_up_var(&dentry->d_fsdata);
2506  out:
2507  	trace_nfs_unlink_exit(dir, dentry, error);
2508  	return error;
2509  }
2510  EXPORT_SYMBOL_GPL(nfs_unlink);
2511  
2512  /*
2513   * To create a symbolic link, most file systems instantiate a new inode,
2514   * add a page to it containing the path, then write it out to the disk
2515   * using prepare_write/commit_write.
2516   *
2517   * Unfortunately the NFS client can't create the in-core inode first
2518   * because it needs a file handle to create an in-core inode (see
2519   * fs/nfs/inode.c:nfs_fhget).  We only have a file handle *after* the
2520   * symlink request has completed on the server.
2521   *
2522   * So instead we allocate a raw page, copy the symname into it, then do
2523   * the SYMLINK request with the page as the buffer.  If it succeeds, we
2524   * now have a new file handle and can instantiate an in-core NFS inode
2525   * and move the raw page into its mapping.
2526   */
2527  int nfs_symlink(struct mnt_idmap *idmap, struct inode *dir,
2528  		struct dentry *dentry, const char *symname)
2529  {
2530  	struct page *page;
2531  	char *kaddr;
2532  	struct iattr attr;
2533  	unsigned int pathlen = strlen(symname);
2534  	int error;
2535  
2536  	dfprintk(VFS, "NFS: symlink(%s/%lu, %pd, %s)\n", dir->i_sb->s_id,
2537  		dir->i_ino, dentry, symname);
2538  
2539  	if (pathlen > PAGE_SIZE)
2540  		return -ENAMETOOLONG;
2541  
2542  	attr.ia_mode = S_IFLNK | S_IRWXUGO;
2543  	attr.ia_valid = ATTR_MODE;
2544  
2545  	page = alloc_page(GFP_USER);
2546  	if (!page)
2547  		return -ENOMEM;
2548  
2549  	kaddr = page_address(page);
2550  	memcpy(kaddr, symname, pathlen);
2551  	if (pathlen < PAGE_SIZE)
2552  		memset(kaddr + pathlen, 0, PAGE_SIZE - pathlen);
2553  
2554  	trace_nfs_symlink_enter(dir, dentry);
2555  	error = NFS_PROTO(dir)->symlink(dir, dentry, page, pathlen, &attr);
2556  	trace_nfs_symlink_exit(dir, dentry, error);
2557  	if (error != 0) {
2558  		dfprintk(VFS, "NFS: symlink(%s/%lu, %pd, %s) error %d\n",
2559  			dir->i_sb->s_id, dir->i_ino,
2560  			dentry, symname, error);
2561  		d_drop(dentry);
2562  		__free_page(page);
2563  		return error;
2564  	}
2565  
2566  	nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
2567  
2568  	/*
2569  	 * No big deal if we can't add this page to the page cache here.
2570  	 * READLINK will get the missing page from the server if needed.
2571  	 */
2572  	if (!add_to_page_cache_lru(page, d_inode(dentry)->i_mapping, 0,
2573  							GFP_KERNEL)) {
2574  		SetPageUptodate(page);
2575  		unlock_page(page);
2576  		/*
2577  		 * add_to_page_cache_lru() grabs an extra page refcount.
2578  		 * Drop it here to avoid leaking this page later.
2579  		 */
2580  		put_page(page);
2581  	} else
2582  		__free_page(page);
2583  
2584  	return 0;
2585  }
2586  EXPORT_SYMBOL_GPL(nfs_symlink);
2587  
2588  int
2589  nfs_link(struct dentry *old_dentry, struct inode *dir, struct dentry *dentry)
2590  {
2591  	struct inode *inode = d_inode(old_dentry);
2592  	int error;
2593  
2594  	dfprintk(VFS, "NFS: link(%pd2 -> %pd2)\n",
2595  		old_dentry, dentry);
2596  
2597  	trace_nfs_link_enter(inode, dir, dentry);
2598  	d_drop(dentry);
2599  	if (S_ISREG(inode->i_mode))
2600  		nfs_sync_inode(inode);
2601  	error = NFS_PROTO(dir)->link(inode, dir, &dentry->d_name);
2602  	if (error == 0) {
2603  		nfs_set_verifier(dentry, nfs_save_change_attribute(dir));
2604  		ihold(inode);
2605  		d_add(dentry, inode);
2606  	}
2607  	trace_nfs_link_exit(inode, dir, dentry, error);
2608  	return error;
2609  }
2610  EXPORT_SYMBOL_GPL(nfs_link);
2611  
2612  static void
2613  nfs_unblock_rename(struct rpc_task *task, struct nfs_renamedata *data)
2614  {
2615  	struct dentry *new_dentry = data->new_dentry;
2616  
2617  	new_dentry->d_fsdata = NULL;
2618  	wake_up_var(&new_dentry->d_fsdata);
2619  }
2620  
2621  /*
2622   * RENAME
2623   * FIXME: Some nfsds, like the Linux user space nfsd, may generate a
2624   * different file handle for the same inode after a rename (e.g. when
2625   * moving to a different directory). A fail-safe method to do so would
2626   * be to look up old_dir/old_name, create a link to new_dir/new_name and
2627   * rename the old file using the sillyrename stuff. This way, the original
2628   * file in old_dir will go away when the last process iput()s the inode.
2629   *
2630   * FIXED.
2631   *
2632   * It actually works quite well. One needs to have the possibility for
2633   * at least one ".nfs..." file in each directory the file ever gets
2634   * moved or linked to which happens automagically with the new
2635   * implementation that only depends on the dcache stuff instead of
2636   * using the inode layer
2637   *
2638   * Unfortunately, things are a little more complicated than indicated
2639   * above. For a cross-directory move, we want to make sure we can get
2640   * rid of the old inode after the operation.  This means there must be
2641   * no pending writes (if it's a file), and the use count must be 1.
2642   * If these conditions are met, we can drop the dentries before doing
2643   * the rename.
2644   */
2645  int nfs_rename(struct mnt_idmap *idmap, struct inode *old_dir,
2646  	       struct dentry *old_dentry, struct inode *new_dir,
2647  	       struct dentry *new_dentry, unsigned int flags)
2648  {
2649  	struct inode *old_inode = d_inode(old_dentry);
2650  	struct inode *new_inode = d_inode(new_dentry);
2651  	struct dentry *dentry = NULL;
2652  	struct rpc_task *task;
2653  	bool must_unblock = false;
2654  	int error = -EBUSY;
2655  
2656  	if (flags)
2657  		return -EINVAL;
2658  
2659  	dfprintk(VFS, "NFS: rename(%pd2 -> %pd2, ct=%d)\n",
2660  		 old_dentry, new_dentry,
2661  		 d_count(new_dentry));
2662  
2663  	trace_nfs_rename_enter(old_dir, old_dentry, new_dir, new_dentry);
2664  	/*
2665  	 * For non-directories, check whether the target is busy and if so,
2666  	 * make a copy of the dentry and then do a silly-rename. If the
2667  	 * silly-rename succeeds, the copied dentry is hashed and becomes
2668  	 * the new target.
2669  	 */
2670  	if (new_inode && !S_ISDIR(new_inode->i_mode)) {
2671  		/* We must prevent any concurrent open until the unlink
2672  		 * completes.  ->d_revalidate will wait for ->d_fsdata
2673  		 * to clear.  We set it here to ensure no lookup succeeds until
2674  		 * the unlink is complete on the server.
2675  		 */
2676  		error = -ETXTBSY;
2677  		if (WARN_ON(new_dentry->d_flags & DCACHE_NFSFS_RENAMED) ||
2678  		    WARN_ON(new_dentry->d_fsdata == NFS_FSDATA_BLOCKED))
2679  			goto out;
2680  		if (new_dentry->d_fsdata) {
2681  			/* old devname */
2682  			kfree(new_dentry->d_fsdata);
2683  			new_dentry->d_fsdata = NULL;
2684  		}
2685  
2686  		spin_lock(&new_dentry->d_lock);
2687  		if (d_count(new_dentry) > 2) {
2688  			int err;
2689  
2690  			spin_unlock(&new_dentry->d_lock);
2691  
2692  			/* copy the target dentry's name */
2693  			dentry = d_alloc(new_dentry->d_parent,
2694  					 &new_dentry->d_name);
2695  			if (!dentry)
2696  				goto out;
2697  
2698  			/* silly-rename the existing target ... */
2699  			err = nfs_sillyrename(new_dir, new_dentry);
2700  			if (err)
2701  				goto out;
2702  
2703  			new_dentry = dentry;
2704  			new_inode = NULL;
2705  		} else {
2706  			new_dentry->d_fsdata = NFS_FSDATA_BLOCKED;
2707  			must_unblock = true;
2708  			spin_unlock(&new_dentry->d_lock);
2709  		}
2710  
2711  	}
2712  
2713  	if (S_ISREG(old_inode->i_mode))
2714  		nfs_sync_inode(old_inode);
2715  	task = nfs_async_rename(old_dir, new_dir, old_dentry, new_dentry,
2716  				must_unblock ? nfs_unblock_rename : NULL);
2717  	if (IS_ERR(task)) {
2718  		error = PTR_ERR(task);
2719  		goto out;
2720  	}
2721  
2722  	error = rpc_wait_for_completion_task(task);
2723  	if (error != 0) {
2724  		((struct nfs_renamedata *)task->tk_calldata)->cancelled = 1;
2725  		/* Paired with the atomic_dec_and_test() barrier in rpc_do_put_task() */
2726  		smp_wmb();
2727  	} else
2728  		error = task->tk_status;
2729  	rpc_put_task(task);
2730  	/* Ensure the inode attributes are revalidated */
2731  	if (error == 0) {
2732  		spin_lock(&old_inode->i_lock);
2733  		NFS_I(old_inode)->attr_gencount = nfs_inc_attr_generation_counter();
2734  		nfs_set_cache_invalid(old_inode, NFS_INO_INVALID_CHANGE |
2735  							 NFS_INO_INVALID_CTIME |
2736  							 NFS_INO_REVAL_FORCED);
2737  		spin_unlock(&old_inode->i_lock);
2738  	}
2739  out:
2740  	trace_nfs_rename_exit(old_dir, old_dentry,
2741  			new_dir, new_dentry, error);
2742  	if (!error) {
2743  		if (new_inode != NULL)
2744  			nfs_drop_nlink(new_inode);
2745  		/*
2746  		 * The d_move() should be here instead of in an async RPC completion
2747  		 * handler because we need the proper locks to move the dentry.  If
2748  		 * we're interrupted by a signal, the async RPC completion handler
2749  		 * should mark the directories for revalidation.
2750  		 */
2751  		d_move(old_dentry, new_dentry);
2752  		nfs_set_verifier(old_dentry,
2753  					nfs_save_change_attribute(new_dir));
2754  	} else if (error == -ENOENT)
2755  		nfs_dentry_handle_enoent(old_dentry);
2756  
2757  	/* new dentry created? */
2758  	if (dentry)
2759  		dput(dentry);
2760  	return error;
2761  }
2762  EXPORT_SYMBOL_GPL(nfs_rename);
2763  
/*
 * Global state of the access cache:
 *  - nfs_access_lru_list links the inodes that currently own cached access
 *    entries; it is walked and modified under nfs_access_lru_lock;
 *  - nfs_access_nr_entries counts the cached entries across all inodes.
 */
static DEFINE_SPINLOCK(nfs_access_lru_lock);
static LIST_HEAD(nfs_access_lru_list);
static atomic_long_t nfs_access_nr_entries;

/*
 * Entry count above which nfs_access_cache_enforce_limit() starts trimming
 * the cache. Exposed as a module parameter with mode 0644.
 */
static unsigned long nfs_access_max_cachesize = 4*1024*1024;
module_param(nfs_access_max_cachesize, ulong, 0644);
MODULE_PARM_DESC(nfs_access_max_cachesize, "NFS access maximum total cache length");
2771  
/*
 * Release one access cache entry: drop its group_info reference, free the
 * entry itself via kfree_rcu(), and account for it in the global entry
 * counter.
 */
static void nfs_access_free_entry(struct nfs_access_entry *entry)
{
	put_group_info(entry->group_info);
	kfree_rcu(entry, rcu_head);
	/* The counter update is bracketed by explicit barriers */
	smp_mb__before_atomic();
	atomic_long_dec(&nfs_access_nr_entries);
	smp_mb__after_atomic();
}
2780  
2781  static void nfs_access_free_list(struct list_head *head)
2782  {
2783  	struct nfs_access_entry *cache;
2784  
2785  	while (!list_empty(head)) {
2786  		cache = list_entry(head->next, struct nfs_access_entry, lru);
2787  		list_del(&cache->lru);
2788  		nfs_access_free_entry(cache);
2789  	}
2790  }
2791  
2792  static unsigned long
2793  nfs_do_access_cache_scan(unsigned int nr_to_scan)
2794  {
2795  	LIST_HEAD(head);
2796  	struct nfs_inode *nfsi, *next;
2797  	struct nfs_access_entry *cache;
2798  	long freed = 0;
2799  
2800  	spin_lock(&nfs_access_lru_lock);
2801  	list_for_each_entry_safe(nfsi, next, &nfs_access_lru_list, access_cache_inode_lru) {
2802  		struct inode *inode;
2803  
2804  		if (nr_to_scan-- == 0)
2805  			break;
2806  		inode = &nfsi->vfs_inode;
2807  		spin_lock(&inode->i_lock);
2808  		if (list_empty(&nfsi->access_cache_entry_lru))
2809  			goto remove_lru_entry;
2810  		cache = list_entry(nfsi->access_cache_entry_lru.next,
2811  				struct nfs_access_entry, lru);
2812  		list_move(&cache->lru, &head);
2813  		rb_erase(&cache->rb_node, &nfsi->access_cache);
2814  		freed++;
2815  		if (!list_empty(&nfsi->access_cache_entry_lru))
2816  			list_move_tail(&nfsi->access_cache_inode_lru,
2817  					&nfs_access_lru_list);
2818  		else {
2819  remove_lru_entry:
2820  			list_del_init(&nfsi->access_cache_inode_lru);
2821  			smp_mb__before_atomic();
2822  			clear_bit(NFS_INO_ACL_LRU_SET, &nfsi->flags);
2823  			smp_mb__after_atomic();
2824  		}
2825  		spin_unlock(&inode->i_lock);
2826  	}
2827  	spin_unlock(&nfs_access_lru_lock);
2828  	nfs_access_free_list(&head);
2829  	return freed;
2830  }
2831  
2832  unsigned long
2833  nfs_access_cache_scan(struct shrinker *shrink, struct shrink_control *sc)
2834  {
2835  	int nr_to_scan = sc->nr_to_scan;
2836  	gfp_t gfp_mask = sc->gfp_mask;
2837  
2838  	if ((gfp_mask & GFP_KERNEL) != GFP_KERNEL)
2839  		return SHRINK_STOP;
2840  	return nfs_do_access_cache_scan(nr_to_scan);
2841  }
2842  
2843  
2844  unsigned long
2845  nfs_access_cache_count(struct shrinker *shrink, struct shrink_control *sc)
2846  {
2847  	return vfs_pressure_ratio(atomic_long_read(&nfs_access_nr_entries));
2848  }
2849  
2850  static void
2851  nfs_access_cache_enforce_limit(void)
2852  {
2853  	long nr_entries = atomic_long_read(&nfs_access_nr_entries);
2854  	unsigned long diff;
2855  	unsigned int nr_to_scan;
2856  
2857  	if (nr_entries < 0 || nr_entries <= nfs_access_max_cachesize)
2858  		return;
2859  	nr_to_scan = 100;
2860  	diff = nr_entries - nfs_access_max_cachesize;
2861  	if (diff < nr_to_scan)
2862  		nr_to_scan = diff;
2863  	nfs_do_access_cache_scan(nr_to_scan);
2864  }
2865  
2866  static void __nfs_access_zap_cache(struct nfs_inode *nfsi, struct list_head *head)
2867  {
2868  	struct rb_root *root_node = &nfsi->access_cache;
2869  	struct rb_node *n;
2870  	struct nfs_access_entry *entry;
2871  
2872  	/* Unhook entries from the cache */
2873  	while ((n = rb_first(root_node)) != NULL) {
2874  		entry = rb_entry(n, struct nfs_access_entry, rb_node);
2875  		rb_erase(n, root_node);
2876  		list_move(&entry->lru, head);
2877  	}
2878  	nfsi->cache_validity &= ~NFS_INO_INVALID_ACCESS;
2879  }
2880  
2881  void nfs_access_zap_cache(struct inode *inode)
2882  {
2883  	LIST_HEAD(head);
2884  
2885  	if (test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags) == 0)
2886  		return;
2887  	/* Remove from global LRU init */
2888  	spin_lock(&nfs_access_lru_lock);
2889  	if (test_and_clear_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags))
2890  		list_del_init(&NFS_I(inode)->access_cache_inode_lru);
2891  
2892  	spin_lock(&inode->i_lock);
2893  	__nfs_access_zap_cache(NFS_I(inode), &head);
2894  	spin_unlock(&inode->i_lock);
2895  	spin_unlock(&nfs_access_lru_lock);
2896  	nfs_access_free_list(&head);
2897  }
2898  EXPORT_SYMBOL_GPL(nfs_access_zap_cache);
2899  
2900  static int access_cmp(const struct cred *a, const struct nfs_access_entry *b)
2901  {
2902  	struct group_info *ga, *gb;
2903  	int g;
2904  
2905  	if (uid_lt(a->fsuid, b->fsuid))
2906  		return -1;
2907  	if (uid_gt(a->fsuid, b->fsuid))
2908  		return 1;
2909  
2910  	if (gid_lt(a->fsgid, b->fsgid))
2911  		return -1;
2912  	if (gid_gt(a->fsgid, b->fsgid))
2913  		return 1;
2914  
2915  	ga = a->group_info;
2916  	gb = b->group_info;
2917  	if (ga == gb)
2918  		return 0;
2919  	if (ga == NULL)
2920  		return -1;
2921  	if (gb == NULL)
2922  		return 1;
2923  	if (ga->ngroups < gb->ngroups)
2924  		return -1;
2925  	if (ga->ngroups > gb->ngroups)
2926  		return 1;
2927  
2928  	for (g = 0; g < ga->ngroups; g++) {
2929  		if (gid_lt(ga->gid[g], gb->gid[g]))
2930  			return -1;
2931  		if (gid_gt(ga->gid[g], gb->gid[g]))
2932  			return 1;
2933  	}
2934  	return 0;
2935  }
2936  
2937  static struct nfs_access_entry *nfs_access_search_rbtree(struct inode *inode, const struct cred *cred)
2938  {
2939  	struct rb_node *n = NFS_I(inode)->access_cache.rb_node;
2940  
2941  	while (n != NULL) {
2942  		struct nfs_access_entry *entry =
2943  			rb_entry(n, struct nfs_access_entry, rb_node);
2944  		int cmp = access_cmp(cred, entry);
2945  
2946  		if (cmp < 0)
2947  			n = n->rb_left;
2948  		else if (cmp > 0)
2949  			n = n->rb_right;
2950  		else
2951  			return entry;
2952  	}
2953  	return NULL;
2954  }
2955  
2956  static u64 nfs_access_login_time(const struct task_struct *task,
2957  				 const struct cred *cred)
2958  {
2959  	const struct task_struct *parent;
2960  	const struct cred *pcred;
2961  	u64 ret;
2962  
2963  	rcu_read_lock();
2964  	for (;;) {
2965  		parent = rcu_dereference(task->real_parent);
2966  		pcred = rcu_dereference(parent->cred);
2967  		if (parent == task || cred_fscmp(pcred, cred) != 0)
2968  			break;
2969  		task = parent;
2970  	}
2971  	ret = task->start_time;
2972  	rcu_read_unlock();
2973  	return ret;
2974  }
2975  
2976  static int nfs_access_get_cached_locked(struct inode *inode, const struct cred *cred, u32 *mask, bool may_block)
2977  {
2978  	struct nfs_inode *nfsi = NFS_I(inode);
2979  	u64 login_time = nfs_access_login_time(current, cred);
2980  	struct nfs_access_entry *cache;
2981  	bool retry = true;
2982  	int err;
2983  
2984  	spin_lock(&inode->i_lock);
2985  	for(;;) {
2986  		if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
2987  			goto out_zap;
2988  		cache = nfs_access_search_rbtree(inode, cred);
2989  		err = -ENOENT;
2990  		if (cache == NULL)
2991  			goto out;
2992  		/* Found an entry, is our attribute cache valid? */
2993  		if (!nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS))
2994  			break;
2995  		if (!retry)
2996  			break;
2997  		err = -ECHILD;
2998  		if (!may_block)
2999  			goto out;
3000  		spin_unlock(&inode->i_lock);
3001  		err = __nfs_revalidate_inode(NFS_SERVER(inode), inode);
3002  		if (err)
3003  			return err;
3004  		spin_lock(&inode->i_lock);
3005  		retry = false;
3006  	}
3007  	err = -ENOENT;
3008  	if ((s64)(login_time - cache->timestamp) > 0)
3009  		goto out;
3010  	*mask = cache->mask;
3011  	list_move_tail(&cache->lru, &nfsi->access_cache_entry_lru);
3012  	err = 0;
3013  out:
3014  	spin_unlock(&inode->i_lock);
3015  	return err;
3016  out_zap:
3017  	spin_unlock(&inode->i_lock);
3018  	nfs_access_zap_cache(inode);
3019  	return -ENOENT;
3020  }
3021  
3022  static int nfs_access_get_cached_rcu(struct inode *inode, const struct cred *cred, u32 *mask)
3023  {
3024  	/* Only check the most recently returned cache entry,
3025  	 * but do it without locking.
3026  	 */
3027  	struct nfs_inode *nfsi = NFS_I(inode);
3028  	u64 login_time = nfs_access_login_time(current, cred);
3029  	struct nfs_access_entry *cache;
3030  	int err = -ECHILD;
3031  	struct list_head *lh;
3032  
3033  	rcu_read_lock();
3034  	if (nfsi->cache_validity & NFS_INO_INVALID_ACCESS)
3035  		goto out;
3036  	lh = rcu_dereference(list_tail_rcu(&nfsi->access_cache_entry_lru));
3037  	cache = list_entry(lh, struct nfs_access_entry, lru);
3038  	if (lh == &nfsi->access_cache_entry_lru ||
3039  	    access_cmp(cred, cache) != 0)
3040  		cache = NULL;
3041  	if (cache == NULL)
3042  		goto out;
3043  	if ((s64)(login_time - cache->timestamp) > 0)
3044  		goto out;
3045  	if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_ACCESS))
3046  		goto out;
3047  	*mask = cache->mask;
3048  	err = 0;
3049  out:
3050  	rcu_read_unlock();
3051  	return err;
3052  }
3053  
3054  int nfs_access_get_cached(struct inode *inode, const struct cred *cred,
3055  			  u32 *mask, bool may_block)
3056  {
3057  	int status;
3058  
3059  	status = nfs_access_get_cached_rcu(inode, cred, mask);
3060  	if (status != 0)
3061  		status = nfs_access_get_cached_locked(inode, cred, mask,
3062  		    may_block);
3063  
3064  	return status;
3065  }
3066  EXPORT_SYMBOL_GPL(nfs_access_get_cached);
3067  
3068  static void nfs_access_add_rbtree(struct inode *inode,
3069  				  struct nfs_access_entry *set,
3070  				  const struct cred *cred)
3071  {
3072  	struct nfs_inode *nfsi = NFS_I(inode);
3073  	struct rb_root *root_node = &nfsi->access_cache;
3074  	struct rb_node **p = &root_node->rb_node;
3075  	struct rb_node *parent = NULL;
3076  	struct nfs_access_entry *entry;
3077  	int cmp;
3078  
3079  	spin_lock(&inode->i_lock);
3080  	while (*p != NULL) {
3081  		parent = *p;
3082  		entry = rb_entry(parent, struct nfs_access_entry, rb_node);
3083  		cmp = access_cmp(cred, entry);
3084  
3085  		if (cmp < 0)
3086  			p = &parent->rb_left;
3087  		else if (cmp > 0)
3088  			p = &parent->rb_right;
3089  		else
3090  			goto found;
3091  	}
3092  	rb_link_node(&set->rb_node, parent, p);
3093  	rb_insert_color(&set->rb_node, root_node);
3094  	list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
3095  	spin_unlock(&inode->i_lock);
3096  	return;
3097  found:
3098  	rb_replace_node(parent, &set->rb_node, root_node);
3099  	list_add_tail(&set->lru, &nfsi->access_cache_entry_lru);
3100  	list_del(&entry->lru);
3101  	spin_unlock(&inode->i_lock);
3102  	nfs_access_free_entry(entry);
3103  }
3104  
3105  void nfs_access_add_cache(struct inode *inode, struct nfs_access_entry *set,
3106  			  const struct cred *cred)
3107  {
3108  	struct nfs_access_entry *cache = kmalloc(sizeof(*cache), GFP_KERNEL);
3109  	if (cache == NULL)
3110  		return;
3111  	RB_CLEAR_NODE(&cache->rb_node);
3112  	cache->fsuid = cred->fsuid;
3113  	cache->fsgid = cred->fsgid;
3114  	cache->group_info = get_group_info(cred->group_info);
3115  	cache->mask = set->mask;
3116  	cache->timestamp = ktime_get_ns();
3117  
3118  	/* The above field assignments must be visible
3119  	 * before this item appears on the lru.  We cannot easily
3120  	 * use rcu_assign_pointer, so just force the memory barrier.
3121  	 */
3122  	smp_wmb();
3123  	nfs_access_add_rbtree(inode, cache, cred);
3124  
3125  	/* Update accounting */
3126  	smp_mb__before_atomic();
3127  	atomic_long_inc(&nfs_access_nr_entries);
3128  	smp_mb__after_atomic();
3129  
3130  	/* Add inode to global LRU list */
3131  	if (!test_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags)) {
3132  		spin_lock(&nfs_access_lru_lock);
3133  		if (!test_and_set_bit(NFS_INO_ACL_LRU_SET, &NFS_I(inode)->flags))
3134  			list_add_tail(&NFS_I(inode)->access_cache_inode_lru,
3135  					&nfs_access_lru_list);
3136  		spin_unlock(&nfs_access_lru_lock);
3137  	}
3138  	nfs_access_cache_enforce_limit();
3139  }
3140  EXPORT_SYMBOL_GPL(nfs_access_add_cache);
3141  
3142  #define NFS_MAY_READ (NFS_ACCESS_READ)
3143  #define NFS_MAY_WRITE (NFS_ACCESS_MODIFY | \
3144  		NFS_ACCESS_EXTEND | \
3145  		NFS_ACCESS_DELETE)
3146  #define NFS_FILE_MAY_WRITE (NFS_ACCESS_MODIFY | \
3147  		NFS_ACCESS_EXTEND)
3148  #define NFS_DIR_MAY_WRITE NFS_MAY_WRITE
3149  #define NFS_MAY_LOOKUP (NFS_ACCESS_LOOKUP)
3150  #define NFS_MAY_EXECUTE (NFS_ACCESS_EXECUTE)
3151  static int
3152  nfs_access_calc_mask(u32 access_result, umode_t umode)
3153  {
3154  	int mask = 0;
3155  
3156  	if (access_result & NFS_MAY_READ)
3157  		mask |= MAY_READ;
3158  	if (S_ISDIR(umode)) {
3159  		if ((access_result & NFS_DIR_MAY_WRITE) == NFS_DIR_MAY_WRITE)
3160  			mask |= MAY_WRITE;
3161  		if ((access_result & NFS_MAY_LOOKUP) == NFS_MAY_LOOKUP)
3162  			mask |= MAY_EXEC;
3163  	} else if (S_ISREG(umode)) {
3164  		if ((access_result & NFS_FILE_MAY_WRITE) == NFS_FILE_MAY_WRITE)
3165  			mask |= MAY_WRITE;
3166  		if ((access_result & NFS_MAY_EXECUTE) == NFS_MAY_EXECUTE)
3167  			mask |= MAY_EXEC;
3168  	} else if (access_result & NFS_MAY_WRITE)
3169  			mask |= MAY_WRITE;
3170  	return mask;
3171  }
3172  
3173  void nfs_access_set_mask(struct nfs_access_entry *entry, u32 access_result)
3174  {
3175  	entry->mask = access_result;
3176  }
3177  EXPORT_SYMBOL_GPL(nfs_access_set_mask);
3178  
3179  static int nfs_do_access(struct inode *inode, const struct cred *cred, int mask)
3180  {
3181  	struct nfs_access_entry cache;
3182  	bool may_block = (mask & MAY_NOT_BLOCK) == 0;
3183  	int cache_mask = -1;
3184  	int status;
3185  
3186  	trace_nfs_access_enter(inode);
3187  
3188  	status = nfs_access_get_cached(inode, cred, &cache.mask, may_block);
3189  	if (status == 0)
3190  		goto out_cached;
3191  
3192  	status = -ECHILD;
3193  	if (!may_block)
3194  		goto out;
3195  
3196  	/*
3197  	 * Determine which access bits we want to ask for...
3198  	 */
3199  	cache.mask = NFS_ACCESS_READ | NFS_ACCESS_MODIFY | NFS_ACCESS_EXTEND |
3200  		     nfs_access_xattr_mask(NFS_SERVER(inode));
3201  	if (S_ISDIR(inode->i_mode))
3202  		cache.mask |= NFS_ACCESS_DELETE | NFS_ACCESS_LOOKUP;
3203  	else
3204  		cache.mask |= NFS_ACCESS_EXECUTE;
3205  	status = NFS_PROTO(inode)->access(inode, &cache, cred);
3206  	if (status != 0) {
3207  		if (status == -ESTALE) {
3208  			if (!S_ISDIR(inode->i_mode))
3209  				nfs_set_inode_stale(inode);
3210  			else
3211  				nfs_zap_caches(inode);
3212  		}
3213  		goto out;
3214  	}
3215  	nfs_access_add_cache(inode, &cache, cred);
3216  out_cached:
3217  	cache_mask = nfs_access_calc_mask(cache.mask, inode->i_mode);
3218  	if ((mask & ~cache_mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) != 0)
3219  		status = -EACCES;
3220  out:
3221  	trace_nfs_access_exit(inode, mask, cache_mask, status);
3222  	return status;
3223  }
3224  
3225  static int nfs_open_permission_mask(int openflags)
3226  {
3227  	int mask = 0;
3228  
3229  	if (openflags & __FMODE_EXEC) {
3230  		/* ONLY check exec rights */
3231  		mask = MAY_EXEC;
3232  	} else {
3233  		if ((openflags & O_ACCMODE) != O_WRONLY)
3234  			mask |= MAY_READ;
3235  		if ((openflags & O_ACCMODE) != O_RDONLY)
3236  			mask |= MAY_WRITE;
3237  	}
3238  
3239  	return mask;
3240  }
3241  
/*
 * Check whether @cred may open @inode with @openflags: the flags are
 * turned into a MAY_* mask by nfs_open_permission_mask() and that mask
 * is checked with nfs_do_access().
 *
 * Returns 0 if access is granted, otherwise the negative errno from
 * nfs_do_access().
 */
int nfs_may_open(struct inode *inode, const struct cred *cred, int openflags)
{
	int mask = nfs_open_permission_mask(openflags);

	return nfs_do_access(inode, cred, mask);
}
EXPORT_SYMBOL_GPL(nfs_may_open);
3247  
3248  static int nfs_execute_ok(struct inode *inode, int mask)
3249  {
3250  	struct nfs_server *server = NFS_SERVER(inode);
3251  	int ret = 0;
3252  
3253  	if (S_ISDIR(inode->i_mode))
3254  		return 0;
3255  	if (nfs_check_cache_invalid(inode, NFS_INO_INVALID_MODE)) {
3256  		if (mask & MAY_NOT_BLOCK)
3257  			return -ECHILD;
3258  		ret = __nfs_revalidate_inode(server, inode);
3259  	}
3260  	if (ret == 0 && !execute_ok(inode))
3261  		ret = -EACCES;
3262  	return ret;
3263  }
3264  
3265  int nfs_permission(struct mnt_idmap *idmap,
3266  		   struct inode *inode,
3267  		   int mask)
3268  {
3269  	const struct cred *cred = current_cred();
3270  	int res = 0;
3271  
3272  	nfs_inc_stats(inode, NFSIOS_VFSACCESS);
3273  
3274  	if ((mask & (MAY_READ | MAY_WRITE | MAY_EXEC)) == 0)
3275  		goto out;
3276  	/* Is this sys_access() ? */
3277  	if (mask & (MAY_ACCESS | MAY_CHDIR))
3278  		goto force_lookup;
3279  
3280  	switch (inode->i_mode & S_IFMT) {
3281  		case S_IFLNK:
3282  			goto out;
3283  		case S_IFREG:
3284  			if ((mask & MAY_OPEN) &&
3285  			   nfs_server_capable(inode, NFS_CAP_ATOMIC_OPEN))
3286  				return 0;
3287  			break;
3288  		case S_IFDIR:
3289  			/*
3290  			 * Optimize away all write operations, since the server
3291  			 * will check permissions when we perform the op.
3292  			 */
3293  			if ((mask & MAY_WRITE) && !(mask & MAY_READ))
3294  				goto out;
3295  	}
3296  
3297  force_lookup:
3298  	if (!NFS_PROTO(inode)->access)
3299  		goto out_notsup;
3300  
3301  	res = nfs_do_access(inode, cred, mask);
3302  out:
3303  	if (!res && (mask & MAY_EXEC))
3304  		res = nfs_execute_ok(inode, mask);
3305  
3306  	dfprintk(VFS, "NFS: permission(%s/%lu), mask=0x%x, res=%d\n",
3307  		inode->i_sb->s_id, inode->i_ino, mask, res);
3308  	return res;
3309  out_notsup:
3310  	if (mask & MAY_NOT_BLOCK)
3311  		return -ECHILD;
3312  
3313  	res = nfs_revalidate_inode(inode, NFS_INO_INVALID_MODE |
3314  						  NFS_INO_INVALID_OTHER);
3315  	if (res == 0)
3316  		res = generic_permission(&nop_mnt_idmap, inode, mask);
3317  	goto out;
3318  }
3319  EXPORT_SYMBOL_GPL(nfs_permission);
3320