xref: /linux/fs/nfs/localio.c (revision 1d18101a644e6ece450d5b0a93f21a71a21b6222)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * NFS client support for local clients to bypass network stack
4  *
5  * Copyright (C) 2014 Weston Andros Adamson <dros@primarydata.com>
6  * Copyright (C) 2019 Trond Myklebust <trond.myklebust@hammerspace.com>
7  * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com>
8  * Copyright (C) 2024 NeilBrown <neilb@suse.de>
9  */
10 
11 #include <linux/module.h>
12 #include <linux/errno.h>
13 #include <linux/vfs.h>
14 #include <linux/file.h>
15 #include <linux/inet.h>
16 #include <linux/sunrpc/addr.h>
17 #include <linux/inetdevice.h>
18 #include <net/addrconf.h>
19 #include <linux/nfs_common.h>
20 #include <linux/nfslocalio.h>
21 #include <linux/bvec.h>
22 
23 #include <linux/nfs.h>
24 #include <linux/nfs_fs.h>
25 #include <linux/nfs_xdr.h>
26 
27 #include "internal.h"
28 #include "pnfs.h"
29 #include "nfstrace.h"
30 
31 #define NFSDBG_FACILITY		NFSDBG_VFS
32 
33 #define NFSLOCAL_MAX_IOS	3
34 
35 struct nfs_local_kiocb {
36 	struct kiocb		kiocb;
37 	struct bio_vec		*bvec;
38 	struct nfs_pgio_header	*hdr;
39 	struct work_struct	work;
40 	void (*aio_complete_work)(struct work_struct *);
41 	struct nfsd_file	*localio;
42 	/* Begin mostly DIO-specific members */
43 	size_t                  end_len;
44 	short int		end_iter_index;
45 	atomic_t		n_iters;
46 	bool			iter_is_dio_aligned[NFSLOCAL_MAX_IOS];
47 	struct iov_iter		iters[NFSLOCAL_MAX_IOS] ____cacheline_aligned;
48 	/* End mostly DIO-specific members */
49 };
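/*
 * When O_DIRECT is requested, a single pgio request may be split across
 * up to NFSLOCAL_MAX_IOS iov_iters: a possibly misaligned start, a
 * DIO-aligned middle and a possibly misaligned end (see
 * nfs_local_iters_setup_dio() below).  n_iters counts the iov_iters that
 * are still outstanding, and end_iter_index records the index of the
 * final iov_iter (-1 until the iters have been set up).
 */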
50 
51 struct nfs_local_fsync_ctx {
52 	struct nfsd_file	*localio;
53 	struct nfs_commit_data	*data;
54 	struct work_struct	work;
55 	struct completion	*done;
56 };
57 
58 static bool localio_enabled __read_mostly = true;
59 module_param(localio_enabled, bool, 0644);
60 
61 static inline bool nfs_client_is_local(const struct nfs_client *clp)
62 {
63 	return !!rcu_access_pointer(clp->cl_uuid.net);
64 }
65 
66 bool nfs_server_is_local(const struct nfs_client *clp)
67 {
68 	return nfs_client_is_local(clp) && localio_enabled;
69 }
70 EXPORT_SYMBOL_GPL(nfs_server_is_local);
71 
72 /*
73  * UUID_IS_LOCAL XDR functions
74  */
75 
76 static void localio_xdr_enc_uuidargs(struct rpc_rqst *req,
77 				     struct xdr_stream *xdr,
78 				     const void *data)
79 {
80 	const u8 *uuid = data;
81 
82 	encode_opaque_fixed(xdr, uuid, UUID_SIZE);
83 }
84 
85 static int localio_xdr_dec_uuidres(struct rpc_rqst *req,
86 				   struct xdr_stream *xdr,
87 				   void *result)
88 {
89 	/* void return */
90 	return 0;
91 }
92 
93 static const struct rpc_procinfo nfs_localio_procedures[] = {
94 	[LOCALIOPROC_UUID_IS_LOCAL] = {
95 		.p_proc = LOCALIOPROC_UUID_IS_LOCAL,
96 		.p_encode = localio_xdr_enc_uuidargs,
97 		.p_decode = localio_xdr_dec_uuidres,
98 		.p_arglen = XDR_QUADLEN(UUID_SIZE),
99 		.p_replen = 0,
100 		.p_statidx = LOCALIOPROC_UUID_IS_LOCAL,
101 		.p_name = "UUID_IS_LOCAL",
102 	},
103 };
104 
105 static unsigned int nfs_localio_counts[ARRAY_SIZE(nfs_localio_procedures)];
106 static const struct rpc_version nfslocalio_version1 = {
107 	.number			= 1,
108 	.nrprocs		= ARRAY_SIZE(nfs_localio_procedures),
109 	.procs			= nfs_localio_procedures,
110 	.counts			= nfs_localio_counts,
111 };
112 
113 static const struct rpc_version *nfslocalio_version[] = {
114        [1]			= &nfslocalio_version1,
115 };
116 
117 extern const struct rpc_program nfslocalio_program;
118 static struct rpc_stat		nfslocalio_rpcstat = { &nfslocalio_program };
119 
120 const struct rpc_program nfslocalio_program = {
121 	.name			= "nfslocalio",
122 	.number			= NFS_LOCALIO_PROGRAM,
123 	.nrvers			= ARRAY_SIZE(nfslocalio_version),
124 	.version		= nfslocalio_version,
125 	.stats			= &nfslocalio_rpcstat,
126 };
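/*
 * The LOCALIO sideband program above carries a single procedure,
 * UUID_IS_LOCAL, whose argument is the client's fixed-size 16-byte uuid
 * and whose reply is void.  As used by nfs_server_uuid_is_local() below,
 * the call appears to exist only so a same-host server can observe the
 * client's uuid.
 */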
127 
128 /*
129  * nfs_init_localioclient - Initialise an NFS localio client connection
130  */
131 static struct rpc_clnt *nfs_init_localioclient(struct nfs_client *clp)
132 {
133 	struct rpc_clnt *rpcclient_localio;
134 
135 	rpcclient_localio = rpc_bind_new_program(clp->cl_rpcclient,
136 						 &nfslocalio_program, 1);
137 
138 	dprintk_rcu("%s: server (%s) %s NFS LOCALIO.\n",
139 		__func__, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR),
140 		(IS_ERR(rpcclient_localio) ? "does not support" : "supports"));
141 
142 	return rpcclient_localio;
143 }
144 
145 static bool nfs_server_uuid_is_local(struct nfs_client *clp)
146 {
147 	u8 uuid[UUID_SIZE];
148 	struct rpc_message msg = {
149 		.rpc_argp = &uuid,
150 	};
151 	struct rpc_clnt *rpcclient_localio;
152 	int status;
153 
154 	rpcclient_localio = nfs_init_localioclient(clp);
155 	if (IS_ERR(rpcclient_localio))
156 		return false;
157 
158 	export_uuid(uuid, &clp->cl_uuid.uuid);
159 
160 	msg.rpc_proc = &nfs_localio_procedures[LOCALIOPROC_UUID_IS_LOCAL];
161 	status = rpc_call_sync(rpcclient_localio, &msg, 0);
162 	dprintk("%s: NFS reply UUID_IS_LOCAL: status=%d\n",
163 		__func__, status);
164 	rpc_shutdown_client(rpcclient_localio);
165 
166 	/* Server is only local if it initialized required struct members */
167 	if (status || !rcu_access_pointer(clp->cl_uuid.net) || !clp->cl_uuid.dom)
168 		return false;
169 
170 	return true;
171 }
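/*
 * Note that the UUID_IS_LOCAL reply carries no data.  When the server
 * runs in the same kernel, the server side is expected to recognize the
 * uuid and populate clp->cl_uuid.net and clp->cl_uuid.dom; locality is
 * inferred from those fields above, not from the RPC status alone.
 */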
172 
173 /*
174  * nfs_local_probe - probe local i/o support for an nfs_server and nfs_client
175  * - called after alloc_client and init_client (so cl_rpcclient exists)
176  * - this function is idempotent; it can be called for old or new clients
177  */
178 static void nfs_local_probe(struct nfs_client *clp)
179 {
180 	/* Disallow localio if disabled via sysfs or AUTH_SYS isn't used */
181 	if (!localio_enabled ||
182 	    clp->cl_rpcclient->cl_auth->au_flavor != RPC_AUTH_UNIX) {
183 		nfs_localio_disable_client(clp);
184 		return;
185 	}
186 
187 	if (nfs_client_is_local(clp))
188 		return;
189 
190 	if (!nfs_uuid_begin(&clp->cl_uuid))
191 		return;
192 	if (nfs_server_uuid_is_local(clp))
193 		nfs_localio_enable_client(clp);
194 	nfs_uuid_end(&clp->cl_uuid);
195 }
196 
197 void nfs_local_probe_async_work(struct work_struct *work)
198 {
199 	struct nfs_client *clp =
200 		container_of(work, struct nfs_client, cl_local_probe_work);
201 
202 	if (!refcount_inc_not_zero(&clp->cl_count))
203 		return;
204 	nfs_local_probe(clp);
205 	nfs_put_client(clp);
206 }
207 
208 void nfs_local_probe_async(struct nfs_client *clp)
209 {
210 	queue_work(nfsiod_workqueue, &clp->cl_local_probe_work);
211 }
212 EXPORT_SYMBOL_GPL(nfs_local_probe_async);
213 
214 static inline void nfs_local_file_put(struct nfsd_file *localio)
215 {
216 	/* nfs_to_nfsd_file_put_local() expects an __rcu pointer
217 	 * but we have a __kernel pointer.  It is always safe
218 	 * to cast a __kernel pointer to an __rcu pointer
219 	 * because the cast only weakens what is known about the pointer.
220 	 */
221 	struct nfsd_file __rcu *nf = (struct nfsd_file __rcu*) localio;
222 
223 	nfs_to_nfsd_file_put_local(&nf);
224 }
225 
226 /*
227  * __nfs_local_open_fh - open a local filehandle in terms of nfsd_file.
228  *
229  * Returns a pointer to a struct nfsd_file or ERR_PTR.
230  * Caller must release returned nfsd_file with nfs_to_nfsd_file_put_local().
231  */
232 static struct nfsd_file *
233 __nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
234 		    struct nfs_fh *fh, struct nfs_file_localio *nfl,
235 		    struct nfsd_file __rcu **pnf,
236 		    const fmode_t mode)
237 {
238 	int status = 0;
239 	struct nfsd_file *localio;
240 
241 	localio = nfs_open_local_fh(&clp->cl_uuid, clp->cl_rpcclient,
242 				    cred, fh, nfl, pnf, mode);
243 	if (IS_ERR(localio)) {
244 		status = PTR_ERR(localio);
245 		switch (status) {
246 		case -ENOMEM:
247 		case -ENXIO:
248 		case -ENOENT:
249 			/* Revalidate localio */
250 			nfs_localio_disable_client(clp);
251 			nfs_local_probe(clp);
252 		}
253 	}
254 	trace_nfs_local_open_fh(fh, mode, status);
255 	return localio;
256 }
257 
258 /*
259  * nfs_local_open_fh - open a local filehandle in terms of nfsd_file.
260  * First check whether an open nfsd_file is already cached; otherwise
261  * call __nfs_local_open_fh and cache the new nfsd_file in nfs_file_localio.
262  *
263  * Returns a pointer to a struct nfsd_file or NULL.
264  */
265 struct nfsd_file *
266 nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
267 		  struct nfs_fh *fh, struct nfs_file_localio *nfl,
268 		  const fmode_t mode)
269 {
270 	struct nfsd_file *nf, __rcu **pnf;
271 
272 	if (!nfs_server_is_local(clp))
273 		return NULL;
274 	if (mode & ~(FMODE_READ | FMODE_WRITE))
275 		return NULL;
276 
277 	if (mode & FMODE_WRITE)
278 		pnf = &nfl->rw_file;
279 	else
280 		pnf = &nfl->ro_file;
281 
282 	nf = __nfs_local_open_fh(clp, cred, fh, nfl, pnf, mode);
283 	if (IS_ERR(nf))
284 		return NULL;
285 	return nf;
286 }
287 EXPORT_SYMBOL_GPL(nfs_local_open_fh);
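/*
 * nfs_local_open_fh() caches one nfsd_file per open mode in the
 * nfs_file_localio (rw_file for writes, ro_file otherwise) and returns
 * NULL rather than an ERR_PTR, so a NULL return simply means LOCALIO
 * cannot be used for this request.
 *
 * Hypothetical usage sketch (callers live outside this file):
 *
 *	localio = nfs_local_open_fh(clp, cred, fh, nfl, FMODE_WRITE);
 *	if (localio)
 *		nfs_local_doio(clp, localio, hdr, call_ops);
 */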
288 
289 static void
290 nfs_local_iocb_free(struct nfs_local_kiocb *iocb)
291 {
292 	kfree(iocb->bvec);
293 	kfree(iocb);
294 }
295 
296 static struct nfs_local_kiocb *
297 nfs_local_iocb_alloc(struct nfs_pgio_header *hdr,
298 		     struct file *file, gfp_t flags)
299 {
300 	struct nfs_local_kiocb *iocb;
301 
302 	iocb = kzalloc(sizeof(*iocb), flags);
303 	if (iocb == NULL)
304 		return NULL;
305 
306 	iocb->bvec = kmalloc_array(hdr->page_array.npages,
307 				   sizeof(struct bio_vec), flags);
308 	if (iocb->bvec == NULL) {
309 		kfree(iocb);
310 		return NULL;
311 	}
312 
313 	init_sync_kiocb(&iocb->kiocb, file);
314 
315 	iocb->hdr = hdr;
316 	iocb->kiocb.ki_pos = hdr->args.offset;
317 	iocb->kiocb.ki_flags &= ~IOCB_APPEND;
318 	iocb->kiocb.ki_complete = NULL;
319 	iocb->aio_complete_work = NULL;
320 
321 	iocb->end_iter_index = -1;
322 
323 	return iocb;
324 }
325 
326 static bool
327 nfs_is_local_dio_possible(struct nfs_local_kiocb *iocb, int rw,
328 			  size_t len, struct nfs_local_dio *local_dio)
329 {
330 	struct nfs_pgio_header *hdr = iocb->hdr;
331 	loff_t offset = hdr->args.offset;
332 	u32 nf_dio_mem_align, nf_dio_offset_align, nf_dio_read_offset_align;
333 	loff_t start_end, orig_end, middle_end;
334 
335 	nfs_to->nfsd_file_dio_alignment(iocb->localio, &nf_dio_mem_align,
336 			&nf_dio_offset_align, &nf_dio_read_offset_align);
337 	if (rw == ITER_DEST)
338 		nf_dio_offset_align = nf_dio_read_offset_align;
339 
340 	if (unlikely(!nf_dio_mem_align || !nf_dio_offset_align))
341 		return false;
342 	if (unlikely(nf_dio_offset_align > PAGE_SIZE))
343 		return false;
344 	if (unlikely(len < nf_dio_offset_align))
345 		return false;
346 
347 	local_dio->mem_align = nf_dio_mem_align;
348 	local_dio->offset_align = nf_dio_offset_align;
349 
350 	start_end = round_up(offset, nf_dio_offset_align);
351 	orig_end = offset + len;
352 	middle_end = round_down(orig_end, nf_dio_offset_align);
353 
354 	local_dio->middle_offset = start_end;
355 	local_dio->end_offset = middle_end;
356 
357 	local_dio->start_len = start_end - offset;
358 	local_dio->middle_len = middle_end - start_end;
359 	local_dio->end_len = orig_end - middle_end;
360 
361 	if (rw == ITER_DEST)
362 		trace_nfs_local_dio_read(hdr->inode, offset, len, local_dio);
363 	else
364 		trace_nfs_local_dio_write(hdr->inode, offset, len, local_dio);
365 	return true;
366 }
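/*
 * Illustrative example of the split computed above, assuming
 * offset_align = 512, offset = 1000 and len = 10000:
 *
 *   start_end  = round_up(1000, 512)    = 1024
 *   orig_end   = 1000 + 10000           = 11000
 *   middle_end = round_down(11000, 512) = 10752
 *
 *   start_len  = 1024 - 1000   = 24    (buffered head)
 *   middle_len = 10752 - 1024  = 9728  (DIO-capable middle)
 *   end_len    = 11000 - 10752 = 248   (buffered tail)
 *
 * The three lengths always sum to len; start_len and/or end_len may be 0
 * when the request is already aligned.
 */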
367 
368 static bool nfs_iov_iter_aligned_bvec(const struct iov_iter *i,
369 		unsigned int addr_mask, unsigned int len_mask)
370 {
371 	const struct bio_vec *bvec = i->bvec;
372 	size_t skip = i->iov_offset;
373 	size_t size = i->count;
374 
375 	if (size & len_mask)
376 		return false;
377 	do {
378 		size_t len = bvec->bv_len;
379 
380 		if (len > size)
381 			len = size;
382 		if ((unsigned long)(bvec->bv_offset + skip) & addr_mask)
383 			return false;
384 		bvec++;
385 		size -= len;
386 		skip = 0;
387 	} while (size);
388 
389 	return true;
390 }
391 
392 static void
393 nfs_local_iter_setup(struct iov_iter *iter, int rw, struct bio_vec *bvec,
394 		     unsigned int nvecs, unsigned long total,
395 		     size_t start, size_t len)
396 {
397 	iov_iter_bvec(iter, rw, bvec, nvecs, total);
398 	if (start)
399 		iov_iter_advance(iter, start);
400 	iov_iter_truncate(iter, len);
401 }
402 
403 /*
404  * Setup as many as 3 iov_iter based on extents described by @local_dio.
405  * Returns the number of iov_iter that were setup.
406  */
407 static int
408 nfs_local_iters_setup_dio(struct nfs_local_kiocb *iocb, int rw,
409 			  unsigned int nvecs, unsigned long total,
410 			  struct nfs_local_dio *local_dio)
411 {
412 	int n_iters = 0;
413 	struct iov_iter *iters = iocb->iters;
414 
415 	/* Setup misaligned start? */
416 	if (local_dio->start_len) {
417 		nfs_local_iter_setup(&iters[n_iters], rw, iocb->bvec,
418 				     nvecs, total, 0, local_dio->start_len);
419 		++n_iters;
420 	}
421 
422 	/*
423 	 * Setup DIO-aligned middle, if there is no misaligned end (below)
424 	 * then AIO completion is used, see nfs_local_call_{read,write}
425 	 */
426 	nfs_local_iter_setup(&iters[n_iters], rw, iocb->bvec, nvecs,
427 			     total, local_dio->start_len, local_dio->middle_len);
428 
429 	iocb->iter_is_dio_aligned[n_iters] =
430 		nfs_iov_iter_aligned_bvec(&iters[n_iters],
431 			local_dio->mem_align-1, local_dio->offset_align-1);
432 
433 	if (unlikely(!iocb->iter_is_dio_aligned[n_iters])) {
434 		trace_nfs_local_dio_misaligned(iocb->hdr->inode,
435 			local_dio->start_len, local_dio->middle_len, local_dio);
436 		return 0; /* no DIO-aligned IO possible */
437 	}
438 	iocb->end_iter_index = n_iters;
439 	++n_iters;
440 
441 	/* Setup misaligned end? */
442 	if (local_dio->end_len) {
443 		nfs_local_iter_setup(&iters[n_iters], rw, iocb->bvec,
444 				     nvecs, total, local_dio->start_len +
445 				     local_dio->middle_len, local_dio->end_len);
446 		iocb->end_iter_index = n_iters;
447 		++n_iters;
448 	}
449 
450 	atomic_set(&iocb->n_iters, n_iters);
451 	return n_iters;
452 }
453 
454 static noinline_for_stack void
455 nfs_local_iters_init(struct nfs_local_kiocb *iocb, int rw)
456 {
457 	struct nfs_pgio_header *hdr = iocb->hdr;
458 	struct page **pagevec = hdr->page_array.pagevec;
459 	unsigned long v, total;
460 	unsigned int base;
461 	size_t len;
462 
463 	v = 0;
464 	total = hdr->args.count;
465 	base = hdr->args.pgbase;
466 	while (total && v < hdr->page_array.npages) {
467 		len = min_t(size_t, total, PAGE_SIZE - base);
468 		bvec_set_page(&iocb->bvec[v], *pagevec, len, base);
469 		total -= len;
470 		++pagevec;
471 		++v;
472 		base = 0;
473 	}
474 	len = hdr->args.count - total;
475 
476 	/*
477 	 * For each iocb, iocb->n_iters is always at least 1 and we always
478 	 * end io after first nfs_local_pgio_done call unless misaligned DIO.
479 	 */
480 	atomic_set(&iocb->n_iters, 1);
481 
482 	if (test_bit(NFS_IOHDR_ODIRECT, &hdr->flags)) {
483 		struct nfs_local_dio local_dio;
484 
485 		if (nfs_is_local_dio_possible(iocb, rw, len, &local_dio) &&
486 		    nfs_local_iters_setup_dio(iocb, rw, v, len, &local_dio) != 0) {
487 			/* Ensure DIO WRITE's IO on stable storage upon completion */
488 			if (rw == ITER_SOURCE)
489 				iocb->kiocb.ki_flags |= IOCB_DSYNC|IOCB_SYNC;
490 			return; /* is DIO-aligned */
491 		}
492 	}
493 
494 	/* Use buffered IO */
495 	iov_iter_bvec(&iocb->iters[0], rw, iocb->bvec, v, len);
496 }
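/*
 * To summarize the iterator setup: by default a single bvec iov_iter
 * covering the whole request is used (buffered I/O).  Only when
 * NFS_IOHDR_ODIRECT is set, the underlying file reports usable DIO
 * alignment and the middle segment passes the bvec alignment check does
 * the request switch to the multi-iterator DIO path; DIO writes also get
 * IOCB_DSYNC|IOCB_SYNC so that completion implies stable storage.
 */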
497 
498 static void
499 nfs_local_hdr_release(struct nfs_pgio_header *hdr,
500 		const struct rpc_call_ops *call_ops)
501 {
502 	call_ops->rpc_call_done(&hdr->task, hdr);
503 	call_ops->rpc_release(hdr);
504 }
505 
506 static void
507 nfs_local_pgio_init(struct nfs_pgio_header *hdr,
508 		const struct rpc_call_ops *call_ops)
509 {
510 	hdr->task.tk_ops = call_ops;
511 	if (!hdr->task.tk_start)
512 		hdr->task.tk_start = ktime_get();
513 }
514 
515 static bool
516 nfs_local_pgio_done(struct nfs_local_kiocb *iocb, long status, bool force)
517 {
518 	struct nfs_pgio_header *hdr = iocb->hdr;
519 
520 	/* Must handle partial completions */
521 	if (status >= 0) {
522 		hdr->res.count += status;
523 		/* @hdr was initialized to 0 (zeroed during allocation) */
524 		if (hdr->task.tk_status == 0)
525 			hdr->res.op_status = NFS4_OK;
526 	} else {
527 		hdr->res.op_status = nfs_localio_errno_to_nfs4_stat(status);
528 		hdr->task.tk_status = status;
529 	}
530 
531 	if (force)
532 		return true;
533 
534 	BUG_ON(atomic_read(&iocb->n_iters) <= 0);
535 	return atomic_dec_and_test(&iocb->n_iters);
536 }
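/*
 * Every sub-I/O completion (synchronous return or AIO callback) funnels
 * through nfs_local_pgio_done().  A non-negative status is accumulated
 * into hdr->res.count; a negative status is recorded as both the NFS
 * op_status and the task status.  The function returns true, i.e. the
 * whole request is finished, when @force is set (e.g. after a short
 * read or write) or when the last outstanding iov_iter completes.
 */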
537 
538 static void
539 nfs_local_iocb_release(struct nfs_local_kiocb *iocb)
540 {
541 	nfs_local_file_put(iocb->localio);
542 	nfs_local_iocb_free(iocb);
543 }
544 
545 static void
546 nfs_local_pgio_release(struct nfs_local_kiocb *iocb)
547 {
548 	struct nfs_pgio_header *hdr = iocb->hdr;
549 
550 	nfs_local_iocb_release(iocb);
551 	nfs_local_hdr_release(hdr, hdr->task.tk_ops);
552 }
553 
554 /*
555  * Complete the I/O from iocb->kiocb.ki_complete()
556  *
557  * Note that this function can be called from a bottom half context,
558  * hence we need to queue the rpc_call_done() etc to a workqueue
559  */
560 static inline void nfs_local_pgio_aio_complete(struct nfs_local_kiocb *iocb)
561 {
562 	INIT_WORK(&iocb->work, iocb->aio_complete_work);
563 	queue_work(nfsiod_workqueue, &iocb->work);
564 }
565 
566 static void nfs_local_read_done(struct nfs_local_kiocb *iocb)
567 {
568 	struct nfs_pgio_header *hdr = iocb->hdr;
569 	struct file *filp = iocb->kiocb.ki_filp;
570 	long status = hdr->task.tk_status;
571 
572 	if ((iocb->kiocb.ki_flags & IOCB_DIRECT) && status == -EINVAL) {
573 		/* Underlying FS will return -EINVAL if misaligned DIO is attempted. */
574 		pr_info_ratelimited("nfs: Unexpected direct I/O read alignment failure\n");
575 	}
576 
577 	/*
578 	 * Must clear replen otherwise NFSv3 data corruption will occur
579 	 * if/when switching from LOCALIO back to using normal RPC.
580 	 */
581 	hdr->res.replen = 0;
582 
583 	/* nfs_readpage_result() handles short read */
584 
585 	if (hdr->args.offset + hdr->res.count >= i_size_read(file_inode(filp)))
586 		hdr->res.eof = true;
587 
588 	dprintk("%s: read %ld bytes eof %d.\n", __func__,
589 			status > 0 ? status : 0, hdr->res.eof);
590 }
591 
592 static inline void nfs_local_read_iocb_done(struct nfs_local_kiocb *iocb)
593 {
594 	nfs_local_read_done(iocb);
595 	nfs_local_pgio_release(iocb);
596 }
597 
598 static void nfs_local_read_aio_complete_work(struct work_struct *work)
599 {
600 	struct nfs_local_kiocb *iocb =
601 		container_of(work, struct nfs_local_kiocb, work);
602 
603 	nfs_local_read_iocb_done(iocb);
604 }
605 
606 static void nfs_local_read_aio_complete(struct kiocb *kiocb, long ret)
607 {
608 	struct nfs_local_kiocb *iocb =
609 		container_of(kiocb, struct nfs_local_kiocb, kiocb);
610 
611 	/* AIO completion of DIO read should always be last to complete */
612 	if (unlikely(!nfs_local_pgio_done(iocb, ret, false)))
613 		return;
614 
615 	nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_read_aio_complete_work */
616 }
617 
618 static void do_nfs_local_call_read(struct nfs_local_kiocb *iocb, struct file *filp)
619 {
620 	bool force_done = false;
621 	ssize_t status;
622 	int n_iters;
623 
624 	n_iters = atomic_read(&iocb->n_iters);
625 	for (int i = 0; i < n_iters ; i++) {
626 		if (iocb->iter_is_dio_aligned[i]) {
627 			iocb->kiocb.ki_flags |= IOCB_DIRECT;
628 			/* Only use AIO completion if DIO-aligned segment is last */
629 			if (i == iocb->end_iter_index) {
630 				iocb->kiocb.ki_complete = nfs_local_read_aio_complete;
631 				iocb->aio_complete_work = nfs_local_read_aio_complete_work;
632 			}
633 		} else
634 			iocb->kiocb.ki_flags &= ~IOCB_DIRECT;
635 
636 		status = filp->f_op->read_iter(&iocb->kiocb, &iocb->iters[i]);
637 		if (status != -EIOCBQUEUED) {
638 			if (unlikely(status >= 0 && status < iocb->iters[i].count))
639 				force_done = true; /* Partial read */
640 			if (nfs_local_pgio_done(iocb, status, force_done)) {
641 				nfs_local_read_iocb_done(iocb);
642 				break;
643 			}
644 		}
645 	}
646 }
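/*
 * IOCB_DIRECT is toggled per segment above, so only the DIO-aligned
 * middle is submitted as direct I/O.  An AIO completion handler is only
 * installed when that DIO-aligned segment is also the final one
 * (i == end_iter_index); a -EIOCBQUEUED return is then finished later by
 * nfs_local_read_aio_complete().  Synchronous returns are accounted
 * immediately, and a short read forces early completion of the request.
 */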
647 
648 static void nfs_local_call_read(struct work_struct *work)
649 {
650 	struct nfs_local_kiocb *iocb =
651 		container_of(work, struct nfs_local_kiocb, work);
652 	struct file *filp = iocb->kiocb.ki_filp;
653 
654 	scoped_with_creds(filp->f_cred)
655 		do_nfs_local_call_read(iocb, filp);
656 }
657 
658 static int
659 nfs_local_do_read(struct nfs_local_kiocb *iocb,
660 		  const struct rpc_call_ops *call_ops)
661 {
662 	struct nfs_pgio_header *hdr = iocb->hdr;
663 
664 	dprintk("%s: vfs_read count=%u pos=%llu\n",
665 		__func__, hdr->args.count, hdr->args.offset);
666 
667 	nfs_local_pgio_init(hdr, call_ops);
668 	hdr->res.eof = false;
669 
670 	INIT_WORK(&iocb->work, nfs_local_call_read);
671 	queue_work(nfslocaliod_workqueue, &iocb->work);
672 
673 	return 0;
674 }
675 
676 static void
677 nfs_copy_boot_verifier(struct nfs_write_verifier *verifier, struct inode *inode)
678 {
679 	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
680 	u32 *verf = (u32 *)verifier->data;
681 	unsigned int seq;
682 
683 	do {
684 		seq = read_seqbegin(&clp->cl_boot_lock);
685 		verf[0] = (u32)clp->cl_nfssvc_boot.tv_sec;
686 		verf[1] = (u32)clp->cl_nfssvc_boot.tv_nsec;
687 	} while (read_seqretry(&clp->cl_boot_lock, seq));
688 }
689 
690 static void
691 nfs_reset_boot_verifier(struct inode *inode)
692 {
693 	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
694 
695 	write_seqlock(&clp->cl_boot_lock);
696 	ktime_get_real_ts64(&clp->cl_nfssvc_boot);
697 	write_sequnlock(&clp->cl_boot_lock);
698 }
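/*
 * For LOCALIO the write verifier handed back to the generic NFS code is
 * the client's cached cl_nfssvc_boot timestamp, read under the
 * cl_boot_lock seqlock.  Resetting it to the current time after a failed
 * write or commit changes the verifier, which presumably makes the
 * generic commit code treat earlier unstable writes as needing to be
 * resent, much as it would after a server reboot.
 */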
699 
700 static void
701 nfs_set_local_verifier(struct inode *inode,
702 		struct nfs_writeverf *verf,
703 		enum nfs3_stable_how how)
704 {
705 	nfs_copy_boot_verifier(&verf->verifier, inode);
706 	verf->committed = how;
707 }
708 
709 /* Factored out from fs/nfsd/vfs.h:fh_getattr() */
710 static int __vfs_getattr(const struct path *p, struct kstat *stat, int version)
711 {
712 	u32 request_mask = STATX_BASIC_STATS;
713 
714 	if (version == 4)
715 		request_mask |= (STATX_BTIME | STATX_CHANGE_COOKIE);
716 	return vfs_getattr(p, stat, request_mask, AT_STATX_SYNC_AS_STAT);
717 }
718 
719 /* Copied from fs/nfsd/nfsfh.c:nfsd4_change_attribute() */
720 static u64 __nfsd4_change_attribute(const struct kstat *stat,
721 				    const struct inode *inode)
722 {
723 	u64 chattr;
724 
725 	if (stat->result_mask & STATX_CHANGE_COOKIE) {
726 		chattr = stat->change_cookie;
727 		if (S_ISREG(inode->i_mode) &&
728 		    !(stat->attributes & STATX_ATTR_CHANGE_MONOTONIC)) {
729 			chattr += (u64)stat->ctime.tv_sec << 30;
730 			chattr += stat->ctime.tv_nsec;
731 		}
732 	} else {
733 		chattr = time_to_chattr(&stat->ctime);
734 	}
735 	return chattr;
736 }
737 
738 static void nfs_local_vfs_getattr(struct nfs_local_kiocb *iocb)
739 {
740 	struct kstat stat;
741 	struct file *filp = iocb->kiocb.ki_filp;
742 	struct nfs_pgio_header *hdr = iocb->hdr;
743 	struct nfs_fattr *fattr = hdr->res.fattr;
744 	int version = NFS_PROTO(hdr->inode)->version;
745 
746 	if (unlikely(!fattr) || __vfs_getattr(&filp->f_path, &stat, version))
747 		return;
748 
749 	fattr->valid = (NFS_ATTR_FATTR_FILEID |
750 			NFS_ATTR_FATTR_CHANGE |
751 			NFS_ATTR_FATTR_SIZE |
752 			NFS_ATTR_FATTR_ATIME |
753 			NFS_ATTR_FATTR_MTIME |
754 			NFS_ATTR_FATTR_CTIME |
755 			NFS_ATTR_FATTR_SPACE_USED);
756 
757 	fattr->fileid = stat.ino;
758 	fattr->size = stat.size;
759 	fattr->atime = stat.atime;
760 	fattr->mtime = stat.mtime;
761 	fattr->ctime = stat.ctime;
762 	if (version == 4) {
763 		fattr->change_attr =
764 			__nfsd4_change_attribute(&stat, file_inode(filp));
765 	} else
766 		fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime);
767 	fattr->du.nfs3.used = stat.blocks << 9;
768 }
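/*
 * After a local write, post-op attributes are obtained directly with
 * vfs_getattr() and copied into hdr->res.fattr, much as a WRITE reply's
 * post-op attributes would be.  For NFSv4 the change attribute is derived
 * exactly as nfsd4_change_attribute() does (see the copied helper above);
 * du.nfs3.used converts stat.blocks from 512-byte units to bytes.
 */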
769 
770 static void nfs_local_write_done(struct nfs_local_kiocb *iocb)
771 {
772 	struct nfs_pgio_header *hdr = iocb->hdr;
773 	long status = hdr->task.tk_status;
774 
775 	dprintk("%s: wrote %ld bytes.\n", __func__, status > 0 ? status : 0);
776 
777 	if ((iocb->kiocb.ki_flags & IOCB_DIRECT) && status == -EINVAL) {
778 		/* Underlying FS will return -EINVAL if misaligned DIO is attempted. */
779 		pr_info_ratelimited("nfs: Unexpected direct I/O write alignment failure\n");
780 	}
781 
782 	/* Handle short writes as if they are ENOSPC */
783 	status = hdr->res.count;
784 	if (status > 0 && status < hdr->args.count) {
785 		hdr->mds_offset += status;
786 		hdr->args.offset += status;
787 		hdr->args.pgbase += status;
788 		hdr->args.count -= status;
789 		nfs_set_pgio_error(hdr, -ENOSPC, hdr->args.offset);
790 		status = -ENOSPC;
791 		/* record -ENOSPC in terms of nfs_local_pgio_done */
792 		(void) nfs_local_pgio_done(iocb, status, true);
793 	}
794 	if (hdr->task.tk_status < 0)
795 		nfs_reset_boot_verifier(hdr->inode);
796 }
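/*
 * A short local write is surfaced as -ENOSPC.  The offsets and counts in
 * the header are advanced past the bytes that were written before the
 * error is recorded, apparently so that the offset reported through
 * nfs_set_pgio_error() points at the first unwritten byte.
 */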
797 
798 static inline void nfs_local_write_iocb_done(struct nfs_local_kiocb *iocb)
799 {
800 	nfs_local_write_done(iocb);
801 	nfs_local_vfs_getattr(iocb);
802 	nfs_local_pgio_release(iocb);
803 }
804 
805 static void nfs_local_write_aio_complete_work(struct work_struct *work)
806 {
807 	struct nfs_local_kiocb *iocb =
808 		container_of(work, struct nfs_local_kiocb, work);
809 
810 	nfs_local_write_iocb_done(iocb);
811 }
812 
813 static void nfs_local_write_aio_complete(struct kiocb *kiocb, long ret)
814 {
815 	struct nfs_local_kiocb *iocb =
816 		container_of(kiocb, struct nfs_local_kiocb, kiocb);
817 
818 	/* AIO completion of DIO write should always be last to complete */
819 	if (unlikely(!nfs_local_pgio_done(iocb, ret, false)))
820 		return;
821 
822 	nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_write_aio_complete_work */
823 }
824 
825 static ssize_t do_nfs_local_call_write(struct nfs_local_kiocb *iocb,
826 				       struct file *filp)
827 {
828 	bool force_done = false;
829 	ssize_t status;
830 	int n_iters;
831 
832 	file_start_write(filp);
833 	n_iters = atomic_read(&iocb->n_iters);
834 	for (int i = 0; i < n_iters ; i++) {
835 		if (iocb->iter_is_dio_aligned[i]) {
836 			iocb->kiocb.ki_flags |= IOCB_DIRECT;
837 			/* Only use AIO completion if DIO-aligned segment is last */
838 			if (i == iocb->end_iter_index) {
839 				iocb->kiocb.ki_complete = nfs_local_write_aio_complete;
840 				iocb->aio_complete_work = nfs_local_write_aio_complete_work;
841 			}
842 		} else
843 			iocb->kiocb.ki_flags &= ~IOCB_DIRECT;
844 
845 		status = filp->f_op->write_iter(&iocb->kiocb, &iocb->iters[i]);
846 		if (status != -EIOCBQUEUED) {
847 			if (unlikely(status >= 0 && status < iocb->iters[i].count))
848 				force_done = true; /* Partial write */
849 			if (nfs_local_pgio_done(iocb, status, force_done)) {
850 				nfs_local_write_iocb_done(iocb);
851 				break;
852 			}
853 		}
854 	}
855 	file_end_write(filp);
856 
857 	return status;
858 }
859 
860 static void nfs_local_call_write(struct work_struct *work)
861 {
862 	struct nfs_local_kiocb *iocb =
863 		container_of(work, struct nfs_local_kiocb, work);
864 	struct file *filp = iocb->kiocb.ki_filp;
865 	unsigned long old_flags = current->flags;
866 	ssize_t status;
867 
868 	current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
869 
870 	scoped_with_creds(filp->f_cred)
871 		status = do_nfs_local_call_write(iocb, filp);
872 
873 	current->flags = old_flags;
874 }
875 
876 static int
877 nfs_local_do_write(struct nfs_local_kiocb *iocb,
878 		   const struct rpc_call_ops *call_ops)
879 {
880 	struct nfs_pgio_header *hdr = iocb->hdr;
881 
882 	dprintk("%s: vfs_write count=%u pos=%llu %s\n",
883 		__func__, hdr->args.count, hdr->args.offset,
884 		(hdr->args.stable == NFS_UNSTABLE) ?  "unstable" : "stable");
885 
886 	switch (hdr->args.stable) {
887 	default:
888 		break;
889 	case NFS_DATA_SYNC:
890 		iocb->kiocb.ki_flags |= IOCB_DSYNC;
891 		break;
892 	case NFS_FILE_SYNC:
893 		iocb->kiocb.ki_flags |= IOCB_DSYNC|IOCB_SYNC;
894 	}
895 
896 	nfs_local_pgio_init(hdr, call_ops);
897 
898 	nfs_set_local_verifier(hdr->inode, hdr->res.verf, hdr->args.stable);
899 
900 	INIT_WORK(&iocb->work, nfs_local_call_write);
901 	queue_work(nfslocaliod_workqueue, &iocb->work);
902 
903 	return 0;
904 }
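/*
 * Stability mapping used above, matching NFS stable_how semantics:
 *
 *   NFS_UNSTABLE   -> no extra sync flags added here
 *   NFS_DATA_SYNC  -> IOCB_DSYNC
 *   NFS_FILE_SYNC  -> IOCB_DSYNC | IOCB_SYNC
 *
 * The local write verifier is filled in up front, before the work item
 * is queued to nfslocaliod_workqueue.
 */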
905 
906 static struct nfs_local_kiocb *
907 nfs_local_iocb_init(struct nfs_pgio_header *hdr, struct nfsd_file *localio)
908 {
909 	struct file *file = nfs_to->nfsd_file_file(localio);
910 	struct nfs_local_kiocb *iocb;
911 	gfp_t gfp_mask;
912 	int rw;
913 
914 	if (hdr->rw_mode & FMODE_READ) {
915 		if (!file->f_op->read_iter)
916 			return ERR_PTR(-EOPNOTSUPP);
917 		gfp_mask = GFP_KERNEL;
918 		rw = ITER_DEST;
919 	} else {
920 		if (!file->f_op->write_iter)
921 			return ERR_PTR(-EOPNOTSUPP);
922 		gfp_mask = GFP_NOIO;
923 		rw = ITER_SOURCE;
924 	}
925 
926 	iocb = nfs_local_iocb_alloc(hdr, file, gfp_mask);
927 	if (iocb == NULL)
928 		return ERR_PTR(-ENOMEM);
929 	iocb->hdr = hdr;
930 	iocb->localio = localio;
931 
932 	nfs_local_iters_init(iocb, rw);
933 
934 	return iocb;
935 }
936 
937 int nfs_local_doio(struct nfs_client *clp, struct nfsd_file *localio,
938 		   struct nfs_pgio_header *hdr,
939 		   const struct rpc_call_ops *call_ops)
940 {
941 	struct nfs_local_kiocb *iocb;
942 	int status = 0;
943 
944 	if (!hdr->args.count)
945 		return 0;
946 
947 	iocb = nfs_local_iocb_init(hdr, localio);
948 	if (IS_ERR(iocb))
949 		return PTR_ERR(iocb);
950 
951 	switch (hdr->rw_mode) {
952 	case FMODE_READ:
953 		status = nfs_local_do_read(iocb, call_ops);
954 		break;
955 	case FMODE_WRITE:
956 		status = nfs_local_do_write(iocb, call_ops);
957 		break;
958 	default:
959 		dprintk("%s: invalid mode: %d\n", __func__,
960 			hdr->rw_mode);
961 		status = -EOPNOTSUPP;
962 	}
963 
964 	if (status != 0) {
965 		if (status == -EAGAIN)
966 			nfs_localio_disable_client(clp);
967 		nfs_local_iocb_release(iocb);
968 		hdr->task.tk_status = status;
969 		nfs_local_hdr_release(hdr, call_ops);
970 	}
971 	return status;
972 }
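/*
 * nfs_local_doio() is the LOCALIO entry point for READ and WRITE.  A
 * zero-length request returns 0 without doing any I/O; otherwise an
 * nfs_local_kiocb is built and the actual vfs read/write is queued to
 * nfslocaliod_workqueue.  If submission fails, the iocb is torn down and
 * the header is completed through the normal call_ops; -EAGAIN also
 * disables LOCALIO for this client.
 */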
973 
974 static void
975 nfs_local_init_commit(struct nfs_commit_data *data,
976 		const struct rpc_call_ops *call_ops)
977 {
978 	data->task.tk_ops = call_ops;
979 }
980 
981 static int
982 nfs_local_run_commit(struct file *filp, struct nfs_commit_data *data)
983 {
984 	loff_t start = data->args.offset;
985 	loff_t end = LLONG_MAX;
986 
987 	if (data->args.count > 0) {
988 		end = start + data->args.count - 1;
989 		if (end < start)
990 			end = LLONG_MAX;
991 	}
992 
993 	dprintk("%s: commit %llu - %llu\n", __func__, start, end);
994 	return vfs_fsync_range(filp, start, end, 0);
995 }
996 
997 static void
998 nfs_local_commit_done(struct nfs_commit_data *data, int status)
999 {
1000 	if (status >= 0) {
1001 		nfs_set_local_verifier(data->inode,
1002 				data->res.verf,
1003 				NFS_FILE_SYNC);
1004 		data->res.op_status = NFS4_OK;
1005 		data->task.tk_status = 0;
1006 	} else {
1007 		nfs_reset_boot_verifier(data->inode);
1008 		data->res.op_status = nfs_localio_errno_to_nfs4_stat(status);
1009 		data->task.tk_status = status;
1010 	}
1011 }
1012 
1013 static void
1014 nfs_local_release_commit_data(struct nfsd_file *localio,
1015 		struct nfs_commit_data *data,
1016 		const struct rpc_call_ops *call_ops)
1017 {
1018 	nfs_local_file_put(localio);
1019 	call_ops->rpc_call_done(&data->task, data);
1020 	call_ops->rpc_release(data);
1021 }
1022 
1023 static void
1024 nfs_local_fsync_ctx_free(struct nfs_local_fsync_ctx *ctx)
1025 {
1026 	nfs_local_release_commit_data(ctx->localio, ctx->data,
1027 				      ctx->data->task.tk_ops);
1028 	kfree(ctx);
1029 }
1030 
1031 static void
1032 nfs_local_fsync_work(struct work_struct *work)
1033 {
1034 	struct nfs_local_fsync_ctx *ctx;
1035 	int status;
1036 
1037 	ctx = container_of(work, struct nfs_local_fsync_ctx, work);
1038 
1039 	status = nfs_local_run_commit(nfs_to->nfsd_file_file(ctx->localio),
1040 				      ctx->data);
1041 	nfs_local_commit_done(ctx->data, status);
1042 	if (ctx->done != NULL)
1043 		complete(ctx->done);
1044 	nfs_local_fsync_ctx_free(ctx);
1045 }
1046 
1047 static struct nfs_local_fsync_ctx *
1048 nfs_local_fsync_ctx_alloc(struct nfs_commit_data *data,
1049 			  struct nfsd_file *localio, gfp_t flags)
1050 {
1051 	struct nfs_local_fsync_ctx *ctx = kmalloc(sizeof(*ctx), flags);
1052 
1053 	if (ctx != NULL) {
1054 		ctx->localio = localio;
1055 		ctx->data = data;
1056 		INIT_WORK(&ctx->work, nfs_local_fsync_work);
1057 		ctx->done = NULL;
1058 	}
1059 	return ctx;
1060 }
1061 
1062 int nfs_local_commit(struct nfsd_file *localio,
1063 		     struct nfs_commit_data *data,
1064 		     const struct rpc_call_ops *call_ops, int how)
1065 {
1066 	struct nfs_local_fsync_ctx *ctx;
1067 
1068 	ctx = nfs_local_fsync_ctx_alloc(data, localio, GFP_KERNEL);
1069 	if (!ctx) {
1070 		nfs_local_commit_done(data, -ENOMEM);
1071 		nfs_local_release_commit_data(localio, data, call_ops);
1072 		return -ENOMEM;
1073 	}
1074 
1075 	nfs_local_init_commit(data, call_ops);
1076 
1077 	if (how & FLUSH_SYNC) {
1078 		DECLARE_COMPLETION_ONSTACK(done);
1079 		ctx->done = &done;
1080 		queue_work(nfsiod_workqueue, &ctx->work);
1081 		wait_for_completion(&done);
1082 	} else
1083 		queue_work(nfsiod_workqueue, &ctx->work);
1084 
1085 	return 0;
1086 }
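/*
 * A LOCALIO commit is implemented as vfs_fsync_range() on the local file,
 * run from nfs_local_fsync_work() on the nfsiod workqueue.  With
 * FLUSH_SYNC the caller blocks on an on-stack completion until the fsync
 * finishes; otherwise the work runs asynchronously.  Success returns an
 * NFS_FILE_SYNC verifier, while failure resets the boot verifier.
 */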
1087