xref: /linux/fs/nfs/localio.c (revision b7e32ae6664285e156e9f0cd821e63e19798baf7)
1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3  * NFS client support for local clients to bypass network stack
4  *
5  * Copyright (C) 2014 Weston Andros Adamson <dros@primarydata.com>
6  * Copyright (C) 2019 Trond Myklebust <trond.myklebust@hammerspace.com>
7  * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com>
8  * Copyright (C) 2024 NeilBrown <neilb@suse.de>
9  */
10 
11 #include <linux/module.h>
12 #include <linux/errno.h>
13 #include <linux/vfs.h>
14 #include <linux/file.h>
15 #include <linux/inet.h>
16 #include <linux/sunrpc/addr.h>
17 #include <linux/inetdevice.h>
18 #include <net/addrconf.h>
19 #include <linux/nfs_common.h>
20 #include <linux/nfslocalio.h>
21 #include <linux/bvec.h>
22 
23 #include <linux/nfs.h>
24 #include <linux/nfs_fs.h>
25 #include <linux/nfs_xdr.h>
26 
27 #include "internal.h"
28 #include "pnfs.h"
29 #include "nfstrace.h"
30 
31 #define NFSDBG_FACILITY		NFSDBG_VFS
32 
33 struct nfs_local_kiocb {
34 	struct kiocb		kiocb;
35 	struct bio_vec		*bvec;
36 	struct nfs_pgio_header	*hdr;
37 	struct work_struct	work;
38 	void (*aio_complete_work)(struct work_struct *);
39 	struct nfsd_file	*localio;
40 };
41 
42 struct nfs_local_fsync_ctx {
43 	struct nfsd_file	*localio;
44 	struct nfs_commit_data	*data;
45 	struct work_struct	work;
46 	struct completion	*done;
47 };
48 
49 static bool localio_enabled __read_mostly = true;
50 module_param(localio_enabled, bool, 0644);
51 
52 static bool localio_O_DIRECT_semantics __read_mostly = false;
53 module_param(localio_O_DIRECT_semantics, bool, 0644);
54 MODULE_PARM_DESC(localio_O_DIRECT_semantics,
55 		 "LOCALIO will use O_DIRECT semantics to filesystem.");
56 
57 static inline bool nfs_client_is_local(const struct nfs_client *clp)
58 {
59 	return !!rcu_access_pointer(clp->cl_uuid.net);
60 }
61 
62 bool nfs_server_is_local(const struct nfs_client *clp)
63 {
64 	return nfs_client_is_local(clp) && localio_enabled;
65 }
66 EXPORT_SYMBOL_GPL(nfs_server_is_local);
67 
68 /*
69  * UUID_IS_LOCAL XDR functions
70  */
71 
72 static void localio_xdr_enc_uuidargs(struct rpc_rqst *req,
73 				     struct xdr_stream *xdr,
74 				     const void *data)
75 {
76 	const u8 *uuid = data;
77 
78 	encode_opaque_fixed(xdr, uuid, UUID_SIZE);
79 }
80 
/* Decode the UUID_IS_LOCAL reply: nothing is read from the stream. */
static int localio_xdr_dec_uuidres(struct rpc_rqst *req,
				   struct xdr_stream *xdr,
				   void *result)
{
	return 0;	/* void return */
}
88 
89 static const struct rpc_procinfo nfs_localio_procedures[] = {
90 	[LOCALIOPROC_UUID_IS_LOCAL] = {
91 		.p_proc = LOCALIOPROC_UUID_IS_LOCAL,
92 		.p_encode = localio_xdr_enc_uuidargs,
93 		.p_decode = localio_xdr_dec_uuidres,
94 		.p_arglen = XDR_QUADLEN(UUID_SIZE),
95 		.p_replen = 0,
96 		.p_statidx = LOCALIOPROC_UUID_IS_LOCAL,
97 		.p_name = "UUID_IS_LOCAL",
98 	},
99 };
100 
101 static unsigned int nfs_localio_counts[ARRAY_SIZE(nfs_localio_procedures)];
102 static const struct rpc_version nfslocalio_version1 = {
103 	.number			= 1,
104 	.nrprocs		= ARRAY_SIZE(nfs_localio_procedures),
105 	.procs			= nfs_localio_procedures,
106 	.counts			= nfs_localio_counts,
107 };
108 
109 static const struct rpc_version *nfslocalio_version[] = {
110        [1]			= &nfslocalio_version1,
111 };
112 
113 extern const struct rpc_program nfslocalio_program;
114 static struct rpc_stat		nfslocalio_rpcstat = { &nfslocalio_program };
115 
116 const struct rpc_program nfslocalio_program = {
117 	.name			= "nfslocalio",
118 	.number			= NFS_LOCALIO_PROGRAM,
119 	.nrvers			= ARRAY_SIZE(nfslocalio_version),
120 	.version		= nfslocalio_version,
121 	.stats			= &nfslocalio_rpcstat,
122 };
123 
124 /*
125  * nfs_init_localioclient - Initialise an NFS localio client connection
126  */
127 static struct rpc_clnt *nfs_init_localioclient(struct nfs_client *clp)
128 {
129 	struct rpc_clnt *rpcclient_localio;
130 
131 	rpcclient_localio = rpc_bind_new_program(clp->cl_rpcclient,
132 						 &nfslocalio_program, 1);
133 
134 	dprintk_rcu("%s: server (%s) %s NFS LOCALIO.\n",
135 		__func__, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR),
136 		(IS_ERR(rpcclient_localio) ? "does not support" : "supports"));
137 
138 	return rpcclient_localio;
139 }
140 
141 static bool nfs_server_uuid_is_local(struct nfs_client *clp)
142 {
143 	u8 uuid[UUID_SIZE];
144 	struct rpc_message msg = {
145 		.rpc_argp = &uuid,
146 	};
147 	struct rpc_clnt *rpcclient_localio;
148 	int status;
149 
150 	rpcclient_localio = nfs_init_localioclient(clp);
151 	if (IS_ERR(rpcclient_localio))
152 		return false;
153 
154 	export_uuid(uuid, &clp->cl_uuid.uuid);
155 
156 	msg.rpc_proc = &nfs_localio_procedures[LOCALIOPROC_UUID_IS_LOCAL];
157 	status = rpc_call_sync(rpcclient_localio, &msg, 0);
158 	dprintk("%s: NFS reply UUID_IS_LOCAL: status=%d\n",
159 		__func__, status);
160 	rpc_shutdown_client(rpcclient_localio);
161 
162 	/* Server is only local if it initialized required struct members */
163 	if (status || !rcu_access_pointer(clp->cl_uuid.net) || !clp->cl_uuid.dom)
164 		return false;
165 
166 	return true;
167 }
168 
169 /*
170  * nfs_local_probe - probe local i/o support for an nfs_server and nfs_client
171  * - called after alloc_client and init_client (so cl_rpcclient exists)
172  * - this function is idempotent, it can be called for old or new clients
173  */
174 static void nfs_local_probe(struct nfs_client *clp)
175 {
176 	/* Disallow localio if disabled via sysfs or AUTH_SYS isn't used */
177 	if (!localio_enabled ||
178 	    clp->cl_rpcclient->cl_auth->au_flavor != RPC_AUTH_UNIX) {
179 		nfs_localio_disable_client(clp);
180 		return;
181 	}
182 
183 	if (nfs_client_is_local(clp))
184 		return;
185 
186 	if (!nfs_uuid_begin(&clp->cl_uuid))
187 		return;
188 	if (nfs_server_uuid_is_local(clp))
189 		nfs_localio_enable_client(clp);
190 	nfs_uuid_end(&clp->cl_uuid);
191 }
192 
193 void nfs_local_probe_async_work(struct work_struct *work)
194 {
195 	struct nfs_client *clp =
196 		container_of(work, struct nfs_client, cl_local_probe_work);
197 
198 	if (!refcount_inc_not_zero(&clp->cl_count))
199 		return;
200 	nfs_local_probe(clp);
201 	nfs_put_client(clp);
202 }
203 
204 void nfs_local_probe_async(struct nfs_client *clp)
205 {
206 	queue_work(nfsiod_workqueue, &clp->cl_local_probe_work);
207 }
208 EXPORT_SYMBOL_GPL(nfs_local_probe_async);
209 
210 static inline void nfs_local_file_put(struct nfsd_file *localio)
211 {
212 	/* nfs_to_nfsd_file_put_local() expects an __rcu pointer
213 	 * but we have a __kernel pointer.  It is always safe
214 	 * to cast a __kernel pointer to an __rcu pointer
215 	 * because the cast only weakens what is known about the pointer.
216 	 */
217 	struct nfsd_file __rcu *nf = (struct nfsd_file __rcu*) localio;
218 
219 	nfs_to_nfsd_file_put_local(&nf);
220 }
221 
222 /*
223  * __nfs_local_open_fh - open a local filehandle in terms of nfsd_file.
224  *
225  * Returns a pointer to a struct nfsd_file or ERR_PTR.
226  * Caller must release returned nfsd_file with nfs_to_nfsd_file_put_local().
227  */
228 static struct nfsd_file *
229 __nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
230 		    struct nfs_fh *fh, struct nfs_file_localio *nfl,
231 		    struct nfsd_file __rcu **pnf,
232 		    const fmode_t mode)
233 {
234 	struct nfsd_file *localio;
235 
236 	localio = nfs_open_local_fh(&clp->cl_uuid, clp->cl_rpcclient,
237 				    cred, fh, nfl, pnf, mode);
238 	if (IS_ERR(localio)) {
239 		int status = PTR_ERR(localio);
240 		trace_nfs_local_open_fh(fh, mode, status);
241 		switch (status) {
242 		case -ENOMEM:
243 		case -ENXIO:
244 		case -ENOENT:
245 			/* Revalidate localio */
246 			nfs_localio_disable_client(clp);
247 			nfs_local_probe(clp);
248 		}
249 	}
250 	return localio;
251 }
252 
253 /*
254  * nfs_local_open_fh - open a local filehandle in terms of nfsd_file.
255  * First checking if the open nfsd_file is already cached, otherwise
256  * must __nfs_local_open_fh and insert the nfsd_file in nfs_file_localio.
257  *
258  * Returns a pointer to a struct nfsd_file or NULL.
259  */
260 struct nfsd_file *
261 nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
262 		  struct nfs_fh *fh, struct nfs_file_localio *nfl,
263 		  const fmode_t mode)
264 {
265 	struct nfsd_file *nf, __rcu **pnf;
266 
267 	if (!nfs_server_is_local(clp))
268 		return NULL;
269 	if (mode & ~(FMODE_READ | FMODE_WRITE))
270 		return NULL;
271 
272 	if (mode & FMODE_WRITE)
273 		pnf = &nfl->rw_file;
274 	else
275 		pnf = &nfl->ro_file;
276 
277 	nf = __nfs_local_open_fh(clp, cred, fh, nfl, pnf, mode);
278 	if (IS_ERR(nf))
279 		return NULL;
280 	return nf;
281 }
282 EXPORT_SYMBOL_GPL(nfs_local_open_fh);
283 
284 static struct bio_vec *
285 nfs_bvec_alloc_and_import_pagevec(struct page **pagevec,
286 		unsigned int npages, gfp_t flags)
287 {
288 	struct bio_vec *bvec, *p;
289 
290 	bvec = kmalloc_array(npages, sizeof(*bvec), flags);
291 	if (bvec != NULL) {
292 		for (p = bvec; npages > 0; p++, pagevec++, npages--) {
293 			p->bv_page = *pagevec;
294 			p->bv_len = PAGE_SIZE;
295 			p->bv_offset = 0;
296 		}
297 	}
298 	return bvec;
299 }
300 
301 static void
302 nfs_local_iocb_free(struct nfs_local_kiocb *iocb)
303 {
304 	kfree(iocb->bvec);
305 	kfree(iocb);
306 }
307 
308 static struct nfs_local_kiocb *
309 nfs_local_iocb_alloc(struct nfs_pgio_header *hdr,
310 		     struct file *file, gfp_t flags)
311 {
312 	struct nfs_local_kiocb *iocb;
313 
314 	iocb = kmalloc(sizeof(*iocb), flags);
315 	if (iocb == NULL)
316 		return NULL;
317 	iocb->bvec = nfs_bvec_alloc_and_import_pagevec(hdr->page_array.pagevec,
318 			hdr->page_array.npages, flags);
319 	if (iocb->bvec == NULL) {
320 		kfree(iocb);
321 		return NULL;
322 	}
323 
324 	if (localio_O_DIRECT_semantics &&
325 	    test_bit(NFS_IOHDR_ODIRECT, &hdr->flags)) {
326 		iocb->kiocb.ki_filp = file;
327 		iocb->kiocb.ki_flags = IOCB_DIRECT;
328 	} else
329 		init_sync_kiocb(&iocb->kiocb, file);
330 
331 	iocb->kiocb.ki_pos = hdr->args.offset;
332 	iocb->hdr = hdr;
333 	iocb->kiocb.ki_flags &= ~IOCB_APPEND;
334 	iocb->aio_complete_work = NULL;
335 
336 	return iocb;
337 }
338 
339 static void
340 nfs_local_iter_init(struct iov_iter *i, struct nfs_local_kiocb *iocb, int dir)
341 {
342 	struct nfs_pgio_header *hdr = iocb->hdr;
343 
344 	iov_iter_bvec(i, dir, iocb->bvec, hdr->page_array.npages,
345 		      hdr->args.count + hdr->args.pgbase);
346 	if (hdr->args.pgbase != 0)
347 		iov_iter_advance(i, hdr->args.pgbase);
348 }
349 
350 static void
351 nfs_local_hdr_release(struct nfs_pgio_header *hdr,
352 		const struct rpc_call_ops *call_ops)
353 {
354 	call_ops->rpc_call_done(&hdr->task, hdr);
355 	call_ops->rpc_release(hdr);
356 }
357 
358 static void
359 nfs_local_pgio_init(struct nfs_pgio_header *hdr,
360 		const struct rpc_call_ops *call_ops)
361 {
362 	hdr->task.tk_ops = call_ops;
363 	if (!hdr->task.tk_start)
364 		hdr->task.tk_start = ktime_get();
365 }
366 
367 static void
368 nfs_local_pgio_done(struct nfs_pgio_header *hdr, long status)
369 {
370 	if (status >= 0) {
371 		hdr->res.count = status;
372 		hdr->res.op_status = NFS4_OK;
373 		hdr->task.tk_status = 0;
374 	} else {
375 		hdr->res.op_status = nfs_localio_errno_to_nfs4_stat(status);
376 		hdr->task.tk_status = status;
377 	}
378 }
379 
380 static void
381 nfs_local_pgio_release(struct nfs_local_kiocb *iocb)
382 {
383 	struct nfs_pgio_header *hdr = iocb->hdr;
384 
385 	nfs_local_file_put(iocb->localio);
386 	nfs_local_iocb_free(iocb);
387 	nfs_local_hdr_release(hdr, hdr->task.tk_ops);
388 }
389 
390 /*
391  * Complete the I/O from iocb->kiocb.ki_complete()
392  *
393  * Note that this function can be called from a bottom half context,
394  * hence we need to queue the rpc_call_done() etc to a workqueue
395  */
396 static inline void nfs_local_pgio_aio_complete(struct nfs_local_kiocb *iocb)
397 {
398 	INIT_WORK(&iocb->work, iocb->aio_complete_work);
399 	queue_work(nfsiod_workqueue, &iocb->work);
400 }
401 
402 static void
403 nfs_local_read_done(struct nfs_local_kiocb *iocb, long status)
404 {
405 	struct nfs_pgio_header *hdr = iocb->hdr;
406 	struct file *filp = iocb->kiocb.ki_filp;
407 
408 	nfs_local_pgio_done(hdr, status);
409 
410 	/*
411 	 * Must clear replen otherwise NFSv3 data corruption will occur
412 	 * if/when switching from LOCALIO back to using normal RPC.
413 	 */
414 	hdr->res.replen = 0;
415 
416 	if (hdr->res.count != hdr->args.count ||
417 	    hdr->args.offset + hdr->res.count >= i_size_read(file_inode(filp)))
418 		hdr->res.eof = true;
419 
420 	dprintk("%s: read %ld bytes eof %d.\n", __func__,
421 			status > 0 ? status : 0, hdr->res.eof);
422 }
423 
424 static void nfs_local_read_aio_complete_work(struct work_struct *work)
425 {
426 	struct nfs_local_kiocb *iocb =
427 		container_of(work, struct nfs_local_kiocb, work);
428 
429 	nfs_local_pgio_release(iocb);
430 }
431 
432 static void nfs_local_read_aio_complete(struct kiocb *kiocb, long ret)
433 {
434 	struct nfs_local_kiocb *iocb =
435 		container_of(kiocb, struct nfs_local_kiocb, kiocb);
436 
437 	nfs_local_read_done(iocb, ret);
438 	nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_read_aio_complete_work */
439 }
440 
441 static void nfs_local_call_read(struct work_struct *work)
442 {
443 	struct nfs_local_kiocb *iocb =
444 		container_of(work, struct nfs_local_kiocb, work);
445 	struct file *filp = iocb->kiocb.ki_filp;
446 	const struct cred *save_cred;
447 	struct iov_iter iter;
448 	ssize_t status;
449 
450 	save_cred = override_creds(filp->f_cred);
451 
452 	nfs_local_iter_init(&iter, iocb, READ);
453 
454 	status = filp->f_op->read_iter(&iocb->kiocb, &iter);
455 
456 	revert_creds(save_cred);
457 
458 	if (status != -EIOCBQUEUED) {
459 		nfs_local_read_done(iocb, status);
460 		nfs_local_pgio_release(iocb);
461 	}
462 }
463 
464 static int
465 nfs_do_local_read(struct nfs_pgio_header *hdr,
466 		  struct nfsd_file *localio,
467 		  const struct rpc_call_ops *call_ops)
468 {
469 	struct nfs_local_kiocb *iocb;
470 	struct file *file = nfs_to->nfsd_file_file(localio);
471 
472 	/* Don't support filesystems without read_iter */
473 	if (!file->f_op->read_iter)
474 		return -EAGAIN;
475 
476 	dprintk("%s: vfs_read count=%u pos=%llu\n",
477 		__func__, hdr->args.count, hdr->args.offset);
478 
479 	iocb = nfs_local_iocb_alloc(hdr, file, GFP_KERNEL);
480 	if (iocb == NULL)
481 		return -ENOMEM;
482 	iocb->localio = localio;
483 
484 	nfs_local_pgio_init(hdr, call_ops);
485 	hdr->res.eof = false;
486 
487 	if (iocb->kiocb.ki_flags & IOCB_DIRECT) {
488 		iocb->kiocb.ki_complete = nfs_local_read_aio_complete;
489 		iocb->aio_complete_work = nfs_local_read_aio_complete_work;
490 	}
491 
492 	INIT_WORK(&iocb->work, nfs_local_call_read);
493 	queue_work(nfslocaliod_workqueue, &iocb->work);
494 
495 	return 0;
496 }
497 
498 static void
499 nfs_copy_boot_verifier(struct nfs_write_verifier *verifier, struct inode *inode)
500 {
501 	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
502 	u32 *verf = (u32 *)verifier->data;
503 	unsigned int seq;
504 
505 	do {
506 		seq = read_seqbegin(&clp->cl_boot_lock);
507 		verf[0] = (u32)clp->cl_nfssvc_boot.tv_sec;
508 		verf[1] = (u32)clp->cl_nfssvc_boot.tv_nsec;
509 	} while (read_seqretry(&clp->cl_boot_lock, seq));
510 }
511 
512 static void
513 nfs_reset_boot_verifier(struct inode *inode)
514 {
515 	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
516 
517 	write_seqlock(&clp->cl_boot_lock);
518 	ktime_get_real_ts64(&clp->cl_nfssvc_boot);
519 	write_sequnlock(&clp->cl_boot_lock);
520 }
521 
522 static void
523 nfs_set_local_verifier(struct inode *inode,
524 		struct nfs_writeverf *verf,
525 		enum nfs3_stable_how how)
526 {
527 	nfs_copy_boot_verifier(&verf->verifier, inode);
528 	verf->committed = how;
529 }
530 
531 /* Factored out from fs/nfsd/vfs.h:fh_getattr() */
532 static int __vfs_getattr(struct path *p, struct kstat *stat, int version)
533 {
534 	u32 request_mask = STATX_BASIC_STATS;
535 
536 	if (version == 4)
537 		request_mask |= (STATX_BTIME | STATX_CHANGE_COOKIE);
538 	return vfs_getattr(p, stat, request_mask, AT_STATX_SYNC_AS_STAT);
539 }
540 
541 /* Copied from fs/nfsd/nfsfh.c:nfsd4_change_attribute() */
542 static u64 __nfsd4_change_attribute(const struct kstat *stat,
543 				    const struct inode *inode)
544 {
545 	u64 chattr;
546 
547 	if (stat->result_mask & STATX_CHANGE_COOKIE) {
548 		chattr = stat->change_cookie;
549 		if (S_ISREG(inode->i_mode) &&
550 		    !(stat->attributes & STATX_ATTR_CHANGE_MONOTONIC)) {
551 			chattr += (u64)stat->ctime.tv_sec << 30;
552 			chattr += stat->ctime.tv_nsec;
553 		}
554 	} else {
555 		chattr = time_to_chattr(&stat->ctime);
556 	}
557 	return chattr;
558 }
559 
560 static void nfs_local_vfs_getattr(struct nfs_local_kiocb *iocb)
561 {
562 	struct kstat stat;
563 	struct file *filp = iocb->kiocb.ki_filp;
564 	struct nfs_pgio_header *hdr = iocb->hdr;
565 	struct nfs_fattr *fattr = hdr->res.fattr;
566 	int version = NFS_PROTO(hdr->inode)->version;
567 
568 	if (unlikely(!fattr) || __vfs_getattr(&filp->f_path, &stat, version))
569 		return;
570 
571 	fattr->valid = (NFS_ATTR_FATTR_FILEID |
572 			NFS_ATTR_FATTR_CHANGE |
573 			NFS_ATTR_FATTR_SIZE |
574 			NFS_ATTR_FATTR_ATIME |
575 			NFS_ATTR_FATTR_MTIME |
576 			NFS_ATTR_FATTR_CTIME |
577 			NFS_ATTR_FATTR_SPACE_USED);
578 
579 	fattr->fileid = stat.ino;
580 	fattr->size = stat.size;
581 	fattr->atime = stat.atime;
582 	fattr->mtime = stat.mtime;
583 	fattr->ctime = stat.ctime;
584 	if (version == 4) {
585 		fattr->change_attr =
586 			__nfsd4_change_attribute(&stat, file_inode(filp));
587 	} else
588 		fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime);
589 	fattr->du.nfs3.used = stat.blocks << 9;
590 }
591 
592 static void
593 nfs_local_write_done(struct nfs_local_kiocb *iocb, long status)
594 {
595 	struct nfs_pgio_header *hdr = iocb->hdr;
596 	struct inode *inode = hdr->inode;
597 
598 	dprintk("%s: wrote %ld bytes.\n", __func__, status > 0 ? status : 0);
599 
600 	/* Handle short writes as if they are ENOSPC */
601 	if (status > 0 && status < hdr->args.count) {
602 		hdr->mds_offset += status;
603 		hdr->args.offset += status;
604 		hdr->args.pgbase += status;
605 		hdr->args.count -= status;
606 		nfs_set_pgio_error(hdr, -ENOSPC, hdr->args.offset);
607 		status = -ENOSPC;
608 	}
609 	if (status < 0)
610 		nfs_reset_boot_verifier(inode);
611 
612 	nfs_local_pgio_done(hdr, status);
613 }
614 
615 static void nfs_local_write_aio_complete_work(struct work_struct *work)
616 {
617 	struct nfs_local_kiocb *iocb =
618 		container_of(work, struct nfs_local_kiocb, work);
619 
620 	nfs_local_vfs_getattr(iocb);
621 	nfs_local_pgio_release(iocb);
622 }
623 
624 static void nfs_local_write_aio_complete(struct kiocb *kiocb, long ret)
625 {
626 	struct nfs_local_kiocb *iocb =
627 		container_of(kiocb, struct nfs_local_kiocb, kiocb);
628 
629 	nfs_local_write_done(iocb, ret);
630 	nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_write_aio_complete_work */
631 }
632 
633 static void nfs_local_call_write(struct work_struct *work)
634 {
635 	struct nfs_local_kiocb *iocb =
636 		container_of(work, struct nfs_local_kiocb, work);
637 	struct file *filp = iocb->kiocb.ki_filp;
638 	unsigned long old_flags = current->flags;
639 	const struct cred *save_cred;
640 	struct iov_iter iter;
641 	ssize_t status;
642 
643 	current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
644 	save_cred = override_creds(filp->f_cred);
645 
646 	nfs_local_iter_init(&iter, iocb, WRITE);
647 
648 	file_start_write(filp);
649 	status = filp->f_op->write_iter(&iocb->kiocb, &iter);
650 	file_end_write(filp);
651 
652 	revert_creds(save_cred);
653 	current->flags = old_flags;
654 
655 	if (status != -EIOCBQUEUED) {
656 		nfs_local_write_done(iocb, status);
657 		nfs_local_vfs_getattr(iocb);
658 		nfs_local_pgio_release(iocb);
659 	}
660 }
661 
662 static int
663 nfs_do_local_write(struct nfs_pgio_header *hdr,
664 		   struct nfsd_file *localio,
665 		   const struct rpc_call_ops *call_ops)
666 {
667 	struct nfs_local_kiocb *iocb;
668 	struct file *file = nfs_to->nfsd_file_file(localio);
669 
670 	/* Don't support filesystems without write_iter */
671 	if (!file->f_op->write_iter)
672 		return -EAGAIN;
673 
674 	dprintk("%s: vfs_write count=%u pos=%llu %s\n",
675 		__func__, hdr->args.count, hdr->args.offset,
676 		(hdr->args.stable == NFS_UNSTABLE) ?  "unstable" : "stable");
677 
678 	iocb = nfs_local_iocb_alloc(hdr, file, GFP_NOIO);
679 	if (iocb == NULL)
680 		return -ENOMEM;
681 	iocb->localio = localio;
682 
683 	switch (hdr->args.stable) {
684 	default:
685 		break;
686 	case NFS_DATA_SYNC:
687 		iocb->kiocb.ki_flags |= IOCB_DSYNC;
688 		break;
689 	case NFS_FILE_SYNC:
690 		iocb->kiocb.ki_flags |= IOCB_DSYNC|IOCB_SYNC;
691 	}
692 
693 	nfs_local_pgio_init(hdr, call_ops);
694 
695 	nfs_set_local_verifier(hdr->inode, hdr->res.verf, hdr->args.stable);
696 
697 	if (iocb->kiocb.ki_flags & IOCB_DIRECT) {
698 		iocb->kiocb.ki_complete = nfs_local_write_aio_complete;
699 		iocb->aio_complete_work = nfs_local_write_aio_complete_work;
700 	}
701 
702 	INIT_WORK(&iocb->work, nfs_local_call_write);
703 	queue_work(nfslocaliod_workqueue, &iocb->work);
704 
705 	return 0;
706 }
707 
708 int nfs_local_doio(struct nfs_client *clp, struct nfsd_file *localio,
709 		   struct nfs_pgio_header *hdr,
710 		   const struct rpc_call_ops *call_ops)
711 {
712 	int status = 0;
713 
714 	if (!hdr->args.count)
715 		return 0;
716 
717 	switch (hdr->rw_mode) {
718 	case FMODE_READ:
719 		status = nfs_do_local_read(hdr, localio, call_ops);
720 		break;
721 	case FMODE_WRITE:
722 		status = nfs_do_local_write(hdr, localio, call_ops);
723 		break;
724 	default:
725 		dprintk("%s: invalid mode: %d\n", __func__,
726 			hdr->rw_mode);
727 		status = -EINVAL;
728 	}
729 
730 	if (status != 0) {
731 		if (status == -EAGAIN)
732 			nfs_localio_disable_client(clp);
733 		nfs_local_file_put(localio);
734 		hdr->task.tk_status = status;
735 		nfs_local_hdr_release(hdr, call_ops);
736 	}
737 	return status;
738 }
739 
740 static void
741 nfs_local_init_commit(struct nfs_commit_data *data,
742 		const struct rpc_call_ops *call_ops)
743 {
744 	data->task.tk_ops = call_ops;
745 }
746 
747 static int
748 nfs_local_run_commit(struct file *filp, struct nfs_commit_data *data)
749 {
750 	loff_t start = data->args.offset;
751 	loff_t end = LLONG_MAX;
752 
753 	if (data->args.count > 0) {
754 		end = start + data->args.count - 1;
755 		if (end < start)
756 			end = LLONG_MAX;
757 	}
758 
759 	dprintk("%s: commit %llu - %llu\n", __func__, start, end);
760 	return vfs_fsync_range(filp, start, end, 0);
761 }
762 
763 static void
764 nfs_local_commit_done(struct nfs_commit_data *data, int status)
765 {
766 	if (status >= 0) {
767 		nfs_set_local_verifier(data->inode,
768 				data->res.verf,
769 				NFS_FILE_SYNC);
770 		data->res.op_status = NFS4_OK;
771 		data->task.tk_status = 0;
772 	} else {
773 		nfs_reset_boot_verifier(data->inode);
774 		data->res.op_status = nfs_localio_errno_to_nfs4_stat(status);
775 		data->task.tk_status = status;
776 	}
777 }
778 
779 static void
780 nfs_local_release_commit_data(struct nfsd_file *localio,
781 		struct nfs_commit_data *data,
782 		const struct rpc_call_ops *call_ops)
783 {
784 	nfs_local_file_put(localio);
785 	call_ops->rpc_call_done(&data->task, data);
786 	call_ops->rpc_release(data);
787 }
788 
789 static void
790 nfs_local_fsync_ctx_free(struct nfs_local_fsync_ctx *ctx)
791 {
792 	nfs_local_release_commit_data(ctx->localio, ctx->data,
793 				      ctx->data->task.tk_ops);
794 	kfree(ctx);
795 }
796 
797 static void
798 nfs_local_fsync_work(struct work_struct *work)
799 {
800 	struct nfs_local_fsync_ctx *ctx;
801 	int status;
802 
803 	ctx = container_of(work, struct nfs_local_fsync_ctx, work);
804 
805 	status = nfs_local_run_commit(nfs_to->nfsd_file_file(ctx->localio),
806 				      ctx->data);
807 	nfs_local_commit_done(ctx->data, status);
808 	if (ctx->done != NULL)
809 		complete(ctx->done);
810 	nfs_local_fsync_ctx_free(ctx);
811 }
812 
813 static struct nfs_local_fsync_ctx *
814 nfs_local_fsync_ctx_alloc(struct nfs_commit_data *data,
815 			  struct nfsd_file *localio, gfp_t flags)
816 {
817 	struct nfs_local_fsync_ctx *ctx = kmalloc(sizeof(*ctx), flags);
818 
819 	if (ctx != NULL) {
820 		ctx->localio = localio;
821 		ctx->data = data;
822 		INIT_WORK(&ctx->work, nfs_local_fsync_work);
823 		ctx->done = NULL;
824 	}
825 	return ctx;
826 }
827 
828 int nfs_local_commit(struct nfsd_file *localio,
829 		     struct nfs_commit_data *data,
830 		     const struct rpc_call_ops *call_ops, int how)
831 {
832 	struct nfs_local_fsync_ctx *ctx;
833 
834 	ctx = nfs_local_fsync_ctx_alloc(data, localio, GFP_KERNEL);
835 	if (!ctx) {
836 		nfs_local_commit_done(data, -ENOMEM);
837 		nfs_local_release_commit_data(localio, data, call_ops);
838 		return -ENOMEM;
839 	}
840 
841 	nfs_local_init_commit(data, call_ops);
842 
843 	if (how & FLUSH_SYNC) {
844 		DECLARE_COMPLETION_ONSTACK(done);
845 		ctx->done = &done;
846 		queue_work(nfsiod_workqueue, &ctx->work);
847 		wait_for_completion(&done);
848 	} else
849 		queue_work(nfsiod_workqueue, &ctx->work);
850 
851 	return 0;
852 }
853