1 // SPDX-License-Identifier: GPL-2.0-only
2 /*
3 * NFS client support for local clients to bypass network stack
4 *
5 * Copyright (C) 2014 Weston Andros Adamson <dros@primarydata.com>
6 * Copyright (C) 2019 Trond Myklebust <trond.myklebust@hammerspace.com>
7 * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com>
8 * Copyright (C) 2024 NeilBrown <neilb@suse.de>
9 */
10
11 #include <linux/module.h>
12 #include <linux/errno.h>
13 #include <linux/vfs.h>
14 #include <linux/file.h>
15 #include <linux/inet.h>
16 #include <linux/sunrpc/addr.h>
17 #include <linux/inetdevice.h>
18 #include <net/addrconf.h>
19 #include <linux/nfs_common.h>
20 #include <linux/nfslocalio.h>
21 #include <linux/bvec.h>
22
23 #include <linux/nfs.h>
24 #include <linux/nfs_fs.h>
25 #include <linux/nfs_xdr.h>
26
27 #include "internal.h"
28 #include "pnfs.h"
29 #include "nfstrace.h"
30
31 #define NFSDBG_FACILITY NFSDBG_VFS
32
33 #define NFSLOCAL_MAX_IOS 3
34
/*
 * Per-I/O state for one LOCALIO read or write: wraps the kiocb issued
 * directly against the nfsd_file's backing file, plus up to
 * NFSLOCAL_MAX_IOS iov_iter segments used to split a misaligned
 * O_DIRECT request into buffered start/end pieces around a DIO-aligned
 * middle extent.
 */
struct nfs_local_kiocb {
	struct kiocb kiocb;
	struct bio_vec *bvec;		/* maps hdr->page_array pages for the iters */
	struct nfs_pgio_header *hdr;
	struct work_struct work;
	/* set when AIO completion (ki_complete) must finish on a workqueue */
	void (*aio_complete_work)(struct work_struct *);
	struct nfsd_file *localio;
	/* Begin mostly DIO-specific members */
	size_t end_len;
	short int end_iter_index;	/* index of last iter, -1 when unused */
	atomic_t n_iters;		/* outstanding iters, see nfs_local_pgio_done() */
	struct iov_iter iters[NFSLOCAL_MAX_IOS];
	bool iter_is_dio_aligned[NFSLOCAL_MAX_IOS];
	/* End mostly DIO-specific members */
};
50
/* Context for running an NFS COMMIT as a local vfs_fsync_range() call. */
struct nfs_local_fsync_ctx {
	struct nfsd_file *localio;	/* open server-side file */
	struct nfs_commit_data *data;
	struct work_struct work;
	struct completion *done;	/* optional: signalled when commit finishes */
};
57
/* Runtime switch for the LOCALIO bypass; writable via module parameter. */
static bool localio_enabled __read_mostly = true;
module_param(localio_enabled, bool, 0644);
60
61 static void nfs_local_do_read(struct nfs_local_kiocb *iocb,
62 const struct rpc_call_ops *call_ops);
63 static void nfs_local_do_write(struct nfs_local_kiocb *iocb,
64 const struct rpc_call_ops *call_ops);
65
nfs_client_is_local(const struct nfs_client * clp)66 static inline bool nfs_client_is_local(const struct nfs_client *clp)
67 {
68 return !!rcu_access_pointer(clp->cl_uuid.net);
69 }
70
nfs_server_is_local(const struct nfs_client * clp)71 bool nfs_server_is_local(const struct nfs_client *clp)
72 {
73 return nfs_client_is_local(clp) && localio_enabled;
74 }
75 EXPORT_SYMBOL_GPL(nfs_server_is_local);
76
77 /*
78 * UUID_IS_LOCAL XDR functions
79 */
80
localio_xdr_enc_uuidargs(struct rpc_rqst * req,struct xdr_stream * xdr,const void * data)81 static void localio_xdr_enc_uuidargs(struct rpc_rqst *req,
82 struct xdr_stream *xdr,
83 const void *data)
84 {
85 const u8 *uuid = data;
86
87 encode_opaque_fixed(xdr, uuid, UUID_SIZE);
88 }
89
/*
 * UUID_IS_LOCAL has a void reply: nothing to decode here; the useful
 * side effect (a local server wiring up cl_uuid) happens server-side.
 */
static int localio_xdr_dec_uuidres(struct rpc_rqst *req,
				   struct xdr_stream *xdr,
				   void *result)
{
	/* void return */
	return 0;
}
97
/* Procedure table for the LOCALIO sideband protocol (single UUID probe). */
static const struct rpc_procinfo nfs_localio_procedures[] = {
	[LOCALIOPROC_UUID_IS_LOCAL] = {
		.p_proc = LOCALIOPROC_UUID_IS_LOCAL,
		.p_encode = localio_xdr_enc_uuidargs,
		.p_decode = localio_xdr_dec_uuidres,
		.p_arglen = XDR_QUADLEN(UUID_SIZE),
		.p_replen = 0,	/* void reply */
		.p_statidx = LOCALIOPROC_UUID_IS_LOCAL,
		.p_name = "UUID_IS_LOCAL",
	},
};
109
/*
 * Per-procedure call counters, the version-1 descriptor, and the program
 * entry used to rpc_bind_new_program() the LOCALIO sideband protocol onto
 * an existing client transport.
 */
static unsigned int nfs_localio_counts[ARRAY_SIZE(nfs_localio_procedures)];
static const struct rpc_version nfslocalio_version1 = {
	.number = 1,
	.nrprocs = ARRAY_SIZE(nfs_localio_procedures),
	.procs = nfs_localio_procedures,
	.counts = nfs_localio_counts,
};

static const struct rpc_version *nfslocalio_version[] = {
	[1] = &nfslocalio_version1,
};

/* Forward declaration: the rpc_stat and the program reference each other. */
extern const struct rpc_program nfslocalio_program;
static struct rpc_stat nfslocalio_rpcstat = { &nfslocalio_program };

const struct rpc_program nfslocalio_program = {
	.name = "nfslocalio",
	.number = NFS_LOCALIO_PROGRAM,
	.nrvers = ARRAY_SIZE(nfslocalio_version),
	.version = nfslocalio_version,
	.stats = &nfslocalio_rpcstat,
};
132
133 /*
134 * nfs_init_localioclient - Initialise an NFS localio client connection
135 */
nfs_init_localioclient(struct nfs_client * clp)136 static struct rpc_clnt *nfs_init_localioclient(struct nfs_client *clp)
137 {
138 struct rpc_clnt *rpcclient_localio;
139
140 rpcclient_localio = rpc_bind_new_program(clp->cl_rpcclient,
141 &nfslocalio_program, 1);
142
143 dprintk_rcu("%s: server (%s) %s NFS LOCALIO.\n",
144 __func__, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR),
145 (IS_ERR(rpcclient_localio) ? "does not support" : "supports"));
146
147 return rpcclient_localio;
148 }
149
/*
 * Probe the server with LOCALIOPROC_UUID_IS_LOCAL carrying our UUID.
 * Returns true only if the RPC succeeded AND the (in-kernel, local)
 * server filled in cl_uuid.net and cl_uuid.dom while handling it.
 */
static bool nfs_server_uuid_is_local(struct nfs_client *clp)
{
	u8 uuid[UUID_SIZE];
	struct rpc_message msg = {
		.rpc_argp = &uuid,
	};
	struct rpc_clnt *rpcclient_localio;
	int status;

	rpcclient_localio = nfs_init_localioclient(clp);
	if (IS_ERR(rpcclient_localio))
		return false;

	export_uuid(uuid, &clp->cl_uuid.uuid);

	msg.rpc_proc = &nfs_localio_procedures[LOCALIOPROC_UUID_IS_LOCAL];
	status = rpc_call_sync(rpcclient_localio, &msg, 0);
	dprintk("%s: NFS reply UUID_IS_LOCAL: status=%d\n",
		__func__, status);
	/* The sideband client is only needed for this one probe */
	rpc_shutdown_client(rpcclient_localio);

	/* Server is only local if it initialized required struct members */
	if (status || !rcu_access_pointer(clp->cl_uuid.net) || !clp->cl_uuid.dom)
		return false;

	return true;
}
177
178 /*
179 * nfs_local_probe - probe local i/o support for an nfs_server and nfs_client
180 * - called after alloc_client and init_client (so cl_rpcclient exists)
181 * - this function is idempotent, it can be called for old or new clients
182 */
static void nfs_local_probe(struct nfs_client *clp)
{
	/* Disallow localio if disabled via sysfs or AUTH_SYS isn't used */
	if (!localio_enabled ||
	    clp->cl_rpcclient->cl_auth->au_flavor != RPC_AUTH_UNIX) {
		nfs_localio_disable_client(clp);
		return;
	}

	/* Already detected as local: nothing to do (idempotent) */
	if (nfs_client_is_local(clp))
		return;

	/* nfs_uuid_begin() fails when another probe is already in flight */
	if (!nfs_uuid_begin(&clp->cl_uuid))
		return;
	if (nfs_server_uuid_is_local(clp))
		nfs_localio_enable_client(clp);
	nfs_uuid_end(&clp->cl_uuid);
}
201
/* Workqueue body for async probing; pins the client across the probe. */
void nfs_local_probe_async_work(struct work_struct *work)
{
	struct nfs_client *clp =
		container_of(work, struct nfs_client, cl_local_probe_work);

	/* Bail out if the client is already being torn down */
	if (!refcount_inc_not_zero(&clp->cl_count))
		return;
	nfs_local_probe(clp);
	nfs_put_client(clp);
}
212
/* Schedule a LOCALIO probe on nfsiod; see nfs_local_probe_async_work(). */
void nfs_local_probe_async(struct nfs_client *clp)
{
	queue_work(nfsiod_workqueue, &clp->cl_local_probe_work);
}
EXPORT_SYMBOL_GPL(nfs_local_probe_async);
218
/* Drop the reference this client holds on @localio. */
static inline void nfs_local_file_put(struct nfsd_file *localio)
{
	/* nfs_to_nfsd_file_put_local() expects an __rcu pointer
	 * but we have a __kernel pointer. It is always safe
	 * to cast a __kernel pointer to an __rcu pointer
	 * because the cast only weakens what is known about the pointer.
	 */
	struct nfsd_file __rcu *nf = (struct nfsd_file __rcu*) localio;

	nfs_to_nfsd_file_put_local(&nf);
}
230
231 /*
232 * __nfs_local_open_fh - open a local filehandle in terms of nfsd_file.
233 *
234 * Returns a pointer to a struct nfsd_file or ERR_PTR.
235 * Caller must release returned nfsd_file with nfs_to_nfsd_file_put_local().
236 */
237 static struct nfsd_file *
__nfs_local_open_fh(struct nfs_client * clp,const struct cred * cred,struct nfs_fh * fh,struct nfs_file_localio * nfl,struct nfsd_file __rcu ** pnf,const fmode_t mode)238 __nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
239 struct nfs_fh *fh, struct nfs_file_localio *nfl,
240 struct nfsd_file __rcu **pnf,
241 const fmode_t mode)
242 {
243 int status = 0;
244 struct nfsd_file *localio;
245
246 localio = nfs_open_local_fh(&clp->cl_uuid, clp->cl_rpcclient,
247 cred, fh, nfl, pnf, mode);
248 if (IS_ERR(localio)) {
249 status = PTR_ERR(localio);
250 switch (status) {
251 case -ENOMEM:
252 case -ENXIO:
253 case -ENOENT:
254 /* Revalidate localio */
255 nfs_localio_disable_client(clp);
256 nfs_local_probe(clp);
257 }
258 }
259 trace_nfs_local_open_fh(fh, mode, status);
260 return localio;
261 }
262
263 /*
264 * nfs_local_open_fh - open a local filehandle in terms of nfsd_file.
265 * First checking if the open nfsd_file is already cached, otherwise
266 * must __nfs_local_open_fh and insert the nfsd_file in nfs_file_localio.
267 *
268 * Returns a pointer to a struct nfsd_file or NULL.
269 */
270 struct nfsd_file *
nfs_local_open_fh(struct nfs_client * clp,const struct cred * cred,struct nfs_fh * fh,struct nfs_file_localio * nfl,const fmode_t mode)271 nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
272 struct nfs_fh *fh, struct nfs_file_localio *nfl,
273 const fmode_t mode)
274 {
275 struct nfsd_file *nf, __rcu **pnf;
276
277 if (!nfs_server_is_local(clp))
278 return NULL;
279 if (mode & ~(FMODE_READ | FMODE_WRITE))
280 return NULL;
281
282 if (mode & FMODE_WRITE)
283 pnf = &nfl->rw_file;
284 else
285 pnf = &nfl->ro_file;
286
287 nf = __nfs_local_open_fh(clp, cred, fh, nfl, pnf, mode);
288 if (IS_ERR(nf))
289 return NULL;
290 return nf;
291 }
292 EXPORT_SYMBOL_GPL(nfs_local_open_fh);
293
294 /*
295 * Ensure all page cache allocations are done from GFP_NOFS context to
296 * prevent direct reclaim recursion back into NFS via nfs_writepages.
297 */
298 static void
nfs_local_mapping_set_gfp_nofs_context(struct address_space * m)299 nfs_local_mapping_set_gfp_nofs_context(struct address_space *m)
300 {
301 gfp_t gfp_mask = mapping_gfp_mask(m);
302
303 mapping_set_gfp_mask(m, (gfp_mask & ~(__GFP_FS)));
304 }
305
/* Free an iocb and its bvec array (counterpart of nfs_local_iocb_alloc). */
static void
nfs_local_iocb_free(struct nfs_local_kiocb *iocb)
{
	kfree(iocb->bvec);
	kfree(iocb);
}
312
313 static struct nfs_local_kiocb *
nfs_local_iocb_alloc(struct nfs_pgio_header * hdr,struct file * file,gfp_t flags)314 nfs_local_iocb_alloc(struct nfs_pgio_header *hdr,
315 struct file *file, gfp_t flags)
316 {
317 struct nfs_local_kiocb *iocb;
318
319 iocb = kzalloc_obj(*iocb, flags);
320 if (iocb == NULL)
321 return NULL;
322
323 iocb->bvec = kmalloc_objs(struct bio_vec, hdr->page_array.npages, flags);
324 if (iocb->bvec == NULL) {
325 kfree(iocb);
326 return NULL;
327 }
328
329 nfs_local_mapping_set_gfp_nofs_context(file->f_mapping);
330 init_sync_kiocb(&iocb->kiocb, file);
331
332 iocb->hdr = hdr;
333 iocb->kiocb.ki_pos = hdr->args.offset;
334 iocb->kiocb.ki_flags &= ~IOCB_APPEND;
335 iocb->kiocb.ki_complete = NULL;
336 iocb->aio_complete_work = NULL;
337
338 iocb->end_iter_index = -1;
339
340 return iocb;
341 }
342
/*
 * Determine whether (part of) this I/O can use O_DIRECT against the
 * underlying file, based on the alignment constraints nfsd reports.
 * On success, fill @local_dio with a 3-way split of [offset, offset+len):
 * a misaligned start, a DIO-aligned middle, and a misaligned end.  The
 * start/end pieces may be empty; they fall back to buffered I/O.
 */
static bool
nfs_is_local_dio_possible(struct nfs_local_kiocb *iocb, int rw,
		size_t len, struct nfs_local_dio *local_dio)
{
	struct nfs_pgio_header *hdr = iocb->hdr;
	loff_t offset = hdr->args.offset;
	u32 nf_dio_mem_align, nf_dio_offset_align, nf_dio_read_offset_align;
	loff_t start_end, orig_end, middle_end;

	nfs_to->nfsd_file_dio_alignment(iocb->localio, &nf_dio_mem_align,
			&nf_dio_offset_align, &nf_dio_read_offset_align);
	/* Reads may have a different offset-alignment requirement */
	if (rw == ITER_DEST)
		nf_dio_offset_align = nf_dio_read_offset_align;

	/* No DIO if the fs reports no alignment, or the I/O is too small */
	if (unlikely(!nf_dio_mem_align || !nf_dio_offset_align))
		return false;
	if (unlikely(len < nf_dio_offset_align))
		return false;

	local_dio->mem_align = nf_dio_mem_align;
	local_dio->offset_align = nf_dio_offset_align;

	/* First aligned boundary at/after offset, last at/before the end */
	start_end = round_up(offset, nf_dio_offset_align);
	orig_end = offset + len;
	middle_end = round_down(orig_end, nf_dio_offset_align);

	local_dio->middle_offset = start_end;
	local_dio->end_offset = middle_end;

	local_dio->start_len = start_end - offset;
	local_dio->middle_len = middle_end - start_end;
	local_dio->end_len = orig_end - middle_end;

	if (rw == ITER_DEST)
		trace_nfs_local_dio_read(hdr->inode, offset, len, local_dio);
	else
		trace_nfs_local_dio_write(hdr->inode, offset, len, local_dio);
	return true;
}
382
/*
 * Check that every bvec segment of @i satisfies the DIO memory-address
 * alignment (@addr_mask) and that the total byte count satisfies
 * @len_mask.  Adapted from iov_iter_aligned_bvec() in lib/iov_iter.c;
 * NOTE(review): unlike that helper, this only checks the aggregate
 * i->count against len_mask, not each segment length — presumably safe
 * because the DIO middle extent is built from whole pages, but worth
 * confirming against lib/iov_iter.c.
 */
static bool nfs_iov_iter_aligned_bvec(const struct iov_iter *i,
		unsigned int addr_mask, unsigned int len_mask)
{
	const struct bio_vec *bvec = i->bvec;
	size_t skip = i->iov_offset;
	size_t size = i->count;

	if (size & len_mask)
		return false;
	do {
		size_t len = bvec->bv_len;

		if (len > size)
			len = size;
		/* iov_offset only offsets into the first segment */
		if ((unsigned long)(bvec->bv_offset + skip) & addr_mask)
			return false;
		bvec++;
		size -= len;
		skip = 0;
	} while (size);

	return true;
}
406
407 static void
nfs_local_iter_setup(struct iov_iter * iter,int rw,struct bio_vec * bvec,unsigned int nvecs,unsigned long total,size_t start,size_t len)408 nfs_local_iter_setup(struct iov_iter *iter, int rw, struct bio_vec *bvec,
409 unsigned int nvecs, unsigned long total,
410 size_t start, size_t len)
411 {
412 iov_iter_bvec(iter, rw, bvec, nvecs, total);
413 if (start)
414 iov_iter_advance(iter, start);
415 iov_iter_truncate(iter, len);
416 }
417
418 /*
419 * Setup as many as 3 iov_iter based on extents described by @local_dio.
420 * Returns the number of iov_iter that were setup.
421 */
static int
nfs_local_iters_setup_dio(struct nfs_local_kiocb *iocb, int rw,
		unsigned int nvecs, unsigned long total,
		struct nfs_local_dio *local_dio)
{
	int n_iters = 0;
	struct iov_iter *iters = iocb->iters;

	/* Setup misaligned start? */
	if (local_dio->start_len) {
		nfs_local_iter_setup(&iters[n_iters], rw, iocb->bvec,
				nvecs, total, 0, local_dio->start_len);
		++n_iters;
	}

	/*
	 * Setup DIO-aligned middle, if there is no misaligned end (below)
	 * then AIO completion is used, see nfs_local_call_{read,write}
	 */
	nfs_local_iter_setup(&iters[n_iters], rw, iocb->bvec, nvecs,
			total, local_dio->start_len, local_dio->middle_len);

	/* Memory alignment must hold across every bvec of the middle iter */
	iocb->iter_is_dio_aligned[n_iters] =
		nfs_iov_iter_aligned_bvec(&iters[n_iters],
			local_dio->mem_align-1, local_dio->offset_align-1);

	if (unlikely(!iocb->iter_is_dio_aligned[n_iters])) {
		trace_nfs_local_dio_misaligned(iocb->hdr->inode,
			local_dio->start_len, local_dio->middle_len, local_dio);
		return 0; /* no DIO-aligned IO possible */
	}
	/* Track the last iter's index; AIO is only used when DIO is last */
	iocb->end_iter_index = n_iters;
	++n_iters;

	/* Setup misaligned end? */
	if (local_dio->end_len) {
		nfs_local_iter_setup(&iters[n_iters], rw, iocb->bvec,
				nvecs, total, local_dio->start_len +
				local_dio->middle_len, local_dio->end_len);
		iocb->end_iter_index = n_iters;
		++n_iters;
	}

	atomic_set(&iocb->n_iters, n_iters);
	return n_iters;
}
468
/*
 * Populate iocb->bvec from the pageio header's page array and build the
 * iov_iter(s): a DIO split when O_DIRECT was requested and alignment
 * permits, otherwise a single buffered iter covering the whole range.
 */
static noinline_for_stack void
nfs_local_iters_init(struct nfs_local_kiocb *iocb, int rw)
{
	struct nfs_pgio_header *hdr = iocb->hdr;
	struct page **pagevec = hdr->page_array.pagevec;
	unsigned long v, total;
	unsigned int base;
	size_t len;

	/* Map args.count bytes starting at pgbase into iocb->bvec */
	v = 0;
	total = hdr->args.count;
	base = hdr->args.pgbase;
	pagevec += base >> PAGE_SHIFT;
	base &= ~PAGE_MASK;
	while (total && v < hdr->page_array.npages) {
		len = min_t(size_t, total, PAGE_SIZE - base);
		bvec_set_page(&iocb->bvec[v], *pagevec, len, base);
		total -= len;
		++pagevec;
		++v;
		base = 0;	/* only the first page has a non-zero offset */
	}
	len = hdr->args.count - total;	/* bytes actually mapped */

	/*
	 * For each iocb, iocb->n_iters is always at least 1 and we always
	 * end io after first nfs_local_pgio_done call unless misaligned DIO.
	 */
	atomic_set(&iocb->n_iters, 1);

	if (test_bit(NFS_IOHDR_ODIRECT, &hdr->flags)) {
		struct nfs_local_dio local_dio;

		if (nfs_is_local_dio_possible(iocb, rw, len, &local_dio) &&
		    nfs_local_iters_setup_dio(iocb, rw, v, len, &local_dio) != 0) {
			/* Ensure DIO WRITE's IO on stable storage upon completion */
			if (rw == ITER_SOURCE)
				iocb->kiocb.ki_flags |= IOCB_DSYNC|IOCB_SYNC;
			return; /* is DIO-aligned */
		}
	}

	/* Use buffered IO */
	iov_iter_bvec(&iocb->iters[0], rw, iocb->bvec, v, len);
}
514
/* Run the RPC completion callbacks as if the request went over the wire. */
static void
nfs_local_hdr_release(struct nfs_pgio_header *hdr,
		const struct rpc_call_ops *call_ops)
{
	call_ops->rpc_call_done(&hdr->task, hdr);
	call_ops->rpc_release(hdr);
}
522
/* Mimic rpc_task setup so common NFS completion paths work unchanged. */
static void
nfs_local_pgio_init(struct nfs_pgio_header *hdr,
		const struct rpc_call_ops *call_ops)
{
	hdr->task.tk_ops = call_ops;
	/* Preserve the original start time across a restart */
	if (!hdr->task.tk_start)
		hdr->task.tk_start = ktime_get();
}
531
/*
 * Record one iter's result into @hdr and drop one count from n_iters.
 * Returns true when this was the last outstanding iter, i.e. the caller
 * must complete the whole request.
 */
static bool nfs_local_pgio_done(struct nfs_local_kiocb *iocb, long status)
{
	struct nfs_pgio_header *hdr = iocb->hdr;

	/* Must handle partial completions */
	if (status >= 0) {
		hdr->res.count += status;
		/* @hdr was initialized to 0 (zeroed during allocation) */
		if (hdr->task.tk_status == 0)
			hdr->res.op_status = NFS4_OK;
	} else {
		/* A failure on any iter poisons the whole request */
		hdr->res.op_status = nfs_localio_errno_to_nfs4_stat(status);
		hdr->task.tk_status = status;
	}

	BUG_ON(atomic_read(&iocb->n_iters) <= 0);
	return atomic_dec_and_test(&iocb->n_iters);
}
550
/* Release the nfsd_file reference, then free the iocb. */
static void
nfs_local_iocb_release(struct nfs_local_kiocb *iocb)
{
	nfs_local_file_put(iocb->localio);
	nfs_local_iocb_free(iocb);
}
557
/*
 * Re-issue the I/O described by @hdr on an existing iocb (used when
 * rpc_call_done() requested a retry, see nfs_local_pgio_release()).
 * The kiocb is reset to buffered, synchronous defaults first.
 */
static void nfs_local_pgio_restart(struct nfs_local_kiocb *iocb,
				   struct nfs_pgio_header *hdr)
{
	int status = 0;

	iocb->kiocb.ki_pos = hdr->args.offset;
	iocb->kiocb.ki_flags &= ~(IOCB_DSYNC | IOCB_SYNC | IOCB_DIRECT);
	iocb->kiocb.ki_complete = NULL;
	iocb->aio_complete_work = NULL;
	iocb->end_iter_index = -1;

	switch (hdr->rw_mode) {
	case FMODE_READ:
		nfs_local_iters_init(iocb, ITER_DEST);
		nfs_local_do_read(iocb, hdr->task.tk_ops);
		break;
	case FMODE_WRITE:
		nfs_local_iters_init(iocb, ITER_SOURCE);
		nfs_local_do_write(iocb, hdr->task.tk_ops);
		break;
	default:
		status = -EOPNOTSUPP;
	}

	if (unlikely(status != 0)) {
		/* Unsupported mode: fail the request and release everything */
		nfs_local_iocb_release(iocb);
		hdr->task.tk_status = status;
		nfs_local_hdr_release(hdr, hdr->task.tk_ops);
	}
}
588
/*
 * Final completion: run rpc_call_done() and either release the request
 * or, if the callback re-armed the task (tk_action != NULL), restart it.
 */
static void nfs_local_pgio_release(struct nfs_local_kiocb *iocb)
{
	struct nfs_pgio_header *hdr = iocb->hdr;
	struct rpc_task *task = &hdr->task;

	task->tk_action = NULL;
	task->tk_ops->rpc_call_done(task, hdr);

	/* rpc_call_done() sets tk_action when the I/O must be retried */
	if (task->tk_action == NULL) {
		nfs_local_iocb_release(iocb);
		task->tk_ops->rpc_release(hdr);
	} else
		nfs_local_pgio_restart(iocb, hdr);
}
603
604 /*
605 * Complete the I/O from iocb->kiocb.ki_complete()
606 *
607 * Note that this function can be called from a bottom half context,
608 * hence we need to queue the rpc_call_done() etc to a workqueue
609 */
static inline void nfs_local_pgio_aio_complete(struct nfs_local_kiocb *iocb)
{
	/* iocb->work is free for reuse: the submission work has finished */
	INIT_WORK(&iocb->work, iocb->aio_complete_work);
	queue_work(nfsiod_workqueue, &iocb->work);
}
615
/* Post-process a completed local read (common to sync and AIO paths). */
static void nfs_local_read_done(struct nfs_local_kiocb *iocb)
{
	struct nfs_pgio_header *hdr = iocb->hdr;
	struct file *filp = iocb->kiocb.ki_filp;
	long status = hdr->task.tk_status;

	if ((iocb->kiocb.ki_flags & IOCB_DIRECT) && status == -EINVAL) {
		/* Underlying FS will return -EINVAL if misaligned DIO is attempted. */
		pr_info_ratelimited("nfs: Unexpected direct I/O read alignment failure\n");
	}

	/*
	 * Must clear replen otherwise NFSv3 data corruption will occur
	 * if/when switching from LOCALIO back to using normal RPC.
	 */
	hdr->res.replen = 0;

	/* nfs_readpage_result() handles short read */

	if (hdr->args.offset + hdr->res.count >= i_size_read(file_inode(filp)))
		hdr->res.eof = true;

	dprintk("%s: read %ld bytes eof %d.\n", __func__,
			status > 0 ? status : 0, hdr->res.eof);
}
641
/* Finish a read: fix up results, then complete/release the request. */
static inline void nfs_local_read_iocb_done(struct nfs_local_kiocb *iocb)
{
	nfs_local_read_done(iocb);
	nfs_local_pgio_release(iocb);
}
647
/* Workqueue half of AIO read completion; see nfs_local_read_aio_complete(). */
static void nfs_local_read_aio_complete_work(struct work_struct *work)
{
	struct nfs_local_kiocb *iocb =
		container_of(work, struct nfs_local_kiocb, work);

	nfs_local_read_iocb_done(iocb);
}
655
/* ki_complete callback for DIO reads; may run in bottom-half context. */
static void nfs_local_read_aio_complete(struct kiocb *kiocb, long ret)
{
	struct nfs_local_kiocb *iocb =
		container_of(kiocb, struct nfs_local_kiocb, kiocb);

	/* AIO completion of DIO read should always be last to complete */
	if (unlikely(!nfs_local_pgio_done(iocb, ret)))
		return;

	/* Defer the rest to nfsiod: we may be in interrupt context here */
	nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_read_aio_complete_work */
}
667
/*
 * Issue the read iters against the backing file from the nfslocaliod
 * workqueue, toggling IOCB_DIRECT per segment.  A DIO segment in last
 * position completes via AIO (ki_complete); all other segments are
 * accounted synchronously here.
 */
static void nfs_local_call_read(struct work_struct *work)
{
	struct nfs_local_kiocb *iocb =
		container_of(work, struct nfs_local_kiocb, work);
	struct file *filp = iocb->kiocb.ki_filp;
	ssize_t status;
	int n_iters;

	n_iters = atomic_read(&iocb->n_iters);
	for (int i = 0; i < n_iters ; i++) {
		if (iocb->iter_is_dio_aligned[i]) {
			iocb->kiocb.ki_flags |= IOCB_DIRECT;
			/* Only use AIO completion if DIO-aligned segment is last */
			if (i == iocb->end_iter_index) {
				iocb->kiocb.ki_complete = nfs_local_read_aio_complete;
				iocb->aio_complete_work = nfs_local_read_aio_complete_work;
			}
		} else
			iocb->kiocb.ki_flags &= ~IOCB_DIRECT;

		/* Issue the read with the file opener's credentials */
		scoped_with_creds(filp->f_cred)
			status = filp->f_op->read_iter(&iocb->kiocb, &iocb->iters[i]);

		/* -EIOCBQUEUED: AIO in flight, ki_complete will finish up */
		if (status == -EIOCBQUEUED)
			continue;
		/* Break on completion, errors, or short reads */
		if (nfs_local_pgio_done(iocb, status) || status < 0 ||
		    (size_t)status < iov_iter_count(&iocb->iters[i])) {
			nfs_local_read_iocb_done(iocb);
			break;
		}
	}
}
701
/* Start a local read: init task state, then hand off to nfslocaliod. */
static void nfs_local_do_read(struct nfs_local_kiocb *iocb,
		const struct rpc_call_ops *call_ops)
{
	struct nfs_pgio_header *hdr = iocb->hdr;

	dprintk("%s: vfs_read count=%u pos=%llu\n",
		__func__, hdr->args.count, hdr->args.offset);

	nfs_local_pgio_init(hdr, call_ops);
	hdr->res.eof = false;

	INIT_WORK(&iocb->work, nfs_local_call_read);
	queue_work(nfslocaliod_workqueue, &iocb->work);
}
716
/*
 * Copy the client's boot time into the write verifier, under the boot
 * seqlock so a concurrent nfs_reset_boot_verifier() cannot produce a
 * torn (half old / half new) verifier.
 */
static void
nfs_copy_boot_verifier(struct nfs_write_verifier *verifier, struct inode *inode)
{
	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
	u32 *verf = (u32 *)verifier->data;
	unsigned int seq;

	do {
		seq = read_seqbegin(&clp->cl_boot_lock);
		verf[0] = (u32)clp->cl_nfssvc_boot.tv_sec;
		verf[1] = (u32)clp->cl_nfssvc_boot.tv_nsec;
	} while (read_seqretry(&clp->cl_boot_lock, seq));
}
730
/*
 * Bump the boot verifier after a failed write/commit so callers that
 * compare verifiers will resend their UNSTABLE data.
 */
static void
nfs_reset_boot_verifier(struct inode *inode)
{
	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;

	write_seqlock(&clp->cl_boot_lock);
	ktime_get_real_ts64(&clp->cl_nfssvc_boot);
	write_sequnlock(&clp->cl_boot_lock);
}
740
/* Fill @verf with the local boot verifier and record the stability level. */
static void
nfs_set_local_verifier(struct inode *inode,
		struct nfs_writeverf *verf,
		enum nfs3_stable_how how)
{
	nfs_copy_boot_verifier(&verf->verifier, inode);
	verf->committed = how;
}
749
750 /* Factored out from fs/nfsd/vfs.h:fh_getattr() */
__vfs_getattr(const struct path * p,struct kstat * stat,int version)751 static int __vfs_getattr(const struct path *p, struct kstat *stat, int version)
752 {
753 u32 request_mask = STATX_BASIC_STATS;
754
755 if (version == 4)
756 request_mask |= (STATX_BTIME | STATX_CHANGE_COOKIE);
757 return vfs_getattr(p, stat, request_mask, AT_STATX_SYNC_AS_STAT);
758 }
759
760 /* Copied from fs/nfsd/nfsfh.c:nfsd4_change_attribute() */
__nfsd4_change_attribute(const struct kstat * stat,const struct inode * inode)761 static u64 __nfsd4_change_attribute(const struct kstat *stat,
762 const struct inode *inode)
763 {
764 u64 chattr;
765
766 if (stat->result_mask & STATX_CHANGE_COOKIE) {
767 chattr = stat->change_cookie;
768 if (S_ISREG(inode->i_mode) &&
769 !(stat->attributes & STATX_ATTR_CHANGE_MONOTONIC)) {
770 chattr += (u64)stat->ctime.tv_sec << 30;
771 chattr += stat->ctime.tv_nsec;
772 }
773 } else {
774 chattr = time_to_chattr(&stat->ctime);
775 }
776 return chattr;
777 }
778
/*
 * Fake up post-op attributes from a local getattr so callers see fresh
 * metadata without a wire GETATTR.  Silently does nothing if the header
 * carries no fattr or the local getattr fails.
 */
static void nfs_local_vfs_getattr(struct nfs_local_kiocb *iocb)
{
	struct kstat stat;
	struct file *filp = iocb->kiocb.ki_filp;
	struct nfs_pgio_header *hdr = iocb->hdr;
	struct nfs_fattr *fattr = hdr->res.fattr;
	int version = NFS_PROTO(hdr->inode)->version;

	if (unlikely(!fattr) || __vfs_getattr(&filp->f_path, &stat, version))
		return;

	fattr->valid = (NFS_ATTR_FATTR_FILEID |
			NFS_ATTR_FATTR_CHANGE |
			NFS_ATTR_FATTR_SIZE |
			NFS_ATTR_FATTR_ATIME |
			NFS_ATTR_FATTR_MTIME |
			NFS_ATTR_FATTR_CTIME |
			NFS_ATTR_FATTR_SPACE_USED);

	fattr->fileid = stat.ino;
	fattr->size = stat.size;
	fattr->atime = stat.atime;
	fattr->mtime = stat.mtime;
	fattr->ctime = stat.ctime;
	if (version == 4) {
		fattr->change_attr =
			__nfsd4_change_attribute(&stat, file_inode(filp));
	} else
		fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime);
	/* stat.blocks is in 512-byte sectors */
	fattr->du.nfs3.used = stat.blocks << 9;
}
810
/* Post-process a completed local write (common to sync and AIO paths). */
static void nfs_local_write_done(struct nfs_local_kiocb *iocb)
{
	struct nfs_pgio_header *hdr = iocb->hdr;
	long status = hdr->task.tk_status;

	dprintk("%s: wrote %ld bytes.\n", __func__, status > 0 ? status : 0);

	if ((iocb->kiocb.ki_flags & IOCB_DIRECT) && status == -EINVAL) {
		/* Underlying FS will return -EINVAL if misaligned DIO is attempted. */
		pr_info_ratelimited("nfs: Unexpected direct I/O write alignment failure\n");
	}

	/* Any write error invalidates previously issued verifiers */
	if (status < 0)
		nfs_reset_boot_verifier(hdr->inode);
}
826
/* Finish a write: results, post-op attributes, then complete/release. */
static inline void nfs_local_write_iocb_done(struct nfs_local_kiocb *iocb)
{
	nfs_local_write_done(iocb);
	nfs_local_vfs_getattr(iocb);
	nfs_local_pgio_release(iocb);
}
833
/* Workqueue half of AIO write completion; see nfs_local_write_aio_complete(). */
static void nfs_local_write_aio_complete_work(struct work_struct *work)
{
	struct nfs_local_kiocb *iocb =
		container_of(work, struct nfs_local_kiocb, work);

	nfs_local_write_iocb_done(iocb);
}
841
/* ki_complete callback for DIO writes; may run in bottom-half context. */
static void nfs_local_write_aio_complete(struct kiocb *kiocb, long ret)
{
	struct nfs_local_kiocb *iocb =
		container_of(kiocb, struct nfs_local_kiocb, kiocb);

	/* AIO completion of DIO write should always be last to complete */
	if (unlikely(!nfs_local_pgio_done(iocb, ret)))
		return;

	/* Defer the rest to nfsiod: we may be in interrupt context here */
	nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_write_aio_complete_work */
}
853
/*
 * Issue the write iters against the backing file from the nfslocaliod
 * workqueue.  PF_LOCAL_THROTTLE and PF_MEMALLOC_NOIO guard against
 * writeback-throttling deadlocks and reclaim recursion while writing.
 */
static void nfs_local_call_write(struct work_struct *work)
{
	struct nfs_local_kiocb *iocb =
		container_of(work, struct nfs_local_kiocb, work);
	struct file *filp = iocb->kiocb.ki_filp;
	unsigned long old_flags = current->flags;
	ssize_t status;
	int n_iters;

	current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;

	/* Hold write access to the superblock for the whole batch */
	file_start_write(filp);
	n_iters = atomic_read(&iocb->n_iters);
	for (int i = 0; i < n_iters ; i++) {
		if (iocb->iter_is_dio_aligned[i]) {
			iocb->kiocb.ki_flags |= IOCB_DIRECT;
			/* Only use AIO completion if DIO-aligned segment is last */
			if (i == iocb->end_iter_index) {
				iocb->kiocb.ki_complete = nfs_local_write_aio_complete;
				iocb->aio_complete_work = nfs_local_write_aio_complete_work;
			}
		} else
			iocb->kiocb.ki_flags &= ~IOCB_DIRECT;

		/* Issue the write with the file opener's credentials */
		scoped_with_creds(filp->f_cred)
			status = filp->f_op->write_iter(&iocb->kiocb, &iocb->iters[i]);

		/* -EIOCBQUEUED: AIO in flight, ki_complete will finish up */
		if (status == -EIOCBQUEUED)
			continue;
		/* Break on completion, errors, or short writes */
		if (nfs_local_pgio_done(iocb, status) || status < 0 ||
		    (size_t)status < iov_iter_count(&iocb->iters[i])) {
			nfs_local_write_iocb_done(iocb);
			break;
		}
	}
	file_end_write(filp);

	current->flags = old_flags;
}
894
/* Start a local write: map NFS stability to kiocb sync flags, hand off. */
static void nfs_local_do_write(struct nfs_local_kiocb *iocb,
		const struct rpc_call_ops *call_ops)
{
	struct nfs_pgio_header *hdr = iocb->hdr;

	dprintk("%s: vfs_write count=%u pos=%llu %s\n",
		__func__, hdr->args.count, hdr->args.offset,
		(hdr->args.stable == NFS_UNSTABLE) ? "unstable" : "stable");

	switch (hdr->args.stable) {
	default:
		break;
	case NFS_DATA_SYNC:
		iocb->kiocb.ki_flags |= IOCB_DSYNC;
		break;
	case NFS_FILE_SYNC:
		/* NFS_FILE_SYNC also syncs metadata; fallthrough intended */
		iocb->kiocb.ki_flags |= IOCB_DSYNC|IOCB_SYNC;
	}

	nfs_local_pgio_init(hdr, call_ops);

	/* The verifier must reflect the boot time valid at submission */
	nfs_set_local_verifier(hdr->inode, hdr->res.verf, hdr->args.stable);

	INIT_WORK(&iocb->work, nfs_local_call_write);
	queue_work(nfslocaliod_workqueue, &iocb->work);
}
921
922 static struct nfs_local_kiocb *
nfs_local_iocb_init(struct nfs_pgio_header * hdr,struct nfsd_file * localio)923 nfs_local_iocb_init(struct nfs_pgio_header *hdr, struct nfsd_file *localio)
924 {
925 struct file *file = nfs_to->nfsd_file_file(localio);
926 struct nfs_local_kiocb *iocb;
927 gfp_t gfp_mask;
928 int rw;
929
930 if (hdr->rw_mode & FMODE_READ) {
931 if (!file->f_op->read_iter)
932 return ERR_PTR(-EOPNOTSUPP);
933 gfp_mask = GFP_KERNEL;
934 rw = ITER_DEST;
935 } else {
936 if (!file->f_op->write_iter)
937 return ERR_PTR(-EOPNOTSUPP);
938 gfp_mask = GFP_NOIO;
939 rw = ITER_SOURCE;
940 }
941
942 iocb = nfs_local_iocb_alloc(hdr, file, gfp_mask);
943 if (iocb == NULL)
944 return ERR_PTR(-ENOMEM);
945 iocb->hdr = hdr;
946 iocb->localio = localio;
947
948 nfs_local_iters_init(iocb, rw);
949
950 return iocb;
951 }
952
/*
 * nfs_local_doio - issue a pageio request directly against the local file.
 * Zero-length requests complete immediately with 0.  On an unsupported
 * rw_mode the request is failed via the normal completion callbacks.
 */
int nfs_local_doio(struct nfs_client *clp, struct nfsd_file *localio,
		   struct nfs_pgio_header *hdr,
		   const struct rpc_call_ops *call_ops)
{
	struct nfs_local_kiocb *iocb;
	int status = 0;

	if (!hdr->args.count)
		return 0;

	iocb = nfs_local_iocb_init(hdr, localio);
	if (IS_ERR(iocb))
		return PTR_ERR(iocb);

	switch (hdr->rw_mode) {
	case FMODE_READ:
		nfs_local_do_read(iocb, call_ops);
		break;
	case FMODE_WRITE:
		nfs_local_do_write(iocb, call_ops);
		break;
	default:
		dprintk("%s: invalid mode: %d\n", __func__,
			hdr->rw_mode);
		status = -EOPNOTSUPP;
	}

	if (unlikely(status != 0)) {
		/* Fail through the normal RPC completion callbacks */
		nfs_local_iocb_release(iocb);
		hdr->task.tk_status = status;
		nfs_local_hdr_release(hdr, call_ops);
	}
	return status;
}
987
/*
 * Minimal commit setup: point the embedded rpc_task at @call_ops so the
 * completion callbacks can be run later without a real RPC.
 */
static void
nfs_local_init_commit(struct nfs_commit_data *data,
		      const struct rpc_call_ops *call_ops)
{
	data->task.tk_ops = call_ops;
}
994
995 static int
nfs_local_run_commit(struct file * filp,struct nfs_commit_data * data)996 nfs_local_run_commit(struct file *filp, struct nfs_commit_data *data)
997 {
998 loff_t start = data->args.offset;
999 loff_t end = LLONG_MAX;
1000
1001 if (data->args.count > 0) {
1002 end = start + data->args.count - 1;
1003 if (end < start)
1004 end = LLONG_MAX;
1005 }
1006
1007 nfs_local_mapping_set_gfp_nofs_context(filp->f_mapping);
1008
1009 dprintk("%s: commit %llu - %llu\n", __func__, start, end);
1010 return vfs_fsync_range(filp, start, end, 0);
1011 }
1012
1013 static void
nfs_local_commit_done(struct nfs_commit_data * data,int status)1014 nfs_local_commit_done(struct nfs_commit_data *data, int status)
1015 {
1016 if (status >= 0) {
1017 nfs_set_local_verifier(data->inode,
1018 data->res.verf,
1019 NFS_FILE_SYNC);
1020 data->res.op_status = NFS4_OK;
1021 data->task.tk_status = 0;
1022 } else {
1023 nfs_reset_boot_verifier(data->inode);
1024 data->res.op_status = nfs_localio_errno_to_nfs4_stat(status);
1025 data->task.tk_status = status;
1026 }
1027 }
1028
/*
 * Tear down a commit: drop the localio file reference, then invoke the
 * rpc_call_done and rpc_release callbacks, in that order. rpc_release
 * may free @data, so it must run last.
 */
static void
nfs_local_release_commit_data(struct nfsd_file *localio,
			      struct nfs_commit_data *data,
			      const struct rpc_call_ops *call_ops)
{
	nfs_local_file_put(localio);
	call_ops->rpc_call_done(&data->task, data);
	call_ops->rpc_release(data);
}
1038
/*
 * Release the commit data and localio reference held by @ctx, then
 * free the context itself.
 */
static void
nfs_local_fsync_ctx_free(struct nfs_local_fsync_ctx *ctx)
{
	nfs_local_release_commit_data(ctx->localio, ctx->data,
				      ctx->data->task.tk_ops);
	kfree(ctx);
}
1046
/*
 * Workqueue handler that performs the deferred local commit described
 * by an nfs_local_fsync_ctx, records the result, wakes any FLUSH_SYNC
 * waiter, and frees the context.
 */
static void
nfs_local_fsync_work(struct work_struct *work)
{
	/* Saved so the task flags can be restored on exit. */
	unsigned long old_flags = current->flags;
	struct nfs_local_fsync_ctx *ctx;
	int status;

	ctx = container_of(work, struct nfs_local_fsync_ctx, work);

	/*
	 * Throttle this task and force NOIO allocations while the fsync
	 * runs, to avoid recursing into I/O from the commit path.
	 */
	current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;

	status = nfs_local_run_commit(nfs_to->nfsd_file_file(ctx->localio),
				      ctx->data);
	nfs_local_commit_done(ctx->data, status);
	/* Wake a synchronous waiter (if any) before tearing down ctx. */
	if (ctx->done != NULL)
		complete(ctx->done);
	nfs_local_fsync_ctx_free(ctx);

	current->flags = old_flags;
}
1067
1068 static struct nfs_local_fsync_ctx *
nfs_local_fsync_ctx_alloc(struct nfs_commit_data * data,struct nfsd_file * localio,gfp_t flags)1069 nfs_local_fsync_ctx_alloc(struct nfs_commit_data *data,
1070 struct nfsd_file *localio, gfp_t flags)
1071 {
1072 struct nfs_local_fsync_ctx *ctx = kmalloc_obj(*ctx, flags);
1073
1074 if (ctx != NULL) {
1075 ctx->localio = localio;
1076 ctx->data = data;
1077 INIT_WORK(&ctx->work, nfs_local_fsync_work);
1078 ctx->done = NULL;
1079 }
1080 return ctx;
1081 }
1082
nfs_local_commit(struct nfsd_file * localio,struct nfs_commit_data * data,const struct rpc_call_ops * call_ops,int how)1083 int nfs_local_commit(struct nfsd_file *localio,
1084 struct nfs_commit_data *data,
1085 const struct rpc_call_ops *call_ops, int how)
1086 {
1087 struct nfs_local_fsync_ctx *ctx;
1088
1089 ctx = nfs_local_fsync_ctx_alloc(data, localio, GFP_NOIO);
1090 if (!ctx) {
1091 nfs_local_commit_done(data, -ENOMEM);
1092 nfs_local_release_commit_data(localio, data, call_ops);
1093 return -ENOMEM;
1094 }
1095
1096 nfs_local_init_commit(data, call_ops);
1097
1098 if (how & FLUSH_SYNC) {
1099 DECLARE_COMPLETION_ONSTACK(done);
1100 ctx->done = &done;
1101 queue_work(nfslocaliod_workqueue, &ctx->work);
1102 wait_for_completion(&done);
1103 } else
1104 queue_work(nfslocaliod_workqueue, &ctx->work);
1105
1106 return 0;
1107 }
1108