// SPDX-License-Identifier: GPL-2.0-only
/*
 * NFS client support for local clients to bypass network stack
 *
 * Copyright (C) 2014 Weston Andros Adamson <dros@primarydata.com>
 * Copyright (C) 2019 Trond Myklebust <trond.myklebust@hammerspace.com>
 * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com>
 * Copyright (C) 2024 NeilBrown <neilb@suse.de>
 */
10
11 #include <linux/module.h>
12 #include <linux/errno.h>
13 #include <linux/vfs.h>
14 #include <linux/file.h>
15 #include <linux/inet.h>
16 #include <linux/sunrpc/addr.h>
17 #include <linux/inetdevice.h>
18 #include <net/addrconf.h>
19 #include <linux/nfs_common.h>
20 #include <linux/nfslocalio.h>
21 #include <linux/bvec.h>
22
23 #include <linux/nfs.h>
24 #include <linux/nfs_fs.h>
25 #include <linux/nfs_xdr.h>
26
27 #include "internal.h"
28 #include "pnfs.h"
29 #include "nfstrace.h"
30
/* Debug facility tag for this file (presumably consumed by dprintk()) */
#define NFSDBG_FACILITY		NFSDBG_VFS

/*
 * Number of iov_iter slots in struct nfs_local_kiocb; the DIO setup code
 * uses at most three (misaligned start, aligned middle, misaligned end).
 */
#define NFSLOCAL_MAX_IOS	3
34
/*
 * Per-request state for one LOCALIO read or write.
 *
 * Allocated by nfs_local_iocb_alloc() and freed by nfs_local_iocb_free().
 */
struct nfs_local_kiocb {
	/* kiocb handed to the file's ->read_iter()/->write_iter() */
	struct kiocb		kiocb;
	/* bio_vec array sized from hdr->page_array.npages */
	struct bio_vec		*bvec;
	/* NFS pgio request this iocb services */
	struct nfs_pgio_header	*hdr;
	/* used to queue the IO itself and, later, its AIO completion work */
	struct work_struct	work;
	/* work function queued by nfs_local_pgio_aio_complete() */
	void (*aio_complete_work)(struct work_struct *);
	/* nfsd_file reference, dropped in nfs_local_iocb_release() */
	struct nfsd_file	*localio;
	/* Begin mostly DIO-specific members */
	size_t			end_len;
	/* index of the last iter set up for DIO, -1 when none was set up */
	short int		end_iter_index;
	/* number of iters whose completion is still outstanding */
	atomic_t		n_iters;
	struct iov_iter		iters[NFSLOCAL_MAX_IOS];
	/* iter_is_dio_aligned[i] set: iters[i] is issued with IOCB_DIRECT */
	bool			iter_is_dio_aligned[NFSLOCAL_MAX_IOS];
	/* End mostly DIO-specific members */
};
50
/*
 * Context for one LOCALIO commit (fsync) executed from a workqueue.
 */
struct nfs_local_fsync_ctx {
	/* nfsd_file to sync; its reference is dropped when the ctx is freed */
	struct nfsd_file	*localio;
	/* commit request being serviced */
	struct nfs_commit_data	*data;
	/* runs nfs_local_fsync_work() */
	struct work_struct	work;
	/* optional completion signalled by the worker; NULL if nobody waits */
	struct completion	*done;
};
57
/* Global on/off switch for LOCALIO; exposed as a 0644 module parameter. */
static bool localio_enabled __read_mostly = true;
module_param(localio_enabled, bool, 0644);
60
nfs_client_is_local(const struct nfs_client * clp)61 static inline bool nfs_client_is_local(const struct nfs_client *clp)
62 {
63 return !!rcu_access_pointer(clp->cl_uuid.net);
64 }
65
nfs_server_is_local(const struct nfs_client * clp)66 bool nfs_server_is_local(const struct nfs_client *clp)
67 {
68 return nfs_client_is_local(clp) && localio_enabled;
69 }
70 EXPORT_SYMBOL_GPL(nfs_server_is_local);
71
72 /*
73 * UUID_IS_LOCAL XDR functions
74 */
75
localio_xdr_enc_uuidargs(struct rpc_rqst * req,struct xdr_stream * xdr,const void * data)76 static void localio_xdr_enc_uuidargs(struct rpc_rqst *req,
77 struct xdr_stream *xdr,
78 const void *data)
79 {
80 const u8 *uuid = data;
81
82 encode_opaque_fixed(xdr, uuid, UUID_SIZE);
83 }
84
/*
 * Decode the UUID_IS_LOCAL reply. The procedure returns void, so there
 * is nothing to pull from @xdr; always reports success (0).
 */
static int localio_xdr_dec_uuidres(struct rpc_rqst *req,
				   struct xdr_stream *xdr,
				   void *result)
{
	return 0;	/* void reply: nothing to decode */
}
92
/*
 * Procedure table for the LOCALIO RPC program. The only procedure is
 * UUID_IS_LOCAL: a UUID_SIZE-byte opaque argument and an empty reply.
 */
static const struct rpc_procinfo nfs_localio_procedures[] = {
	[LOCALIOPROC_UUID_IS_LOCAL] = {
		.p_proc = LOCALIOPROC_UUID_IS_LOCAL,
		.p_encode = localio_xdr_enc_uuidargs,
		.p_decode = localio_xdr_dec_uuidres,
		.p_arglen = XDR_QUADLEN(UUID_SIZE),
		.p_replen = 0,
		.p_statidx = LOCALIOPROC_UUID_IS_LOCAL,
		.p_name = "UUID_IS_LOCAL",
	},
};
104
/* Per-procedure call counters referenced by nfslocalio_version1 */
static unsigned int nfs_localio_counts[ARRAY_SIZE(nfs_localio_procedures)];
/* Version 1 of the LOCALIO program: the procedure table above */
static const struct rpc_version nfslocalio_version1 = {
	.number = 1,
	.nrprocs = ARRAY_SIZE(nfs_localio_procedures),
	.procs = nfs_localio_procedures,
	.counts = nfs_localio_counts,
};

/* Indexed by version number; slot 0 is left NULL, only version 1 exists */
static const struct rpc_version *nfslocalio_version[] = {
	[1] = &nfslocalio_version1,
};
116
/*
 * Forward declaration: nfslocalio_rpcstat and nfslocalio_program refer to
 * each other, so the program must be declared before the stats object.
 */
extern const struct rpc_program nfslocalio_program;
static struct rpc_stat nfslocalio_rpcstat = { &nfslocalio_program };

/* RPC program description passed to rpc_bind_new_program() */
const struct rpc_program nfslocalio_program = {
	.name = "nfslocalio",
	.number = NFS_LOCALIO_PROGRAM,
	.nrvers = ARRAY_SIZE(nfslocalio_version),
	.version = nfslocalio_version,
	.stats = &nfslocalio_rpcstat,
};
127
128 /*
129 * nfs_init_localioclient - Initialise an NFS localio client connection
130 */
nfs_init_localioclient(struct nfs_client * clp)131 static struct rpc_clnt *nfs_init_localioclient(struct nfs_client *clp)
132 {
133 struct rpc_clnt *rpcclient_localio;
134
135 rpcclient_localio = rpc_bind_new_program(clp->cl_rpcclient,
136 &nfslocalio_program, 1);
137
138 dprintk_rcu("%s: server (%s) %s NFS LOCALIO.\n",
139 __func__, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR),
140 (IS_ERR(rpcclient_localio) ? "does not support" : "supports"));
141
142 return rpcclient_localio;
143 }
144
nfs_server_uuid_is_local(struct nfs_client * clp)145 static bool nfs_server_uuid_is_local(struct nfs_client *clp)
146 {
147 u8 uuid[UUID_SIZE];
148 struct rpc_message msg = {
149 .rpc_argp = &uuid,
150 };
151 struct rpc_clnt *rpcclient_localio;
152 int status;
153
154 rpcclient_localio = nfs_init_localioclient(clp);
155 if (IS_ERR(rpcclient_localio))
156 return false;
157
158 export_uuid(uuid, &clp->cl_uuid.uuid);
159
160 msg.rpc_proc = &nfs_localio_procedures[LOCALIOPROC_UUID_IS_LOCAL];
161 status = rpc_call_sync(rpcclient_localio, &msg, 0);
162 dprintk("%s: NFS reply UUID_IS_LOCAL: status=%d\n",
163 __func__, status);
164 rpc_shutdown_client(rpcclient_localio);
165
166 /* Server is only local if it initialized required struct members */
167 if (status || !rcu_access_pointer(clp->cl_uuid.net) || !clp->cl_uuid.dom)
168 return false;
169
170 return true;
171 }
172
173 /*
174 * nfs_local_probe - probe local i/o support for an nfs_server and nfs_client
175 * - called after alloc_client and init_client (so cl_rpcclient exists)
176 * - this function is idempotent, it can be called for old or new clients
177 */
nfs_local_probe(struct nfs_client * clp)178 static void nfs_local_probe(struct nfs_client *clp)
179 {
180 /* Disallow localio if disabled via sysfs or AUTH_SYS isn't used */
181 if (!localio_enabled ||
182 clp->cl_rpcclient->cl_auth->au_flavor != RPC_AUTH_UNIX) {
183 nfs_localio_disable_client(clp);
184 return;
185 }
186
187 if (nfs_client_is_local(clp))
188 return;
189
190 if (!nfs_uuid_begin(&clp->cl_uuid))
191 return;
192 if (nfs_server_uuid_is_local(clp))
193 nfs_localio_enable_client(clp);
194 nfs_uuid_end(&clp->cl_uuid);
195 }
196
nfs_local_probe_async_work(struct work_struct * work)197 void nfs_local_probe_async_work(struct work_struct *work)
198 {
199 struct nfs_client *clp =
200 container_of(work, struct nfs_client, cl_local_probe_work);
201
202 if (!refcount_inc_not_zero(&clp->cl_count))
203 return;
204 nfs_local_probe(clp);
205 nfs_put_client(clp);
206 }
207
nfs_local_probe_async(struct nfs_client * clp)208 void nfs_local_probe_async(struct nfs_client *clp)
209 {
210 queue_work(nfsiod_workqueue, &clp->cl_local_probe_work);
211 }
212 EXPORT_SYMBOL_GPL(nfs_local_probe_async);
213
nfs_local_file_put(struct nfsd_file * localio)214 static inline void nfs_local_file_put(struct nfsd_file *localio)
215 {
216 /* nfs_to_nfsd_file_put_local() expects an __rcu pointer
217 * but we have a __kernel pointer. It is always safe
218 * to cast a __kernel pointer to an __rcu pointer
219 * because the cast only weakens what is known about the pointer.
220 */
221 struct nfsd_file __rcu *nf = (struct nfsd_file __rcu*) localio;
222
223 nfs_to_nfsd_file_put_local(&nf);
224 }
225
226 /*
227 * __nfs_local_open_fh - open a local filehandle in terms of nfsd_file.
228 *
229 * Returns a pointer to a struct nfsd_file or ERR_PTR.
230 * Caller must release returned nfsd_file with nfs_to_nfsd_file_put_local().
231 */
232 static struct nfsd_file *
__nfs_local_open_fh(struct nfs_client * clp,const struct cred * cred,struct nfs_fh * fh,struct nfs_file_localio * nfl,struct nfsd_file __rcu ** pnf,const fmode_t mode)233 __nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
234 struct nfs_fh *fh, struct nfs_file_localio *nfl,
235 struct nfsd_file __rcu **pnf,
236 const fmode_t mode)
237 {
238 int status = 0;
239 struct nfsd_file *localio;
240
241 localio = nfs_open_local_fh(&clp->cl_uuid, clp->cl_rpcclient,
242 cred, fh, nfl, pnf, mode);
243 if (IS_ERR(localio)) {
244 status = PTR_ERR(localio);
245 switch (status) {
246 case -ENOMEM:
247 case -ENXIO:
248 case -ENOENT:
249 /* Revalidate localio */
250 nfs_localio_disable_client(clp);
251 nfs_local_probe(clp);
252 }
253 }
254 trace_nfs_local_open_fh(fh, mode, status);
255 return localio;
256 }
257
258 /*
259 * nfs_local_open_fh - open a local filehandle in terms of nfsd_file.
260 * First checking if the open nfsd_file is already cached, otherwise
261 * must __nfs_local_open_fh and insert the nfsd_file in nfs_file_localio.
262 *
263 * Returns a pointer to a struct nfsd_file or NULL.
264 */
265 struct nfsd_file *
nfs_local_open_fh(struct nfs_client * clp,const struct cred * cred,struct nfs_fh * fh,struct nfs_file_localio * nfl,const fmode_t mode)266 nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
267 struct nfs_fh *fh, struct nfs_file_localio *nfl,
268 const fmode_t mode)
269 {
270 struct nfsd_file *nf, __rcu **pnf;
271
272 if (!nfs_server_is_local(clp))
273 return NULL;
274 if (mode & ~(FMODE_READ | FMODE_WRITE))
275 return NULL;
276
277 if (mode & FMODE_WRITE)
278 pnf = &nfl->rw_file;
279 else
280 pnf = &nfl->ro_file;
281
282 nf = __nfs_local_open_fh(clp, cred, fh, nfl, pnf, mode);
283 if (IS_ERR(nf))
284 return NULL;
285 return nf;
286 }
287 EXPORT_SYMBOL_GPL(nfs_local_open_fh);
288
289 static void
nfs_local_iocb_free(struct nfs_local_kiocb * iocb)290 nfs_local_iocb_free(struct nfs_local_kiocb *iocb)
291 {
292 kfree(iocb->bvec);
293 kfree(iocb);
294 }
295
296 static struct nfs_local_kiocb *
nfs_local_iocb_alloc(struct nfs_pgio_header * hdr,struct file * file,gfp_t flags)297 nfs_local_iocb_alloc(struct nfs_pgio_header *hdr,
298 struct file *file, gfp_t flags)
299 {
300 struct nfs_local_kiocb *iocb;
301
302 iocb = kzalloc(sizeof(*iocb), flags);
303 if (iocb == NULL)
304 return NULL;
305
306 iocb->bvec = kmalloc_array(hdr->page_array.npages,
307 sizeof(struct bio_vec), flags);
308 if (iocb->bvec == NULL) {
309 kfree(iocb);
310 return NULL;
311 }
312
313 init_sync_kiocb(&iocb->kiocb, file);
314
315 iocb->hdr = hdr;
316 iocb->kiocb.ki_pos = hdr->args.offset;
317 iocb->kiocb.ki_flags &= ~IOCB_APPEND;
318 iocb->kiocb.ki_complete = NULL;
319 iocb->aio_complete_work = NULL;
320
321 iocb->end_iter_index = -1;
322
323 return iocb;
324 }
325
326 static bool
nfs_is_local_dio_possible(struct nfs_local_kiocb * iocb,int rw,size_t len,struct nfs_local_dio * local_dio)327 nfs_is_local_dio_possible(struct nfs_local_kiocb *iocb, int rw,
328 size_t len, struct nfs_local_dio *local_dio)
329 {
330 struct nfs_pgio_header *hdr = iocb->hdr;
331 loff_t offset = hdr->args.offset;
332 u32 nf_dio_mem_align, nf_dio_offset_align, nf_dio_read_offset_align;
333 loff_t start_end, orig_end, middle_end;
334
335 nfs_to->nfsd_file_dio_alignment(iocb->localio, &nf_dio_mem_align,
336 &nf_dio_offset_align, &nf_dio_read_offset_align);
337 if (rw == ITER_DEST)
338 nf_dio_offset_align = nf_dio_read_offset_align;
339
340 if (unlikely(!nf_dio_mem_align || !nf_dio_offset_align))
341 return false;
342 if (unlikely(len < nf_dio_offset_align))
343 return false;
344
345 local_dio->mem_align = nf_dio_mem_align;
346 local_dio->offset_align = nf_dio_offset_align;
347
348 start_end = round_up(offset, nf_dio_offset_align);
349 orig_end = offset + len;
350 middle_end = round_down(orig_end, nf_dio_offset_align);
351
352 local_dio->middle_offset = start_end;
353 local_dio->end_offset = middle_end;
354
355 local_dio->start_len = start_end - offset;
356 local_dio->middle_len = middle_end - start_end;
357 local_dio->end_len = orig_end - middle_end;
358
359 if (rw == ITER_DEST)
360 trace_nfs_local_dio_read(hdr->inode, offset, len, local_dio);
361 else
362 trace_nfs_local_dio_write(hdr->inode, offset, len, local_dio);
363 return true;
364 }
365
nfs_iov_iter_aligned_bvec(const struct iov_iter * i,unsigned int addr_mask,unsigned int len_mask)366 static bool nfs_iov_iter_aligned_bvec(const struct iov_iter *i,
367 unsigned int addr_mask, unsigned int len_mask)
368 {
369 const struct bio_vec *bvec = i->bvec;
370 size_t skip = i->iov_offset;
371 size_t size = i->count;
372
373 if (size & len_mask)
374 return false;
375 do {
376 size_t len = bvec->bv_len;
377
378 if (len > size)
379 len = size;
380 if ((unsigned long)(bvec->bv_offset + skip) & addr_mask)
381 return false;
382 bvec++;
383 size -= len;
384 skip = 0;
385 } while (size);
386
387 return true;
388 }
389
390 static void
nfs_local_iter_setup(struct iov_iter * iter,int rw,struct bio_vec * bvec,unsigned int nvecs,unsigned long total,size_t start,size_t len)391 nfs_local_iter_setup(struct iov_iter *iter, int rw, struct bio_vec *bvec,
392 unsigned int nvecs, unsigned long total,
393 size_t start, size_t len)
394 {
395 iov_iter_bvec(iter, rw, bvec, nvecs, total);
396 if (start)
397 iov_iter_advance(iter, start);
398 iov_iter_truncate(iter, len);
399 }
400
/*
 * Setup as many as 3 iov_iter based on extents described by @local_dio.
 * Returns the number of iov_iter that were setup.
 *
 * The iters are laid out in order: optional misaligned start, DIO-aligned
 * middle, optional misaligned end. Only the middle iter is tested for DIO
 * alignment and flagged in iocb->iter_is_dio_aligned[]. If the middle is
 * not aligned, 0 is returned and iocb->n_iters / iocb->end_iter_index are
 * left as they were on entry.
 */
static int
nfs_local_iters_setup_dio(struct nfs_local_kiocb *iocb, int rw,
			  unsigned int nvecs, unsigned long total,
			  struct nfs_local_dio *local_dio)
{
	int n_iters = 0;
	struct iov_iter *iters = iocb->iters;

	/* Setup misaligned start? */
	if (local_dio->start_len) {
		nfs_local_iter_setup(&iters[n_iters], rw, iocb->bvec,
				     nvecs, total, 0, local_dio->start_len);
		++n_iters;
	}

	/*
	 * Setup DIO-aligned middle, if there is no misaligned end (below)
	 * then AIO completion is used, see nfs_local_call_{read,write}
	 */
	nfs_local_iter_setup(&iters[n_iters], rw, iocb->bvec, nvecs,
			     total, local_dio->start_len, local_dio->middle_len);

	/* The masks are (alignment - 1) for memory and length respectively */
	iocb->iter_is_dio_aligned[n_iters] =
		nfs_iov_iter_aligned_bvec(&iters[n_iters],
			local_dio->mem_align-1, local_dio->offset_align-1);

	if (unlikely(!iocb->iter_is_dio_aligned[n_iters])) {
		trace_nfs_local_dio_misaligned(iocb->hdr->inode,
			local_dio->start_len, local_dio->middle_len, local_dio);
		return 0; /* no DIO-aligned IO possible */
	}
	/* Provisionally the last iter; overridden below if an end exists */
	iocb->end_iter_index = n_iters;
	++n_iters;

	/* Setup misaligned end? */
	if (local_dio->end_len) {
		nfs_local_iter_setup(&iters[n_iters], rw, iocb->bvec,
				     nvecs, total, local_dio->start_len +
				     local_dio->middle_len, local_dio->end_len);
		iocb->end_iter_index = n_iters;
		++n_iters;
	}

	atomic_set(&iocb->n_iters, n_iters);
	return n_iters;
}
451
/*
 * Populate iocb->bvec from the request's page array and set up the
 * iov_iter(s) for the IO.
 *
 * For requests flagged NFS_IOHDR_ODIRECT a DIO split is attempted; if that
 * is not possible (or the request is not O_DIRECT) a single iter covering
 * the whole mapped range is set up for buffered IO.
 */
static noinline_for_stack void
nfs_local_iters_init(struct nfs_local_kiocb *iocb, int rw)
{
	struct nfs_pgio_header *hdr = iocb->hdr;
	struct page **pagevec = hdr->page_array.pagevec;
	unsigned long v, total;
	unsigned int base;
	size_t len;

	v = 0;
	total = hdr->args.count;
	base = hdr->args.pgbase;
	/* Skip whole pages covered by pgbase; keep only the in-page offset */
	pagevec += base >> PAGE_SHIFT;
	base &= ~PAGE_MASK;
	/* One bio_vec per page until the byte count or page array runs out */
	while (total && v < hdr->page_array.npages) {
		len = min_t(size_t, total, PAGE_SIZE - base);
		bvec_set_page(&iocb->bvec[v], *pagevec, len, base);
		total -= len;
		++pagevec;
		++v;
		base = 0;
	}
	/* Number of bytes actually mapped into the bvec array */
	len = hdr->args.count - total;

	/*
	 * For each iocb, iocb->n_iters is always at least 1 and we always
	 * end io after first nfs_local_pgio_done call unless misaligned DIO.
	 */
	atomic_set(&iocb->n_iters, 1);

	if (test_bit(NFS_IOHDR_ODIRECT, &hdr->flags)) {
		struct nfs_local_dio local_dio;

		if (nfs_is_local_dio_possible(iocb, rw, len, &local_dio) &&
		    nfs_local_iters_setup_dio(iocb, rw, v, len, &local_dio) != 0) {
			/* Ensure DIO WRITE's IO on stable storage upon completion */
			if (rw == ITER_SOURCE)
				iocb->kiocb.ki_flags |= IOCB_DSYNC|IOCB_SYNC;
			return; /* is DIO-aligned */
		}
	}

	/* Use buffered IO */
	iov_iter_bvec(&iocb->iters[0], rw, iocb->bvec, v, len);
}
497
498 static void
nfs_local_hdr_release(struct nfs_pgio_header * hdr,const struct rpc_call_ops * call_ops)499 nfs_local_hdr_release(struct nfs_pgio_header *hdr,
500 const struct rpc_call_ops *call_ops)
501 {
502 call_ops->rpc_call_done(&hdr->task, hdr);
503 call_ops->rpc_release(hdr);
504 }
505
506 static void
nfs_local_pgio_init(struct nfs_pgio_header * hdr,const struct rpc_call_ops * call_ops)507 nfs_local_pgio_init(struct nfs_pgio_header *hdr,
508 const struct rpc_call_ops *call_ops)
509 {
510 hdr->task.tk_ops = call_ops;
511 if (!hdr->task.tk_start)
512 hdr->task.tk_start = ktime_get();
513 }
514
515 static bool
nfs_local_pgio_done(struct nfs_local_kiocb * iocb,long status,bool force)516 nfs_local_pgio_done(struct nfs_local_kiocb *iocb, long status, bool force)
517 {
518 struct nfs_pgio_header *hdr = iocb->hdr;
519
520 /* Must handle partial completions */
521 if (status >= 0) {
522 hdr->res.count += status;
523 /* @hdr was initialized to 0 (zeroed during allocation) */
524 if (hdr->task.tk_status == 0)
525 hdr->res.op_status = NFS4_OK;
526 } else {
527 hdr->res.op_status = nfs_localio_errno_to_nfs4_stat(status);
528 hdr->task.tk_status = status;
529 }
530
531 if (force)
532 return true;
533
534 BUG_ON(atomic_read(&iocb->n_iters) <= 0);
535 return atomic_dec_and_test(&iocb->n_iters);
536 }
537
538 static void
nfs_local_iocb_release(struct nfs_local_kiocb * iocb)539 nfs_local_iocb_release(struct nfs_local_kiocb *iocb)
540 {
541 nfs_local_file_put(iocb->localio);
542 nfs_local_iocb_free(iocb);
543 }
544
545 static void
nfs_local_pgio_release(struct nfs_local_kiocb * iocb)546 nfs_local_pgio_release(struct nfs_local_kiocb *iocb)
547 {
548 struct nfs_pgio_header *hdr = iocb->hdr;
549
550 nfs_local_iocb_release(iocb);
551 nfs_local_hdr_release(hdr, hdr->task.tk_ops);
552 }
553
554 /*
555 * Complete the I/O from iocb->kiocb.ki_complete()
556 *
557 * Note that this function can be called from a bottom half context,
558 * hence we need to queue the rpc_call_done() etc to a workqueue
559 */
nfs_local_pgio_aio_complete(struct nfs_local_kiocb * iocb)560 static inline void nfs_local_pgio_aio_complete(struct nfs_local_kiocb *iocb)
561 {
562 INIT_WORK(&iocb->work, iocb->aio_complete_work);
563 queue_work(nfsiod_workqueue, &iocb->work);
564 }
565
nfs_local_read_done(struct nfs_local_kiocb * iocb)566 static void nfs_local_read_done(struct nfs_local_kiocb *iocb)
567 {
568 struct nfs_pgio_header *hdr = iocb->hdr;
569 struct file *filp = iocb->kiocb.ki_filp;
570 long status = hdr->task.tk_status;
571
572 if ((iocb->kiocb.ki_flags & IOCB_DIRECT) && status == -EINVAL) {
573 /* Underlying FS will return -EINVAL if misaligned DIO is attempted. */
574 pr_info_ratelimited("nfs: Unexpected direct I/O read alignment failure\n");
575 }
576
577 /*
578 * Must clear replen otherwise NFSv3 data corruption will occur
579 * if/when switching from LOCALIO back to using normal RPC.
580 */
581 hdr->res.replen = 0;
582
583 /* nfs_readpage_result() handles short read */
584
585 if (hdr->args.offset + hdr->res.count >= i_size_read(file_inode(filp)))
586 hdr->res.eof = true;
587
588 dprintk("%s: read %ld bytes eof %d.\n", __func__,
589 status > 0 ? status : 0, hdr->res.eof);
590 }
591
/*
 * Final step of a LOCALIO read: post-process the result, then release
 * the iocb and complete its header. @iocb must not be used afterwards.
 */
static inline void nfs_local_read_iocb_done(struct nfs_local_kiocb *iocb)
{
	nfs_local_read_done(iocb);	/* may set eof, clears replen */
	nfs_local_pgio_release(iocb);	/* frees @iocb */
}
597
nfs_local_read_aio_complete_work(struct work_struct * work)598 static void nfs_local_read_aio_complete_work(struct work_struct *work)
599 {
600 struct nfs_local_kiocb *iocb =
601 container_of(work, struct nfs_local_kiocb, work);
602
603 nfs_local_read_iocb_done(iocb);
604 }
605
nfs_local_read_aio_complete(struct kiocb * kiocb,long ret)606 static void nfs_local_read_aio_complete(struct kiocb *kiocb, long ret)
607 {
608 struct nfs_local_kiocb *iocb =
609 container_of(kiocb, struct nfs_local_kiocb, kiocb);
610
611 /* AIO completion of DIO read should always be last to complete */
612 if (unlikely(!nfs_local_pgio_done(iocb, ret, false)))
613 return;
614
615 nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_read_aio_complete_work */
616 }
617
nfs_local_call_read(struct work_struct * work)618 static void nfs_local_call_read(struct work_struct *work)
619 {
620 struct nfs_local_kiocb *iocb =
621 container_of(work, struct nfs_local_kiocb, work);
622 struct file *filp = iocb->kiocb.ki_filp;
623 ssize_t status;
624 int n_iters;
625
626 n_iters = atomic_read(&iocb->n_iters);
627 for (int i = 0; i < n_iters ; i++) {
628 if (iocb->iter_is_dio_aligned[i]) {
629 iocb->kiocb.ki_flags |= IOCB_DIRECT;
630 /* Only use AIO completion if DIO-aligned segment is last */
631 if (i == iocb->end_iter_index) {
632 iocb->kiocb.ki_complete = nfs_local_read_aio_complete;
633 iocb->aio_complete_work = nfs_local_read_aio_complete_work;
634 }
635 } else
636 iocb->kiocb.ki_flags &= ~IOCB_DIRECT;
637
638 scoped_with_creds(filp->f_cred)
639 status = filp->f_op->read_iter(&iocb->kiocb, &iocb->iters[i]);
640
641 if (status == -EIOCBQUEUED)
642 continue;
643 /* Break on completion, errors, or short reads */
644 if (nfs_local_pgio_done(iocb, status, false) || status < 0 ||
645 (size_t)status < iov_iter_count(&iocb->iters[i])) {
646 nfs_local_read_iocb_done(iocb);
647 break;
648 }
649 }
650 }
651
652 static int
nfs_local_do_read(struct nfs_local_kiocb * iocb,const struct rpc_call_ops * call_ops)653 nfs_local_do_read(struct nfs_local_kiocb *iocb,
654 const struct rpc_call_ops *call_ops)
655 {
656 struct nfs_pgio_header *hdr = iocb->hdr;
657
658 dprintk("%s: vfs_read count=%u pos=%llu\n",
659 __func__, hdr->args.count, hdr->args.offset);
660
661 nfs_local_pgio_init(hdr, call_ops);
662 hdr->res.eof = false;
663
664 INIT_WORK(&iocb->work, nfs_local_call_read);
665 queue_work(nfslocaliod_workqueue, &iocb->work);
666
667 return 0;
668 }
669
670 static void
nfs_copy_boot_verifier(struct nfs_write_verifier * verifier,struct inode * inode)671 nfs_copy_boot_verifier(struct nfs_write_verifier *verifier, struct inode *inode)
672 {
673 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
674 u32 *verf = (u32 *)verifier->data;
675 unsigned int seq;
676
677 do {
678 seq = read_seqbegin(&clp->cl_boot_lock);
679 verf[0] = (u32)clp->cl_nfssvc_boot.tv_sec;
680 verf[1] = (u32)clp->cl_nfssvc_boot.tv_nsec;
681 } while (read_seqretry(&clp->cl_boot_lock, seq));
682 }
683
684 static void
nfs_reset_boot_verifier(struct inode * inode)685 nfs_reset_boot_verifier(struct inode *inode)
686 {
687 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
688
689 write_seqlock(&clp->cl_boot_lock);
690 ktime_get_real_ts64(&clp->cl_nfssvc_boot);
691 write_sequnlock(&clp->cl_boot_lock);
692 }
693
694 static void
nfs_set_local_verifier(struct inode * inode,struct nfs_writeverf * verf,enum nfs3_stable_how how)695 nfs_set_local_verifier(struct inode *inode,
696 struct nfs_writeverf *verf,
697 enum nfs3_stable_how how)
698 {
699 nfs_copy_boot_verifier(&verf->verifier, inode);
700 verf->committed = how;
701 }
702
703 /* Factored out from fs/nfsd/vfs.h:fh_getattr() */
__vfs_getattr(const struct path * p,struct kstat * stat,int version)704 static int __vfs_getattr(const struct path *p, struct kstat *stat, int version)
705 {
706 u32 request_mask = STATX_BASIC_STATS;
707
708 if (version == 4)
709 request_mask |= (STATX_BTIME | STATX_CHANGE_COOKIE);
710 return vfs_getattr(p, stat, request_mask, AT_STATX_SYNC_AS_STAT);
711 }
712
713 /* Copied from fs/nfsd/nfsfh.c:nfsd4_change_attribute() */
__nfsd4_change_attribute(const struct kstat * stat,const struct inode * inode)714 static u64 __nfsd4_change_attribute(const struct kstat *stat,
715 const struct inode *inode)
716 {
717 u64 chattr;
718
719 if (stat->result_mask & STATX_CHANGE_COOKIE) {
720 chattr = stat->change_cookie;
721 if (S_ISREG(inode->i_mode) &&
722 !(stat->attributes & STATX_ATTR_CHANGE_MONOTONIC)) {
723 chattr += (u64)stat->ctime.tv_sec << 30;
724 chattr += stat->ctime.tv_nsec;
725 }
726 } else {
727 chattr = time_to_chattr(&stat->ctime);
728 }
729 return chattr;
730 }
731
nfs_local_vfs_getattr(struct nfs_local_kiocb * iocb)732 static void nfs_local_vfs_getattr(struct nfs_local_kiocb *iocb)
733 {
734 struct kstat stat;
735 struct file *filp = iocb->kiocb.ki_filp;
736 struct nfs_pgio_header *hdr = iocb->hdr;
737 struct nfs_fattr *fattr = hdr->res.fattr;
738 int version = NFS_PROTO(hdr->inode)->version;
739
740 if (unlikely(!fattr) || __vfs_getattr(&filp->f_path, &stat, version))
741 return;
742
743 fattr->valid = (NFS_ATTR_FATTR_FILEID |
744 NFS_ATTR_FATTR_CHANGE |
745 NFS_ATTR_FATTR_SIZE |
746 NFS_ATTR_FATTR_ATIME |
747 NFS_ATTR_FATTR_MTIME |
748 NFS_ATTR_FATTR_CTIME |
749 NFS_ATTR_FATTR_SPACE_USED);
750
751 fattr->fileid = stat.ino;
752 fattr->size = stat.size;
753 fattr->atime = stat.atime;
754 fattr->mtime = stat.mtime;
755 fattr->ctime = stat.ctime;
756 if (version == 4) {
757 fattr->change_attr =
758 __nfsd4_change_attribute(&stat, file_inode(filp));
759 } else
760 fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime);
761 fattr->du.nfs3.used = stat.blocks << 9;
762 }
763
nfs_local_write_done(struct nfs_local_kiocb * iocb)764 static void nfs_local_write_done(struct nfs_local_kiocb *iocb)
765 {
766 struct nfs_pgio_header *hdr = iocb->hdr;
767 long status = hdr->task.tk_status;
768
769 dprintk("%s: wrote %ld bytes.\n", __func__, status > 0 ? status : 0);
770
771 if ((iocb->kiocb.ki_flags & IOCB_DIRECT) && status == -EINVAL) {
772 /* Underlying FS will return -EINVAL if misaligned DIO is attempted. */
773 pr_info_ratelimited("nfs: Unexpected direct I/O write alignment failure\n");
774 }
775
776 /* Handle short writes as if they are ENOSPC */
777 status = hdr->res.count;
778 if (status > 0 && status < hdr->args.count) {
779 hdr->mds_offset += status;
780 hdr->args.offset += status;
781 hdr->args.pgbase += status;
782 hdr->args.count -= status;
783 nfs_set_pgio_error(hdr, -ENOSPC, hdr->args.offset);
784 status = -ENOSPC;
785 /* record -ENOSPC in terms of nfs_local_pgio_done */
786 (void) nfs_local_pgio_done(iocb, status, true);
787 }
788 if (hdr->task.tk_status < 0)
789 nfs_reset_boot_verifier(hdr->inode);
790 }
791
/*
 * Final step of a LOCALIO write: post-process the result, refresh the
 * cached attributes, then release the iocb and complete its header.
 * @iocb must not be used afterwards.
 */
static inline void nfs_local_write_iocb_done(struct nfs_local_kiocb *iocb)
{
	nfs_local_write_done(iocb);	/* short-write and error handling */
	nfs_local_vfs_getattr(iocb);	/* post-op attributes */
	nfs_local_pgio_release(iocb);	/* frees @iocb */
}
798
nfs_local_write_aio_complete_work(struct work_struct * work)799 static void nfs_local_write_aio_complete_work(struct work_struct *work)
800 {
801 struct nfs_local_kiocb *iocb =
802 container_of(work, struct nfs_local_kiocb, work);
803
804 nfs_local_write_iocb_done(iocb);
805 }
806
nfs_local_write_aio_complete(struct kiocb * kiocb,long ret)807 static void nfs_local_write_aio_complete(struct kiocb *kiocb, long ret)
808 {
809 struct nfs_local_kiocb *iocb =
810 container_of(kiocb, struct nfs_local_kiocb, kiocb);
811
812 /* AIO completion of DIO write should always be last to complete */
813 if (unlikely(!nfs_local_pgio_done(iocb, ret, false)))
814 return;
815
816 nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_write_aio_complete_work */
817 }
818
nfs_local_call_write(struct work_struct * work)819 static void nfs_local_call_write(struct work_struct *work)
820 {
821 struct nfs_local_kiocb *iocb =
822 container_of(work, struct nfs_local_kiocb, work);
823 struct file *filp = iocb->kiocb.ki_filp;
824 unsigned long old_flags = current->flags;
825 ssize_t status;
826 int n_iters;
827
828 current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;
829
830 file_start_write(filp);
831 n_iters = atomic_read(&iocb->n_iters);
832 for (int i = 0; i < n_iters ; i++) {
833 if (iocb->iter_is_dio_aligned[i]) {
834 iocb->kiocb.ki_flags |= IOCB_DIRECT;
835 /* Only use AIO completion if DIO-aligned segment is last */
836 if (i == iocb->end_iter_index) {
837 iocb->kiocb.ki_complete = nfs_local_write_aio_complete;
838 iocb->aio_complete_work = nfs_local_write_aio_complete_work;
839 }
840 } else
841 iocb->kiocb.ki_flags &= ~IOCB_DIRECT;
842
843 scoped_with_creds(filp->f_cred)
844 status = filp->f_op->write_iter(&iocb->kiocb, &iocb->iters[i]);
845
846 if (status == -EIOCBQUEUED)
847 continue;
848 /* Break on completion, errors, or short writes */
849 if (nfs_local_pgio_done(iocb, status, false) || status < 0 ||
850 (size_t)status < iov_iter_count(&iocb->iters[i])) {
851 nfs_local_write_iocb_done(iocb);
852 break;
853 }
854 }
855 file_end_write(filp);
856
857 current->flags = old_flags;
858 }
859
860 static int
nfs_local_do_write(struct nfs_local_kiocb * iocb,const struct rpc_call_ops * call_ops)861 nfs_local_do_write(struct nfs_local_kiocb *iocb,
862 const struct rpc_call_ops *call_ops)
863 {
864 struct nfs_pgio_header *hdr = iocb->hdr;
865
866 dprintk("%s: vfs_write count=%u pos=%llu %s\n",
867 __func__, hdr->args.count, hdr->args.offset,
868 (hdr->args.stable == NFS_UNSTABLE) ? "unstable" : "stable");
869
870 switch (hdr->args.stable) {
871 default:
872 break;
873 case NFS_DATA_SYNC:
874 iocb->kiocb.ki_flags |= IOCB_DSYNC;
875 break;
876 case NFS_FILE_SYNC:
877 iocb->kiocb.ki_flags |= IOCB_DSYNC|IOCB_SYNC;
878 }
879
880 nfs_local_pgio_init(hdr, call_ops);
881
882 nfs_set_local_verifier(hdr->inode, hdr->res.verf, hdr->args.stable);
883
884 INIT_WORK(&iocb->work, nfs_local_call_write);
885 queue_work(nfslocaliod_workqueue, &iocb->work);
886
887 return 0;
888 }
889
890 static struct nfs_local_kiocb *
nfs_local_iocb_init(struct nfs_pgio_header * hdr,struct nfsd_file * localio)891 nfs_local_iocb_init(struct nfs_pgio_header *hdr, struct nfsd_file *localio)
892 {
893 struct file *file = nfs_to->nfsd_file_file(localio);
894 struct nfs_local_kiocb *iocb;
895 gfp_t gfp_mask;
896 int rw;
897
898 if (hdr->rw_mode & FMODE_READ) {
899 if (!file->f_op->read_iter)
900 return ERR_PTR(-EOPNOTSUPP);
901 gfp_mask = GFP_KERNEL;
902 rw = ITER_DEST;
903 } else {
904 if (!file->f_op->write_iter)
905 return ERR_PTR(-EOPNOTSUPP);
906 gfp_mask = GFP_NOIO;
907 rw = ITER_SOURCE;
908 }
909
910 iocb = nfs_local_iocb_alloc(hdr, file, gfp_mask);
911 if (iocb == NULL)
912 return ERR_PTR(-ENOMEM);
913 iocb->hdr = hdr;
914 iocb->localio = localio;
915
916 nfs_local_iters_init(iocb, rw);
917
918 return iocb;
919 }
920
nfs_local_doio(struct nfs_client * clp,struct nfsd_file * localio,struct nfs_pgio_header * hdr,const struct rpc_call_ops * call_ops)921 int nfs_local_doio(struct nfs_client *clp, struct nfsd_file *localio,
922 struct nfs_pgio_header *hdr,
923 const struct rpc_call_ops *call_ops)
924 {
925 struct nfs_local_kiocb *iocb;
926 int status = 0;
927
928 if (!hdr->args.count)
929 return 0;
930
931 iocb = nfs_local_iocb_init(hdr, localio);
932 if (IS_ERR(iocb))
933 return PTR_ERR(iocb);
934
935 switch (hdr->rw_mode) {
936 case FMODE_READ:
937 status = nfs_local_do_read(iocb, call_ops);
938 break;
939 case FMODE_WRITE:
940 status = nfs_local_do_write(iocb, call_ops);
941 break;
942 default:
943 dprintk("%s: invalid mode: %d\n", __func__,
944 hdr->rw_mode);
945 status = -EOPNOTSUPP;
946 }
947
948 if (status != 0) {
949 if (status == -EAGAIN)
950 nfs_localio_disable_client(clp);
951 nfs_local_iocb_release(iocb);
952 hdr->task.tk_status = status;
953 nfs_local_hdr_release(hdr, call_ops);
954 }
955 return status;
956 }
957
958 static void
nfs_local_init_commit(struct nfs_commit_data * data,const struct rpc_call_ops * call_ops)959 nfs_local_init_commit(struct nfs_commit_data *data,
960 const struct rpc_call_ops *call_ops)
961 {
962 data->task.tk_ops = call_ops;
963 }
964
965 static int
nfs_local_run_commit(struct file * filp,struct nfs_commit_data * data)966 nfs_local_run_commit(struct file *filp, struct nfs_commit_data *data)
967 {
968 loff_t start = data->args.offset;
969 loff_t end = LLONG_MAX;
970
971 if (data->args.count > 0) {
972 end = start + data->args.count - 1;
973 if (end < start)
974 end = LLONG_MAX;
975 }
976
977 dprintk("%s: commit %llu - %llu\n", __func__, start, end);
978 return vfs_fsync_range(filp, start, end, 0);
979 }
980
981 static void
nfs_local_commit_done(struct nfs_commit_data * data,int status)982 nfs_local_commit_done(struct nfs_commit_data *data, int status)
983 {
984 if (status >= 0) {
985 nfs_set_local_verifier(data->inode,
986 data->res.verf,
987 NFS_FILE_SYNC);
988 data->res.op_status = NFS4_OK;
989 data->task.tk_status = 0;
990 } else {
991 nfs_reset_boot_verifier(data->inode);
992 data->res.op_status = nfs_localio_errno_to_nfs4_stat(status);
993 data->task.tk_status = status;
994 }
995 }
996
997 static void
nfs_local_release_commit_data(struct nfsd_file * localio,struct nfs_commit_data * data,const struct rpc_call_ops * call_ops)998 nfs_local_release_commit_data(struct nfsd_file *localio,
999 struct nfs_commit_data *data,
1000 const struct rpc_call_ops *call_ops)
1001 {
1002 nfs_local_file_put(localio);
1003 call_ops->rpc_call_done(&data->task, data);
1004 call_ops->rpc_release(data);
1005 }
1006
1007 static void
nfs_local_fsync_ctx_free(struct nfs_local_fsync_ctx * ctx)1008 nfs_local_fsync_ctx_free(struct nfs_local_fsync_ctx *ctx)
1009 {
1010 nfs_local_release_commit_data(ctx->localio, ctx->data,
1011 ctx->data->task.tk_ops);
1012 kfree(ctx);
1013 }
1014
1015 static void
nfs_local_fsync_work(struct work_struct * work)1016 nfs_local_fsync_work(struct work_struct *work)
1017 {
1018 struct nfs_local_fsync_ctx *ctx;
1019 int status;
1020
1021 ctx = container_of(work, struct nfs_local_fsync_ctx, work);
1022
1023 status = nfs_local_run_commit(nfs_to->nfsd_file_file(ctx->localio),
1024 ctx->data);
1025 nfs_local_commit_done(ctx->data, status);
1026 if (ctx->done != NULL)
1027 complete(ctx->done);
1028 nfs_local_fsync_ctx_free(ctx);
1029 }
1030
1031 static struct nfs_local_fsync_ctx *
nfs_local_fsync_ctx_alloc(struct nfs_commit_data * data,struct nfsd_file * localio,gfp_t flags)1032 nfs_local_fsync_ctx_alloc(struct nfs_commit_data *data,
1033 struct nfsd_file *localio, gfp_t flags)
1034 {
1035 struct nfs_local_fsync_ctx *ctx = kmalloc(sizeof(*ctx), flags);
1036
1037 if (ctx != NULL) {
1038 ctx->localio = localio;
1039 ctx->data = data;
1040 INIT_WORK(&ctx->work, nfs_local_fsync_work);
1041 ctx->done = NULL;
1042 }
1043 return ctx;
1044 }
1045
nfs_local_commit(struct nfsd_file * localio,struct nfs_commit_data * data,const struct rpc_call_ops * call_ops,int how)1046 int nfs_local_commit(struct nfsd_file *localio,
1047 struct nfs_commit_data *data,
1048 const struct rpc_call_ops *call_ops, int how)
1049 {
1050 struct nfs_local_fsync_ctx *ctx;
1051
1052 ctx = nfs_local_fsync_ctx_alloc(data, localio, GFP_KERNEL);
1053 if (!ctx) {
1054 nfs_local_commit_done(data, -ENOMEM);
1055 nfs_local_release_commit_data(localio, data, call_ops);
1056 return -ENOMEM;
1057 }
1058
1059 nfs_local_init_commit(data, call_ops);
1060
1061 if (how & FLUSH_SYNC) {
1062 DECLARE_COMPLETION_ONSTACK(done);
1063 ctx->done = &done;
1064 queue_work(nfsiod_workqueue, &ctx->work);
1065 wait_for_completion(&done);
1066 } else
1067 queue_work(nfsiod_workqueue, &ctx->work);
1068
1069 return 0;
1070 }
1071