// SPDX-License-Identifier: GPL-2.0-only
/*
 * NFS client support for local clients to bypass network stack
 *
 * Copyright (C) 2014 Weston Andros Adamson <dros@primarydata.com>
 * Copyright (C) 2019 Trond Myklebust <trond.myklebust@hammerspace.com>
 * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com>
 * Copyright (C) 2024 NeilBrown <neilb@suse.de>
 */

#include <linux/module.h>
#include <linux/errno.h>
#include <linux/vfs.h>
#include <linux/file.h>
#include <linux/inet.h>
#include <linux/sunrpc/addr.h>
#include <linux/inetdevice.h>
#include <net/addrconf.h>
#include <linux/nfs_common.h>
#include <linux/nfslocalio.h>
#include <linux/bvec.h>

#include <linux/nfs.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_xdr.h>

#include "internal.h"
#include "pnfs.h"
#include "nfstrace.h"

#define NFSDBG_FACILITY		NFSDBG_VFS

/*
 * Max iov_iter segments per iocb: a misaligned start, a DIO-aligned
 * middle, and a misaligned end (see nfs_local_iters_setup_dio).
 */
#define NFSLOCAL_MAX_IOS	3

/*
 * Per-I/O state for one LOCALIO read or write.  One nfs_local_kiocb is
 * allocated per nfs_pgio_header; when O_DIRECT alignment splits the
 * request it fans out into up to NFSLOCAL_MAX_IOS iov_iter segments.
 */
struct nfs_local_kiocb {
	struct kiocb		kiocb;
	struct bio_vec		*bvec;	/* one bio_vec per page of the request */
	struct nfs_pgio_header	*hdr;
	struct work_struct	work;
	/* queued to nfsiod_workqueue by nfs_local_pgio_aio_complete() */
	void (*aio_complete_work)(struct work_struct *);
	struct nfsd_file	*localio;
	/* Begin mostly DIO-specific members */
	size_t			end_len;
	short int		end_iter_index;	/* index of final iter, -1 if unset */
	atomic_t		n_iters;	/* outstanding iter completions */
	struct iov_iter		iters[NFSLOCAL_MAX_IOS];
	bool			iter_is_dio_aligned[NFSLOCAL_MAX_IOS];
	/* End mostly DIO-specific members */
};

/* Context for a COMMIT (fsync) issued to the local file via a workqueue */
struct nfs_local_fsync_ctx {
	struct nfsd_file	*localio;
	struct nfs_commit_data	*data;
	struct work_struct	work;
	struct completion	*done;	/* non-NULL only for FLUSH_SYNC callers */
};

static bool localio_enabled __read_mostly = true;
module_param(localio_enabled, bool, 0644);

static void nfs_local_do_read(struct nfs_local_kiocb *iocb,
			      const struct rpc_call_ops *call_ops);
static void nfs_local_do_write(struct nfs_local_kiocb *iocb,
			       const struct rpc_call_ops *call_ops);

/* A non-NULL cl_uuid.net indicates the server has been matched as local */
static inline bool nfs_client_is_local(const struct nfs_client *clp)
{
	return
!!rcu_access_pointer(clp->cl_uuid.net);
}

bool nfs_server_is_local(const struct nfs_client *clp)
{
	return nfs_client_is_local(clp) && localio_enabled;
}
EXPORT_SYMBOL_GPL(nfs_server_is_local);

/*
 * UUID_IS_LOCAL XDR functions
 */

/* Encode the client's fixed-size uuid as opaque XDR data */
static void localio_xdr_enc_uuidargs(struct rpc_rqst *req,
				     struct xdr_stream *xdr,
				     const void *data)
{
	const u8 *uuid = data;

	encode_opaque_fixed(xdr, uuid, UUID_SIZE);
}

static int localio_xdr_dec_uuidres(struct rpc_rqst *req,
				   struct xdr_stream *xdr,
				   void *result)
{
	/* void return */
	return 0;
}

static const struct rpc_procinfo nfs_localio_procedures[] = {
	[LOCALIOPROC_UUID_IS_LOCAL] = {
		.p_proc = LOCALIOPROC_UUID_IS_LOCAL,
		.p_encode = localio_xdr_enc_uuidargs,
		.p_decode = localio_xdr_dec_uuidres,
		.p_arglen = XDR_QUADLEN(UUID_SIZE),
		.p_replen = 0,
		.p_statidx = LOCALIOPROC_UUID_IS_LOCAL,
		.p_name = "UUID_IS_LOCAL",
	},
};

static unsigned int nfs_localio_counts[ARRAY_SIZE(nfs_localio_procedures)];

static const struct rpc_version nfslocalio_version1 = {
	.number		= 1,
	.nrprocs	= ARRAY_SIZE(nfs_localio_procedures),
	.procs		= nfs_localio_procedures,
	.counts		= nfs_localio_counts,
};

static const struct rpc_version *nfslocalio_version[] = {
	[1]		= &nfslocalio_version1,
};

extern const struct rpc_program nfslocalio_program;
static struct rpc_stat nfslocalio_rpcstat = { &nfslocalio_program };

const struct rpc_program nfslocalio_program = {
	.name		= "nfslocalio",
	.number		= NFS_LOCALIO_PROGRAM,
	.nrvers		= ARRAY_SIZE(nfslocalio_version),
	.version	= nfslocalio_version,
	.stats		= &nfslocalio_rpcstat,
};

/*
 * nfs_init_localioclient - Initialise an NFS localio client connection
 */
static struct rpc_clnt *nfs_init_localioclient(struct nfs_client *clp)
{
	struct rpc_clnt *rpcclient_localio;

	rpcclient_localio =
rpc_bind_new_program(clp->cl_rpcclient,
				     &nfslocalio_program, 1);

	dprintk_rcu("%s: server (%s) %s NFS LOCALIO.\n",
		__func__, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR),
		(IS_ERR(rpcclient_localio) ? "does not support" : "supports"));

	return rpcclient_localio;
}

/*
 * Ask the server, over a normal RPC, whether it recognizes this client's
 * uuid as local.  Returns true only if the call succeeded AND the server
 * side initialized cl_uuid.net and cl_uuid.dom.
 */
static bool nfs_server_uuid_is_local(struct nfs_client *clp)
{
	u8 uuid[UUID_SIZE];
	struct rpc_message msg = {
		.rpc_argp = &uuid,
	};
	struct rpc_clnt *rpcclient_localio;
	int status;

	rpcclient_localio = nfs_init_localioclient(clp);
	if (IS_ERR(rpcclient_localio))
		return false;

	export_uuid(uuid, &clp->cl_uuid.uuid);

	msg.rpc_proc = &nfs_localio_procedures[LOCALIOPROC_UUID_IS_LOCAL];
	status = rpc_call_sync(rpcclient_localio, &msg, 0);
	dprintk("%s: NFS reply UUID_IS_LOCAL: status=%d\n",
		__func__, status);
	rpc_shutdown_client(rpcclient_localio);

	/* Server is only local if it initialized required struct members */
	if (status || !rcu_access_pointer(clp->cl_uuid.net) || !clp->cl_uuid.dom)
		return false;

	return true;
}

/*
 * nfs_local_probe - probe local i/o support for an nfs_server and nfs_client
 * - called after alloc_client and init_client (so cl_rpcclient exists)
 * - this function is idempotent, it can be called for old or new clients
 */
static void nfs_local_probe(struct nfs_client *clp)
{
	/* Disallow localio if disabled via sysfs or AUTH_SYS isn't used */
	if (!localio_enabled ||
	    clp->cl_rpcclient->cl_auth->au_flavor != RPC_AUTH_UNIX) {
		nfs_localio_disable_client(clp);
		return;
	}

	/* Already probed and enabled? */
	if (nfs_client_is_local(clp))
		return;

	if (!nfs_uuid_begin(&clp->cl_uuid))
		return;
	if (nfs_server_uuid_is_local(clp))
		nfs_localio_enable_client(clp);
	nfs_uuid_end(&clp->cl_uuid);
}

void nfs_local_probe_async_work(struct work_struct *work)
{
	struct nfs_client *clp =
container_of(work, struct nfs_client, cl_local_probe_work);

	/* Only probe if we can still get a reference on the client */
	if (!refcount_inc_not_zero(&clp->cl_count))
		return;
	nfs_local_probe(clp);
	nfs_put_client(clp);
}

void nfs_local_probe_async(struct nfs_client *clp)
{
	queue_work(nfsiod_workqueue, &clp->cl_local_probe_work);
}
EXPORT_SYMBOL_GPL(nfs_local_probe_async);

static inline void nfs_local_file_put(struct nfsd_file *localio)
{
	/* nfs_to_nfsd_file_put_local() expects an __rcu pointer
	 * but we have a __kernel pointer.  It is always safe
	 * to cast a __kernel pointer to an __rcu pointer
	 * because the cast only weakens what is known about the pointer.
	 */
	struct nfsd_file __rcu *nf = (struct nfsd_file __rcu*) localio;

	nfs_to_nfsd_file_put_local(&nf);
}

/*
 * __nfs_local_open_fh - open a local filehandle in terms of nfsd_file.
 *
 * Returns a pointer to a struct nfsd_file or ERR_PTR.
 * Caller must release returned nfsd_file with nfs_to_nfsd_file_put_local().
 */
static struct nfsd_file *
__nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
		    struct nfs_fh *fh, struct nfs_file_localio *nfl,
		    struct nfsd_file __rcu **pnf,
		    const fmode_t mode)
{
	int status = 0;
	struct nfsd_file *localio;

	localio = nfs_open_local_fh(&clp->cl_uuid, clp->cl_rpcclient,
				    cred, fh, nfl, pnf, mode);
	if (IS_ERR(localio)) {
		status = PTR_ERR(localio);
		switch (status) {
		case -ENOMEM:
		case -ENXIO:
		case -ENOENT:
			/* Revalidate localio */
			nfs_localio_disable_client(clp);
			nfs_local_probe(clp);
		}
	}
	trace_nfs_local_open_fh(fh, mode, status);
	return localio;
}

/*
 * nfs_local_open_fh - open a local filehandle in terms of nfsd_file.
 * First checking if the open nfsd_file is already cached, otherwise
 * must __nfs_local_open_fh and insert the nfsd_file in nfs_file_localio.
 *
 * Returns a pointer to a struct nfsd_file or NULL.
 */
struct nfsd_file *
nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred,
		  struct nfs_fh *fh, struct nfs_file_localio *nfl,
		  const fmode_t mode)
{
	struct nfsd_file *nf, __rcu **pnf;

	if (!nfs_server_is_local(clp))
		return NULL;
	/* Only plain read and/or write opens are supported */
	if (mode & ~(FMODE_READ | FMODE_WRITE))
		return NULL;

	/* Writable opens are cached separately from read-only opens */
	if (mode & FMODE_WRITE)
		pnf = &nfl->rw_file;
	else
		pnf = &nfl->ro_file;

	nf = __nfs_local_open_fh(clp, cred, fh, nfl, pnf, mode);
	if (IS_ERR(nf))
		return NULL;
	return nf;
}
EXPORT_SYMBOL_GPL(nfs_local_open_fh);

/*
 * Ensure all page cache allocations are done from GFP_NOFS context to
 * prevent direct reclaim recursion back into NFS via nfs_writepages.
 */
static void
nfs_local_mapping_set_gfp_nofs_context(struct address_space *m)
{
	gfp_t gfp_mask = mapping_gfp_mask(m);

	mapping_set_gfp_mask(m, (gfp_mask & ~(__GFP_FS)));
}

static void
nfs_local_iocb_free(struct nfs_local_kiocb *iocb)
{
	kfree(iocb->bvec);
	kfree(iocb);
}

/*
 * Allocate and initialize an nfs_local_kiocb for @hdr targeting @file.
 * Returns NULL on allocation failure.
 */
static struct nfs_local_kiocb *
nfs_local_iocb_alloc(struct nfs_pgio_header *hdr,
		     struct file *file, gfp_t flags)
{
	struct nfs_local_kiocb *iocb;

	iocb = kzalloc(sizeof(*iocb), flags);
	if (iocb == NULL)
		return NULL;

	iocb->bvec = kmalloc_array(hdr->page_array.npages,
				   sizeof(struct bio_vec), flags);
	if (iocb->bvec == NULL) {
		kfree(iocb);
		return NULL;
	}

	nfs_local_mapping_set_gfp_nofs_context(file->f_mapping);
	init_sync_kiocb(&iocb->kiocb, file);

	iocb->hdr = hdr;
	iocb->kiocb.ki_pos = hdr->args.offset;
	iocb->kiocb.ki_flags &= ~IOCB_APPEND;
	iocb->kiocb.ki_complete = NULL;
	iocb->aio_complete_work = NULL;

	iocb->end_iter_index = -1;

	return iocb;
}

static bool
nfs_is_local_dio_possible(struct nfs_local_kiocb *iocb, int
rw,
			  size_t len, struct nfs_local_dio *local_dio)
{
	struct nfs_pgio_header *hdr = iocb->hdr;
	loff_t offset = hdr->args.offset;
	u32 nf_dio_mem_align, nf_dio_offset_align, nf_dio_read_offset_align;
	loff_t start_end, orig_end, middle_end;

	nfs_to->nfsd_file_dio_alignment(iocb->localio, &nf_dio_mem_align,
					&nf_dio_offset_align, &nf_dio_read_offset_align);
	/* Reads use a separate offset alignment from writes */
	if (rw == ITER_DEST)
		nf_dio_offset_align = nf_dio_read_offset_align;

	/* No DIO possible if the file advertises no alignments */
	if (unlikely(!nf_dio_mem_align || !nf_dio_offset_align))
		return false;
	if (unlikely(len < nf_dio_offset_align))
		return false;

	local_dio->mem_align = nf_dio_mem_align;
	local_dio->offset_align = nf_dio_offset_align;

	/*
	 * Carve the request into [start, middle, end): the middle is
	 * rounded to the DIO offset alignment; any remainder before or
	 * after it is issued separately (as buffered I/O).
	 */
	start_end = round_up(offset, nf_dio_offset_align);
	orig_end = offset + len;
	middle_end = round_down(orig_end, nf_dio_offset_align);

	local_dio->middle_offset = start_end;
	local_dio->end_offset = middle_end;

	local_dio->start_len = start_end - offset;
	local_dio->middle_len = middle_end - start_end;
	local_dio->end_len = orig_end - middle_end;

	if (rw == ITER_DEST)
		trace_nfs_local_dio_read(hdr->inode, offset, len, local_dio);
	else
		trace_nfs_local_dio_write(hdr->inode, offset, len, local_dio);
	return true;
}

/*
 * Verify every bvec segment's address and the iter's total length
 * against the DIO address and length alignment masks.
 */
static bool nfs_iov_iter_aligned_bvec(const struct iov_iter *i,
				unsigned int addr_mask, unsigned int len_mask)
{
	const struct bio_vec *bvec = i->bvec;
	size_t skip = i->iov_offset;
	size_t size = i->count;

	if (size & len_mask)
		return false;
	do {
		size_t len = bvec->bv_len;

		if (len > size)
			len = size;
		if ((unsigned long)(bvec->bv_offset + skip) & addr_mask)
			return false;
		bvec++;
		size -= len;
		skip = 0;
	} while (size);

	return true;
}

/*
 * Initialize @iter over @bvec and clamp it to the [@start, @start+@len)
 * window of the request.
 */
static void
nfs_local_iter_setup(struct iov_iter *iter, int rw, struct bio_vec *bvec,
		     unsigned int nvecs, unsigned long total,
		     size_t start, size_t len)
{
iov_iter_bvec(iter, rw, bvec, nvecs, total);
	if (start)
		iov_iter_advance(iter, start);
	iov_iter_truncate(iter, len);
}

/*
 * Setup as many as 3 iov_iter based on extents described by @local_dio.
 * Returns the number of iov_iter that were setup.
 */
static int
nfs_local_iters_setup_dio(struct nfs_local_kiocb *iocb, int rw,
			  unsigned int nvecs, unsigned long total,
			  struct nfs_local_dio *local_dio)
{
	int n_iters = 0;
	struct iov_iter *iters = iocb->iters;

	/* Setup misaligned start? */
	if (local_dio->start_len) {
		nfs_local_iter_setup(&iters[n_iters], rw, iocb->bvec,
				     nvecs, total, 0, local_dio->start_len);
		++n_iters;
	}

	/*
	 * Setup DIO-aligned middle, if there is no misaligned end (below)
	 * then AIO completion is used, see nfs_local_call_{read,write}
	 */
	nfs_local_iter_setup(&iters[n_iters], rw, iocb->bvec, nvecs,
			     total, local_dio->start_len, local_dio->middle_len);

	iocb->iter_is_dio_aligned[n_iters] =
		nfs_iov_iter_aligned_bvec(&iters[n_iters],
			local_dio->mem_align-1, local_dio->offset_align-1);

	if (unlikely(!iocb->iter_is_dio_aligned[n_iters])) {
		trace_nfs_local_dio_misaligned(iocb->hdr->inode,
			local_dio->start_len, local_dio->middle_len, local_dio);
		return 0; /* no DIO-aligned IO possible */
	}
	iocb->end_iter_index = n_iters;
	++n_iters;

	/* Setup misaligned end?
*/
	if (local_dio->end_len) {
		nfs_local_iter_setup(&iters[n_iters], rw, iocb->bvec,
				     nvecs, total, local_dio->start_len +
				     local_dio->middle_len, local_dio->end_len);
		iocb->end_iter_index = n_iters;
		++n_iters;
	}

	atomic_set(&iocb->n_iters, n_iters);
	return n_iters;
}

/*
 * Build the iov_iter(s) for this request: a single buffered iter by
 * default, or up to 3 iters when O_DIRECT and alignment permit DIO.
 */
static noinline_for_stack void
nfs_local_iters_init(struct nfs_local_kiocb *iocb, int rw)
{
	struct nfs_pgio_header *hdr = iocb->hdr;
	struct page **pagevec = hdr->page_array.pagevec;
	unsigned long v, total;
	unsigned int base;
	size_t len;

	/* Populate iocb->bvec from the request's page vector */
	v = 0;
	total = hdr->args.count;
	base = hdr->args.pgbase;
	pagevec += base >> PAGE_SHIFT;
	base &= ~PAGE_MASK;
	while (total && v < hdr->page_array.npages) {
		len = min_t(size_t, total, PAGE_SIZE - base);
		bvec_set_page(&iocb->bvec[v], *pagevec, len, base);
		total -= len;
		++pagevec;
		++v;
		base = 0;
	}
	len = hdr->args.count - total;	/* bytes actually mapped into bvecs */

	/*
	 * For each iocb, iocb->n_iters is always at least 1 and we always
	 * end io after first nfs_local_pgio_done call unless misaligned DIO.
 */
	atomic_set(&iocb->n_iters, 1);

	if (test_bit(NFS_IOHDR_ODIRECT, &hdr->flags)) {
		struct nfs_local_dio local_dio;

		if (nfs_is_local_dio_possible(iocb, rw, len, &local_dio) &&
		    nfs_local_iters_setup_dio(iocb, rw, v, len, &local_dio) != 0) {
			/* Ensure DIO WRITE's IO on stable storage upon completion */
			if (rw == ITER_SOURCE)
				iocb->kiocb.ki_flags |= IOCB_DSYNC|IOCB_SYNC;
			return; /* is DIO-aligned */
		}
	}

	/* Use buffered IO */
	iov_iter_bvec(&iocb->iters[0], rw, iocb->bvec, v, len);
}

/* Finish the request: invoke the RPC completion and release callbacks */
static void
nfs_local_hdr_release(struct nfs_pgio_header *hdr,
		const struct rpc_call_ops *call_ops)
{
	call_ops->rpc_call_done(&hdr->task, hdr);
	call_ops->rpc_release(hdr);
}

static void
nfs_local_pgio_init(struct nfs_pgio_header *hdr,
		const struct rpc_call_ops *call_ops)
{
	hdr->task.tk_ops = call_ops;
	if (!hdr->task.tk_start)
		hdr->task.tk_start = ktime_get();
}

/*
 * Record the result of one iov_iter's I/O in @hdr.  Returns true when
 * this was the final outstanding iter for the iocb (n_iters hit zero).
 */
static bool nfs_local_pgio_done(struct nfs_local_kiocb *iocb, long status)
{
	struct nfs_pgio_header *hdr = iocb->hdr;

	/* Must handle partial completions */
	if (status >= 0) {
		hdr->res.count += status;
		/* @hdr was initialized to 0 (zeroed during allocation) */
		if (hdr->task.tk_status == 0)
			hdr->res.op_status = NFS4_OK;
	} else {
		hdr->res.op_status = nfs_localio_errno_to_nfs4_stat(status);
		hdr->task.tk_status = status;
	}

	BUG_ON(atomic_read(&iocb->n_iters) <= 0);
	return atomic_dec_and_test(&iocb->n_iters);
}

static void
nfs_local_iocb_release(struct nfs_local_kiocb *iocb)
{
	nfs_local_file_put(iocb->localio);
	nfs_local_iocb_free(iocb);
}

/*
 * Reissue @hdr's I/O by reusing @iocb after resetting its kiocb and
 * iterator state (taken when rpc_call_done left tk_action set, see
 * nfs_local_pgio_release).
 */
static void nfs_local_pgio_restart(struct nfs_local_kiocb *iocb,
				   struct nfs_pgio_header *hdr)
{
	int status = 0;

	iocb->kiocb.ki_pos = hdr->args.offset;
	iocb->kiocb.ki_flags &= ~(IOCB_DSYNC | IOCB_SYNC | IOCB_DIRECT);
iocb->kiocb.ki_complete = NULL;
	iocb->aio_complete_work = NULL;
	iocb->end_iter_index = -1;

	switch (hdr->rw_mode) {
	case FMODE_READ:
		nfs_local_iters_init(iocb, ITER_DEST);
		nfs_local_do_read(iocb, hdr->task.tk_ops);
		break;
	case FMODE_WRITE:
		nfs_local_iters_init(iocb, ITER_SOURCE);
		nfs_local_do_write(iocb, hdr->task.tk_ops);
		break;
	default:
		status = -EOPNOTSUPP;
	}

	if (unlikely(status != 0)) {
		nfs_local_iocb_release(iocb);
		hdr->task.tk_status = status;
		nfs_local_hdr_release(hdr, hdr->task.tk_ops);
	}
}

static void nfs_local_pgio_release(struct nfs_local_kiocb *iocb)
{
	struct nfs_pgio_header *hdr = iocb->hdr;
	struct rpc_task *task = &hdr->task;

	task->tk_action = NULL;
	task->tk_ops->rpc_call_done(task, hdr);

	/* rpc_call_done() may set tk_action to request a restart */
	if (task->tk_action == NULL) {
		nfs_local_iocb_release(iocb);
		task->tk_ops->rpc_release(hdr);
	} else
		nfs_local_pgio_restart(iocb, hdr);
}

/*
 * Complete the I/O from iocb->kiocb.ki_complete()
 *
 * Note that this function can be called from a bottom half context,
 * hence we need to queue the rpc_call_done() etc to a workqueue
 */
static inline void nfs_local_pgio_aio_complete(struct nfs_local_kiocb *iocb)
{
	INIT_WORK(&iocb->work, iocb->aio_complete_work);
	queue_work(nfsiod_workqueue, &iocb->work);
}

static void nfs_local_read_done(struct nfs_local_kiocb *iocb)
{
	struct nfs_pgio_header *hdr = iocb->hdr;
	struct file *filp = iocb->kiocb.ki_filp;
	long status = hdr->task.tk_status;

	if ((iocb->kiocb.ki_flags & IOCB_DIRECT) && status == -EINVAL) {
		/* Underlying FS will return -EINVAL if misaligned DIO is attempted. */
		pr_info_ratelimited("nfs: Unexpected direct I/O read alignment failure\n");
	}

	/*
	 * Must clear replen otherwise NFSv3 data corruption will occur
	 * if/when switching from LOCALIO back to using normal RPC.
 */
	hdr->res.replen = 0;

	/* nfs_readpage_result() handles short read */

	if (hdr->args.offset + hdr->res.count >= i_size_read(file_inode(filp)))
		hdr->res.eof = true;

	dprintk("%s: read %ld bytes eof %d.\n", __func__,
		status > 0 ? status : 0, hdr->res.eof);
}

static inline void nfs_local_read_iocb_done(struct nfs_local_kiocb *iocb)
{
	nfs_local_read_done(iocb);
	nfs_local_pgio_release(iocb);
}

static void nfs_local_read_aio_complete_work(struct work_struct *work)
{
	struct nfs_local_kiocb *iocb =
		container_of(work, struct nfs_local_kiocb, work);

	nfs_local_read_iocb_done(iocb);
}

static void nfs_local_read_aio_complete(struct kiocb *kiocb, long ret)
{
	struct nfs_local_kiocb *iocb =
		container_of(kiocb, struct nfs_local_kiocb, kiocb);

	/* AIO completion of DIO read should always be last to complete */
	if (unlikely(!nfs_local_pgio_done(iocb, ret)))
		return;

	nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_read_aio_complete_work */
}

/*
 * Issue the request's iov_iter reads, enabling IOCB_DIRECT for
 * DIO-aligned segments and AIO completion only for a trailing
 * DIO-aligned segment (see end_iter_index).  Runs on
 * nfslocaliod_workqueue.
 */
static void nfs_local_call_read(struct work_struct *work)
{
	struct nfs_local_kiocb *iocb =
		container_of(work, struct nfs_local_kiocb, work);
	struct file *filp = iocb->kiocb.ki_filp;
	ssize_t status;
	int n_iters;

	n_iters = atomic_read(&iocb->n_iters);
	for (int i = 0; i < n_iters ; i++) {
		if (iocb->iter_is_dio_aligned[i]) {
			iocb->kiocb.ki_flags |= IOCB_DIRECT;
			/* Only use AIO completion if DIO-aligned segment is last */
			if (i == iocb->end_iter_index) {
				iocb->kiocb.ki_complete = nfs_local_read_aio_complete;
				iocb->aio_complete_work = nfs_local_read_aio_complete_work;
			}
		} else
			iocb->kiocb.ki_flags &= ~IOCB_DIRECT;

		scoped_with_creds(filp->f_cred)
			status = filp->f_op->read_iter(&iocb->kiocb, &iocb->iters[i]);

		if (status == -EIOCBQUEUED)
			continue;
		/* Break on completion, errors, or short reads */
if (nfs_local_pgio_done(iocb, status) || status < 0 ||
		    (size_t)status < iov_iter_count(&iocb->iters[i])) {
			nfs_local_read_iocb_done(iocb);
			break;
		}
	}
}

static void nfs_local_do_read(struct nfs_local_kiocb *iocb,
			      const struct rpc_call_ops *call_ops)
{
	struct nfs_pgio_header *hdr = iocb->hdr;

	dprintk("%s: vfs_read count=%u pos=%llu\n",
		__func__, hdr->args.count, hdr->args.offset);

	nfs_local_pgio_init(hdr, call_ops);
	hdr->res.eof = false;

	INIT_WORK(&iocb->work, nfs_local_call_read);
	queue_work(nfslocaliod_workqueue, &iocb->work);
}

/*
 * Copy the client's boot verifier under the cl_boot_lock seqlock so a
 * concurrent nfs_reset_boot_verifier() cannot produce a torn read.
 */
static void
nfs_copy_boot_verifier(struct nfs_write_verifier *verifier, struct inode *inode)
{
	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;
	u32 *verf = (u32 *)verifier->data;
	unsigned int seq;

	do {
		seq = read_seqbegin(&clp->cl_boot_lock);
		verf[0] = (u32)clp->cl_nfssvc_boot.tv_sec;
		verf[1] = (u32)clp->cl_nfssvc_boot.tv_nsec;
	} while (read_seqretry(&clp->cl_boot_lock, seq));
}

/*
 * Invalidate the boot verifier by resetting it to the current time;
 * called when a local write or commit fails.
 */
static void
nfs_reset_boot_verifier(struct inode *inode)
{
	struct nfs_client *clp = NFS_SERVER(inode)->nfs_client;

	write_seqlock(&clp->cl_boot_lock);
	ktime_get_real_ts64(&clp->cl_nfssvc_boot);
	write_sequnlock(&clp->cl_boot_lock);
}

static void
nfs_set_local_verifier(struct inode *inode,
		struct nfs_writeverf *verf,
		enum nfs3_stable_how how)
{
	nfs_copy_boot_verifier(&verf->verifier, inode);
	verf->committed = how;
}

/* Factored out from fs/nfsd/vfs.h:fh_getattr() */
static int __vfs_getattr(const struct path *p, struct kstat *stat, int version)
{
	u32 request_mask = STATX_BASIC_STATS;

	if (version == 4)
		request_mask |= (STATX_BTIME | STATX_CHANGE_COOKIE);
	return vfs_getattr(p, stat, request_mask, AT_STATX_SYNC_AS_STAT);
}

/* Copied from fs/nfsd/nfsfh.c:nfsd4_change_attribute() */
static u64
__nfsd4_change_attribute(const struct kstat *stat,
			 const struct inode *inode)
{
	u64 chattr;

	if (stat->result_mask & STATX_CHANGE_COOKIE) {
		chattr = stat->change_cookie;
		/* Fold in ctime unless the cookie is known monotonic */
		if (S_ISREG(inode->i_mode) &&
		    !(stat->attributes & STATX_ATTR_CHANGE_MONOTONIC)) {
			chattr += (u64)stat->ctime.tv_sec << 30;
			chattr += stat->ctime.tv_nsec;
		}
	} else {
		chattr = time_to_chattr(&stat->ctime);
	}
	return chattr;
}

/*
 * Refresh hdr->res.fattr from a local getattr after I/O.  A NULL fattr
 * or a getattr failure is silently ignored (attributes just aren't
 * updated).
 */
static void nfs_local_vfs_getattr(struct nfs_local_kiocb *iocb)
{
	struct kstat stat;
	struct file *filp = iocb->kiocb.ki_filp;
	struct nfs_pgio_header *hdr = iocb->hdr;
	struct nfs_fattr *fattr = hdr->res.fattr;
	int version = NFS_PROTO(hdr->inode)->version;

	if (unlikely(!fattr) || __vfs_getattr(&filp->f_path, &stat, version))
		return;

	fattr->valid = (NFS_ATTR_FATTR_FILEID |
			NFS_ATTR_FATTR_CHANGE |
			NFS_ATTR_FATTR_SIZE |
			NFS_ATTR_FATTR_ATIME |
			NFS_ATTR_FATTR_MTIME |
			NFS_ATTR_FATTR_CTIME |
			NFS_ATTR_FATTR_SPACE_USED);

	fattr->fileid = stat.ino;
	fattr->size = stat.size;
	fattr->atime = stat.atime;
	fattr->mtime = stat.mtime;
	fattr->ctime = stat.ctime;
	if (version == 4) {
		fattr->change_attr =
			__nfsd4_change_attribute(&stat, file_inode(filp));
	} else
		fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime);
	fattr->du.nfs3.used = stat.blocks << 9;
}

static void nfs_local_write_done(struct nfs_local_kiocb *iocb)
{
	struct nfs_pgio_header *hdr = iocb->hdr;
	long status = hdr->task.tk_status;

	dprintk("%s: wrote %ld bytes.\n", __func__, status > 0 ? status : 0);

	if ((iocb->kiocb.ki_flags & IOCB_DIRECT) && status == -EINVAL) {
		/* Underlying FS will return -EINVAL if misaligned DIO is attempted.
*/
		pr_info_ratelimited("nfs: Unexpected direct I/O write alignment failure\n");
	}

	/* A failed write invalidates the boot verifier */
	if (status < 0)
		nfs_reset_boot_verifier(hdr->inode);
}

static inline void nfs_local_write_iocb_done(struct nfs_local_kiocb *iocb)
{
	nfs_local_write_done(iocb);
	nfs_local_vfs_getattr(iocb);
	nfs_local_pgio_release(iocb);
}

static void nfs_local_write_aio_complete_work(struct work_struct *work)
{
	struct nfs_local_kiocb *iocb =
		container_of(work, struct nfs_local_kiocb, work);

	nfs_local_write_iocb_done(iocb);
}

static void nfs_local_write_aio_complete(struct kiocb *kiocb, long ret)
{
	struct nfs_local_kiocb *iocb =
		container_of(kiocb, struct nfs_local_kiocb, kiocb);

	/* AIO completion of DIO write should always be last to complete */
	if (unlikely(!nfs_local_pgio_done(iocb, ret)))
		return;

	nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_write_aio_complete_work */
}

/*
 * Issue the request's iov_iter writes on nfslocaliod_workqueue.
 * PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO are set for the duration of the
 * writes and restored afterwards.
 */
static void nfs_local_call_write(struct work_struct *work)
{
	struct nfs_local_kiocb *iocb =
		container_of(work, struct nfs_local_kiocb, work);
	struct file *filp = iocb->kiocb.ki_filp;
	unsigned long old_flags = current->flags;
	ssize_t status;
	int n_iters;

	current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;

	file_start_write(filp);
	n_iters = atomic_read(&iocb->n_iters);
	for (int i = 0; i < n_iters ; i++) {
		if (iocb->iter_is_dio_aligned[i]) {
			iocb->kiocb.ki_flags |= IOCB_DIRECT;
			/* Only use AIO completion if DIO-aligned segment is last */
			if (i == iocb->end_iter_index) {
				iocb->kiocb.ki_complete = nfs_local_write_aio_complete;
				iocb->aio_complete_work = nfs_local_write_aio_complete_work;
			}
		} else
			iocb->kiocb.ki_flags &= ~IOCB_DIRECT;

		scoped_with_creds(filp->f_cred)
			status = filp->f_op->write_iter(&iocb->kiocb, &iocb->iters[i]);

		if (status == -EIOCBQUEUED)
			continue;
		/* Break on
completion, errors, or short writes */
		if (nfs_local_pgio_done(iocb, status) || status < 0 ||
		    (size_t)status < iov_iter_count(&iocb->iters[i])) {
			nfs_local_write_iocb_done(iocb);
			break;
		}
	}
	file_end_write(filp);

	current->flags = old_flags;
}

static void nfs_local_do_write(struct nfs_local_kiocb *iocb,
			       const struct rpc_call_ops *call_ops)
{
	struct nfs_pgio_header *hdr = iocb->hdr;

	dprintk("%s: vfs_write count=%u pos=%llu %s\n",
		__func__, hdr->args.count, hdr->args.offset,
		(hdr->args.stable == NFS_UNSTABLE) ? "unstable" : "stable");

	/* Map the NFS stability level onto kiocb sync flags */
	switch (hdr->args.stable) {
	default:
		break;
	case NFS_DATA_SYNC:
		iocb->kiocb.ki_flags |= IOCB_DSYNC;
		break;
	case NFS_FILE_SYNC:
		iocb->kiocb.ki_flags |= IOCB_DSYNC|IOCB_SYNC;
	}

	nfs_local_pgio_init(hdr, call_ops);

	nfs_set_local_verifier(hdr->inode, hdr->res.verf, hdr->args.stable);

	INIT_WORK(&iocb->work, nfs_local_call_write);
	queue_work(nfslocaliod_workqueue, &iocb->work);
}

/*
 * Allocate an nfs_local_kiocb for @hdr and build its iov_iter(s).
 * Returns ERR_PTR(-EOPNOTSUPP) if the local file lacks the needed
 * read_iter/write_iter method, ERR_PTR(-ENOMEM) on allocation failure.
 */
static struct nfs_local_kiocb *
nfs_local_iocb_init(struct nfs_pgio_header *hdr, struct nfsd_file *localio)
{
	struct file *file = nfs_to->nfsd_file_file(localio);
	struct nfs_local_kiocb *iocb;
	gfp_t gfp_mask;
	int rw;

	if (hdr->rw_mode & FMODE_READ) {
		if (!file->f_op->read_iter)
			return ERR_PTR(-EOPNOTSUPP);
		gfp_mask = GFP_KERNEL;
		rw = ITER_DEST;
	} else {
		if (!file->f_op->write_iter)
			return ERR_PTR(-EOPNOTSUPP);
		gfp_mask = GFP_NOIO;
		rw = ITER_SOURCE;
	}

	iocb = nfs_local_iocb_alloc(hdr, file, gfp_mask);
	if (iocb == NULL)
		return ERR_PTR(-ENOMEM);
	iocb->hdr = hdr;
	iocb->localio = localio;

	nfs_local_iters_init(iocb, rw);

	return iocb;
}

int nfs_local_doio(struct nfs_client *clp, struct nfsd_file *localio,
		   struct nfs_pgio_header *hdr,
		   const struct rpc_call_ops *call_ops)
{
	struct
nfs_local_kiocb *iocb;
	int status = 0;

	/* Nothing to do for a zero-length request */
	if (!hdr->args.count)
		return 0;

	iocb = nfs_local_iocb_init(hdr, localio);
	if (IS_ERR(iocb))
		return PTR_ERR(iocb);

	switch (hdr->rw_mode) {
	case FMODE_READ:
		nfs_local_do_read(iocb, call_ops);
		break;
	case FMODE_WRITE:
		nfs_local_do_write(iocb, call_ops);
		break;
	default:
		dprintk("%s: invalid mode: %d\n", __func__,
			hdr->rw_mode);
		status = -EOPNOTSUPP;
	}

	if (unlikely(status != 0)) {
		nfs_local_iocb_release(iocb);
		hdr->task.tk_status = status;
		nfs_local_hdr_release(hdr, call_ops);
	}
	return status;
}

static void
nfs_local_init_commit(struct nfs_commit_data *data,
		      const struct rpc_call_ops *call_ops)
{
	data->task.tk_ops = call_ops;
}

/*
 * fsync the byte range described by @data->args.  A count of 0 means
 * "to end of file" (LLONG_MAX), as does arithmetic overflow of the end.
 */
static int
nfs_local_run_commit(struct file *filp, struct nfs_commit_data *data)
{
	loff_t start = data->args.offset;
	loff_t end = LLONG_MAX;

	if (data->args.count > 0) {
		end = start + data->args.count - 1;
		if (end < start)
			end = LLONG_MAX;
	}

	nfs_local_mapping_set_gfp_nofs_context(filp->f_mapping);

	dprintk("%s: commit %llu - %llu\n", __func__, start, end);
	return vfs_fsync_range(filp, start, end, 0);
}

static void
nfs_local_commit_done(struct nfs_commit_data *data, int status)
{
	if (status >= 0) {
		nfs_set_local_verifier(data->inode,
				data->res.verf,
				NFS_FILE_SYNC);
		data->res.op_status = NFS4_OK;
		data->task.tk_status = 0;
	} else {
		/* A failed commit invalidates the boot verifier */
		nfs_reset_boot_verifier(data->inode);
		data->res.op_status = nfs_localio_errno_to_nfs4_stat(status);
		data->task.tk_status = status;
	}
}

static void
nfs_local_release_commit_data(struct nfsd_file *localio,
			      struct nfs_commit_data *data,
			      const struct rpc_call_ops *call_ops)
{
	nfs_local_file_put(localio);
	call_ops->rpc_call_done(&data->task, data);
	call_ops->rpc_release(data);
}

static void
nfs_local_fsync_ctx_free(struct nfs_local_fsync_ctx *ctx)
{
	nfs_local_release_commit_data(ctx->localio, ctx->data,
				      ctx->data->task.tk_ops);
	kfree(ctx);
}

/*
 * Workqueue function that performs the commit (fsync), signals any
 * synchronous waiter, and frees the context.  PF_LOCAL_THROTTLE |
 * PF_MEMALLOC_NOIO are set for the duration and restored afterwards.
 */
static void
nfs_local_fsync_work(struct work_struct *work)
{
	unsigned long old_flags = current->flags;
	struct nfs_local_fsync_ctx *ctx;
	int status;

	ctx = container_of(work, struct nfs_local_fsync_ctx, work);

	current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO;

	status = nfs_local_run_commit(nfs_to->nfsd_file_file(ctx->localio),
				      ctx->data);
	nfs_local_commit_done(ctx->data, status);
	if (ctx->done != NULL)
		complete(ctx->done);
	nfs_local_fsync_ctx_free(ctx);

	current->flags = old_flags;
}

static struct nfs_local_fsync_ctx *
nfs_local_fsync_ctx_alloc(struct nfs_commit_data *data,
			  struct nfsd_file *localio, gfp_t flags)
{
	struct nfs_local_fsync_ctx *ctx = kmalloc(sizeof(*ctx), flags);

	if (ctx != NULL) {
		ctx->localio = localio;
		ctx->data = data;
		INIT_WORK(&ctx->work, nfs_local_fsync_work);
		ctx->done = NULL;
	}
	return ctx;
}

int nfs_local_commit(struct nfsd_file *localio,
		     struct nfs_commit_data *data,
		     const struct rpc_call_ops *call_ops, int how)
{
	struct nfs_local_fsync_ctx *ctx;

	ctx = nfs_local_fsync_ctx_alloc(data, localio, GFP_NOIO);
	if (!ctx) {
		nfs_local_commit_done(data, -ENOMEM);
		nfs_local_release_commit_data(localio, data, call_ops);
		return -ENOMEM;
	}

	nfs_local_init_commit(data, call_ops);

	/* For FLUSH_SYNC, wait for the queued commit work to complete */
	if (how & FLUSH_SYNC) {
		DECLARE_COMPLETION_ONSTACK(done);
		ctx->done = &done;
		queue_work(nfslocaliod_workqueue, &ctx->work);
		wait_for_completion(&done);
	} else
		queue_work(nfslocaliod_workqueue, &ctx->work);

	return 0;
}