1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * NFS client support for local clients to bypass network stack 4 * 5 * Copyright (C) 2014 Weston Andros Adamson <dros@primarydata.com> 6 * Copyright (C) 2019 Trond Myklebust <trond.myklebust@hammerspace.com> 7 * Copyright (C) 2024 Mike Snitzer <snitzer@hammerspace.com> 8 * Copyright (C) 2024 NeilBrown <neilb@suse.de> 9 */ 10 11 #include <linux/module.h> 12 #include <linux/errno.h> 13 #include <linux/vfs.h> 14 #include <linux/file.h> 15 #include <linux/inet.h> 16 #include <linux/sunrpc/addr.h> 17 #include <linux/inetdevice.h> 18 #include <net/addrconf.h> 19 #include <linux/nfs_common.h> 20 #include <linux/nfslocalio.h> 21 #include <linux/bvec.h> 22 23 #include <linux/nfs.h> 24 #include <linux/nfs_fs.h> 25 #include <linux/nfs_xdr.h> 26 27 #include "internal.h" 28 #include "pnfs.h" 29 #include "nfstrace.h" 30 31 #define NFSDBG_FACILITY NFSDBG_VFS 32 33 #define NFSLOCAL_MAX_IOS 3 34 35 struct nfs_local_kiocb { 36 struct kiocb kiocb; 37 struct bio_vec *bvec; 38 struct nfs_pgio_header *hdr; 39 struct work_struct work; 40 void (*aio_complete_work)(struct work_struct *); 41 struct nfsd_file *localio; 42 /* Begin mostly DIO-specific members */ 43 size_t end_len; 44 short int end_iter_index; 45 atomic_t n_iters; 46 bool iter_is_dio_aligned[NFSLOCAL_MAX_IOS]; 47 struct iov_iter iters[NFSLOCAL_MAX_IOS] ____cacheline_aligned; 48 /* End mostly DIO-specific members */ 49 }; 50 51 struct nfs_local_fsync_ctx { 52 struct nfsd_file *localio; 53 struct nfs_commit_data *data; 54 struct work_struct work; 55 struct completion *done; 56 }; 57 58 static bool localio_enabled __read_mostly = true; 59 module_param(localio_enabled, bool, 0644); 60 61 static inline bool nfs_client_is_local(const struct nfs_client *clp) 62 { 63 return !!rcu_access_pointer(clp->cl_uuid.net); 64 } 65 66 bool nfs_server_is_local(const struct nfs_client *clp) 67 { 68 return nfs_client_is_local(clp) && localio_enabled; 69 } 70 EXPORT_SYMBOL_GPL(nfs_server_is_local); 71 72 /* 73 * UUID_IS_LOCAL XDR functions 74 */ 75 76 static void localio_xdr_enc_uuidargs(struct rpc_rqst *req, 77 struct xdr_stream *xdr, 78 const void *data) 79 { 80 const u8 *uuid = data; 81 82 encode_opaque_fixed(xdr, uuid, UUID_SIZE); 83 } 84 85 static int localio_xdr_dec_uuidres(struct rpc_rqst *req, 86 struct xdr_stream *xdr, 87 void *result) 88 { 89 /* void return */ 90 return 0; 91 } 92 93 static const struct rpc_procinfo nfs_localio_procedures[] = { 94 [LOCALIOPROC_UUID_IS_LOCAL] = { 95 .p_proc = LOCALIOPROC_UUID_IS_LOCAL, 96 .p_encode = localio_xdr_enc_uuidargs, 97 .p_decode = localio_xdr_dec_uuidres, 98 .p_arglen = XDR_QUADLEN(UUID_SIZE), 99 .p_replen = 0, 100 .p_statidx = LOCALIOPROC_UUID_IS_LOCAL, 101 .p_name = "UUID_IS_LOCAL", 102 }, 103 }; 104 105 static unsigned int nfs_localio_counts[ARRAY_SIZE(nfs_localio_procedures)]; 106 static const struct rpc_version nfslocalio_version1 = { 107 .number = 1, 108 .nrprocs = ARRAY_SIZE(nfs_localio_procedures), 109 .procs = nfs_localio_procedures, 110 .counts = nfs_localio_counts, 111 }; 112 113 static const struct rpc_version *nfslocalio_version[] = { 114 [1] = &nfslocalio_version1, 115 }; 116 117 extern const struct rpc_program nfslocalio_program; 118 static struct rpc_stat nfslocalio_rpcstat = { &nfslocalio_program }; 119 120 const struct rpc_program nfslocalio_program = { 121 .name = "nfslocalio", 122 .number = NFS_LOCALIO_PROGRAM, 123 .nrvers = ARRAY_SIZE(nfslocalio_version), 124 .version = nfslocalio_version, 125 .stats = &nfslocalio_rpcstat, 126 }; 127 128 /* 129 * nfs_init_localioclient - Initialise an NFS localio client connection 130 */ 131 static struct rpc_clnt *nfs_init_localioclient(struct nfs_client *clp) 132 { 133 struct rpc_clnt *rpcclient_localio; 134 135 rpcclient_localio = rpc_bind_new_program(clp->cl_rpcclient, 136 &nfslocalio_program, 1); 137 138 dprintk_rcu("%s: server (%s) %s NFS LOCALIO.\n", 139 __func__, rpc_peeraddr2str(clp->cl_rpcclient, RPC_DISPLAY_ADDR), 140 (IS_ERR(rpcclient_localio) ? "does not support" : "supports")); 141 142 return rpcclient_localio; 143 } 144 145 static bool nfs_server_uuid_is_local(struct nfs_client *clp) 146 { 147 u8 uuid[UUID_SIZE]; 148 struct rpc_message msg = { 149 .rpc_argp = &uuid, 150 }; 151 struct rpc_clnt *rpcclient_localio; 152 int status; 153 154 rpcclient_localio = nfs_init_localioclient(clp); 155 if (IS_ERR(rpcclient_localio)) 156 return false; 157 158 export_uuid(uuid, &clp->cl_uuid.uuid); 159 160 msg.rpc_proc = &nfs_localio_procedures[LOCALIOPROC_UUID_IS_LOCAL]; 161 status = rpc_call_sync(rpcclient_localio, &msg, 0); 162 dprintk("%s: NFS reply UUID_IS_LOCAL: status=%d\n", 163 __func__, status); 164 rpc_shutdown_client(rpcclient_localio); 165 166 /* Server is only local if it initialized required struct members */ 167 if (status || !rcu_access_pointer(clp->cl_uuid.net) || !clp->cl_uuid.dom) 168 return false; 169 170 return true; 171 } 172 173 /* 174 * nfs_local_probe - probe local i/o support for an nfs_server and nfs_client 175 * - called after alloc_client and init_client (so cl_rpcclient exists) 176 * - this function is idempotent, it can be called for old or new clients 177 */ 178 static void nfs_local_probe(struct nfs_client *clp) 179 { 180 /* Disallow localio if disabled via sysfs or AUTH_SYS isn't used */ 181 if (!localio_enabled || 182 clp->cl_rpcclient->cl_auth->au_flavor != RPC_AUTH_UNIX) { 183 nfs_localio_disable_client(clp); 184 return; 185 } 186 187 if (nfs_client_is_local(clp)) 188 return; 189 190 if (!nfs_uuid_begin(&clp->cl_uuid)) 191 return; 192 if (nfs_server_uuid_is_local(clp)) 193 nfs_localio_enable_client(clp); 194 nfs_uuid_end(&clp->cl_uuid); 195 } 196 197 void nfs_local_probe_async_work(struct work_struct *work) 198 { 199 struct nfs_client *clp = 200 container_of(work, struct nfs_client, cl_local_probe_work); 201 202 if (!refcount_inc_not_zero(&clp->cl_count)) 203 return; 204 nfs_local_probe(clp); 205 nfs_put_client(clp); 206 } 207 208 void nfs_local_probe_async(struct nfs_client *clp) 209 { 210 queue_work(nfsiod_workqueue, &clp->cl_local_probe_work); 211 } 212 EXPORT_SYMBOL_GPL(nfs_local_probe_async); 213 214 static inline void nfs_local_file_put(struct nfsd_file *localio) 215 { 216 /* nfs_to_nfsd_file_put_local() expects an __rcu pointer 217 * but we have a __kernel pointer. It is always safe 218 * to cast a __kernel pointer to an __rcu pointer 219 * because the cast only weakens what is known about the pointer. 220 */ 221 struct nfsd_file __rcu *nf = (struct nfsd_file __rcu*) localio; 222 223 nfs_to_nfsd_file_put_local(&nf); 224 } 225 226 /* 227 * __nfs_local_open_fh - open a local filehandle in terms of nfsd_file. 228 * 229 * Returns a pointer to a struct nfsd_file or ERR_PTR. 230 * Caller must release returned nfsd_file with nfs_to_nfsd_file_put_local(). 231 */ 232 static struct nfsd_file * 233 __nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred, 234 struct nfs_fh *fh, struct nfs_file_localio *nfl, 235 struct nfsd_file __rcu **pnf, 236 const fmode_t mode) 237 { 238 int status = 0; 239 struct nfsd_file *localio; 240 241 localio = nfs_open_local_fh(&clp->cl_uuid, clp->cl_rpcclient, 242 cred, fh, nfl, pnf, mode); 243 if (IS_ERR(localio)) { 244 status = PTR_ERR(localio); 245 switch (status) { 246 case -ENOMEM: 247 case -ENXIO: 248 case -ENOENT: 249 /* Revalidate localio */ 250 nfs_localio_disable_client(clp); 251 nfs_local_probe(clp); 252 } 253 } 254 trace_nfs_local_open_fh(fh, mode, status); 255 return localio; 256 } 257 258 /* 259 * nfs_local_open_fh - open a local filehandle in terms of nfsd_file. 260 * First checking if the open nfsd_file is already cached, otherwise 261 * must __nfs_local_open_fh and insert the nfsd_file in nfs_file_localio. 262 * 263 * Returns a pointer to a struct nfsd_file or NULL. 264 */ 265 struct nfsd_file * 266 nfs_local_open_fh(struct nfs_client *clp, const struct cred *cred, 267 struct nfs_fh *fh, struct nfs_file_localio *nfl, 268 const fmode_t mode) 269 { 270 struct nfsd_file *nf, __rcu **pnf; 271 272 if (!nfs_server_is_local(clp)) 273 return NULL; 274 if (mode & ~(FMODE_READ | FMODE_WRITE)) 275 return NULL; 276 277 if (mode & FMODE_WRITE) 278 pnf = &nfl->rw_file; 279 else 280 pnf = &nfl->ro_file; 281 282 nf = __nfs_local_open_fh(clp, cred, fh, nfl, pnf, mode); 283 if (IS_ERR(nf)) 284 return NULL; 285 return nf; 286 } 287 EXPORT_SYMBOL_GPL(nfs_local_open_fh); 288 289 static void 290 nfs_local_iocb_free(struct nfs_local_kiocb *iocb) 291 { 292 kfree(iocb->bvec); 293 kfree(iocb); 294 } 295 296 static struct nfs_local_kiocb * 297 nfs_local_iocb_alloc(struct nfs_pgio_header *hdr, 298 struct file *file, gfp_t flags) 299 { 300 struct nfs_local_kiocb *iocb; 301 302 iocb = kzalloc(sizeof(*iocb), flags); 303 if (iocb == NULL) 304 return NULL; 305 306 iocb->bvec = kmalloc_array(hdr->page_array.npages, 307 sizeof(struct bio_vec), flags); 308 if (iocb->bvec == NULL) { 309 kfree(iocb); 310 return NULL; 311 } 312 313 init_sync_kiocb(&iocb->kiocb, file); 314 315 iocb->hdr = hdr; 316 iocb->kiocb.ki_pos = hdr->args.offset; 317 iocb->kiocb.ki_flags &= ~IOCB_APPEND; 318 iocb->kiocb.ki_complete = NULL; 319 iocb->aio_complete_work = NULL; 320 321 iocb->end_iter_index = -1; 322 323 return iocb; 324 } 325 326 static bool 327 nfs_is_local_dio_possible(struct nfs_local_kiocb *iocb, int rw, 328 size_t len, struct nfs_local_dio *local_dio) 329 { 330 struct nfs_pgio_header *hdr = iocb->hdr; 331 loff_t offset = hdr->args.offset; 332 u32 nf_dio_mem_align, nf_dio_offset_align, nf_dio_read_offset_align; 333 loff_t start_end, orig_end, middle_end; 334 335 nfs_to->nfsd_file_dio_alignment(iocb->localio, &nf_dio_mem_align, 336 &nf_dio_offset_align, &nf_dio_read_offset_align); 337 if (rw == ITER_DEST) 338 nf_dio_offset_align = nf_dio_read_offset_align; 339 340 if (unlikely(!nf_dio_mem_align || !nf_dio_offset_align)) 341 return false; 342 if (unlikely(nf_dio_offset_align > PAGE_SIZE)) 343 return false; 344 if (unlikely(len < nf_dio_offset_align)) 345 return false; 346 347 local_dio->mem_align = nf_dio_mem_align; 348 local_dio->offset_align = nf_dio_offset_align; 349 350 start_end = round_up(offset, nf_dio_offset_align); 351 orig_end = offset + len; 352 middle_end = round_down(orig_end, nf_dio_offset_align); 353 354 local_dio->middle_offset = start_end; 355 local_dio->end_offset = middle_end; 356 357 local_dio->start_len = start_end - offset; 358 local_dio->middle_len = middle_end - start_end; 359 local_dio->end_len = orig_end - middle_end; 360 361 if (rw == ITER_DEST) 362 trace_nfs_local_dio_read(hdr->inode, offset, len, local_dio); 363 else 364 trace_nfs_local_dio_write(hdr->inode, offset, len, local_dio); 365 return true; 366 } 367 368 static bool nfs_iov_iter_aligned_bvec(const struct iov_iter *i, 369 unsigned int addr_mask, unsigned int len_mask) 370 { 371 const struct bio_vec *bvec = i->bvec; 372 size_t skip = i->iov_offset; 373 size_t size = i->count; 374 375 if (size & len_mask) 376 return false; 377 do { 378 size_t len = bvec->bv_len; 379 380 if (len > size) 381 len = size; 382 if ((unsigned long)(bvec->bv_offset + skip) & addr_mask) 383 return false; 384 bvec++; 385 size -= len; 386 skip = 0; 387 } while (size); 388 389 return true; 390 } 391 392 static void 393 nfs_local_iter_setup(struct iov_iter *iter, int rw, struct bio_vec *bvec, 394 unsigned int nvecs, unsigned long total, 395 size_t start, size_t len) 396 { 397 iov_iter_bvec(iter, rw, bvec, nvecs, total); 398 if (start) 399 iov_iter_advance(iter, start); 400 iov_iter_truncate(iter, len); 401 } 402 403 /* 404 * Setup as many as 3 iov_iter based on extents described by @local_dio. 405 * Returns the number of iov_iter that were setup. 406 */ 407 static int 408 nfs_local_iters_setup_dio(struct nfs_local_kiocb *iocb, int rw, 409 unsigned int nvecs, unsigned long total, 410 struct nfs_local_dio *local_dio) 411 { 412 int n_iters = 0; 413 struct iov_iter *iters = iocb->iters; 414 415 /* Setup misaligned start? */ 416 if (local_dio->start_len) { 417 nfs_local_iter_setup(&iters[n_iters], rw, iocb->bvec, 418 nvecs, total, 0, local_dio->start_len); 419 ++n_iters; 420 } 421 422 /* 423 * Setup DIO-aligned middle, if there is no misaligned end (below) 424 * then AIO completion is used, see nfs_local_call_{read,write} 425 */ 426 nfs_local_iter_setup(&iters[n_iters], rw, iocb->bvec, nvecs, 427 total, local_dio->start_len, local_dio->middle_len); 428 429 iocb->iter_is_dio_aligned[n_iters] = 430 nfs_iov_iter_aligned_bvec(&iters[n_iters], 431 local_dio->mem_align-1, local_dio->offset_align-1); 432 433 if (unlikely(!iocb->iter_is_dio_aligned[n_iters])) { 434 trace_nfs_local_dio_misaligned(iocb->hdr->inode, 435 local_dio->start_len, local_dio->middle_len, local_dio); 436 return 0; /* no DIO-aligned IO possible */ 437 } 438 iocb->end_iter_index = n_iters; 439 ++n_iters; 440 441 /* Setup misaligned end? */ 442 if (local_dio->end_len) { 443 nfs_local_iter_setup(&iters[n_iters], rw, iocb->bvec, 444 nvecs, total, local_dio->start_len + 445 local_dio->middle_len, local_dio->end_len); 446 iocb->end_iter_index = n_iters; 447 ++n_iters; 448 } 449 450 atomic_set(&iocb->n_iters, n_iters); 451 return n_iters; 452 } 453 454 static noinline_for_stack void 455 nfs_local_iters_init(struct nfs_local_kiocb *iocb, int rw) 456 { 457 struct nfs_pgio_header *hdr = iocb->hdr; 458 struct page **pagevec = hdr->page_array.pagevec; 459 unsigned long v, total; 460 unsigned int base; 461 size_t len; 462 463 v = 0; 464 total = hdr->args.count; 465 base = hdr->args.pgbase; 466 while (total && v < hdr->page_array.npages) { 467 len = min_t(size_t, total, PAGE_SIZE - base); 468 bvec_set_page(&iocb->bvec[v], *pagevec, len, base); 469 total -= len; 470 ++pagevec; 471 ++v; 472 base = 0; 473 } 474 len = hdr->args.count - total; 475 476 /* 477 * For each iocb, iocb->n_iters is always at least 1 and we always 478 * end io after first nfs_local_pgio_done call unless misaligned DIO. 479 */ 480 atomic_set(&iocb->n_iters, 1); 481 482 if (test_bit(NFS_IOHDR_ODIRECT, &hdr->flags)) { 483 struct nfs_local_dio local_dio; 484 485 if (nfs_is_local_dio_possible(iocb, rw, len, &local_dio) && 486 nfs_local_iters_setup_dio(iocb, rw, v, len, &local_dio) != 0) { 487 /* Ensure DIO WRITE's IO on stable storage upon completion */ 488 if (rw == ITER_SOURCE) 489 iocb->kiocb.ki_flags |= IOCB_DSYNC|IOCB_SYNC; 490 return; /* is DIO-aligned */ 491 } 492 } 493 494 /* Use buffered IO */ 495 iov_iter_bvec(&iocb->iters[0], rw, iocb->bvec, v, len); 496 } 497 498 static void 499 nfs_local_hdr_release(struct nfs_pgio_header *hdr, 500 const struct rpc_call_ops *call_ops) 501 { 502 call_ops->rpc_call_done(&hdr->task, hdr); 503 call_ops->rpc_release(hdr); 504 } 505 506 static void 507 nfs_local_pgio_init(struct nfs_pgio_header *hdr, 508 const struct rpc_call_ops *call_ops) 509 { 510 hdr->task.tk_ops = call_ops; 511 if (!hdr->task.tk_start) 512 hdr->task.tk_start = ktime_get(); 513 } 514 515 static bool 516 nfs_local_pgio_done(struct nfs_local_kiocb *iocb, long status, bool force) 517 { 518 struct nfs_pgio_header *hdr = iocb->hdr; 519 520 /* Must handle partial completions */ 521 if (status >= 0) { 522 hdr->res.count += status; 523 /* @hdr was initialized to 0 (zeroed during allocation) */ 524 if (hdr->task.tk_status == 0) 525 hdr->res.op_status = NFS4_OK; 526 } else { 527 hdr->res.op_status = nfs_localio_errno_to_nfs4_stat(status); 528 hdr->task.tk_status = status; 529 } 530 531 if (force) 532 return true; 533 534 BUG_ON(atomic_read(&iocb->n_iters) <= 0); 535 return atomic_dec_and_test(&iocb->n_iters); 536 } 537 538 static void 539 nfs_local_iocb_release(struct nfs_local_kiocb *iocb) 540 { 541 nfs_local_file_put(iocb->localio); 542 nfs_local_iocb_free(iocb); 543 } 544 545 static void 546 nfs_local_pgio_release(struct nfs_local_kiocb *iocb) 547 { 548 struct nfs_pgio_header *hdr = iocb->hdr; 549 550 nfs_local_iocb_release(iocb); 551 nfs_local_hdr_release(hdr, hdr->task.tk_ops); 552 } 553 554 /* 555 * Complete the I/O from iocb->kiocb.ki_complete() 556 * 557 * Note that this function can be called from a bottom half context, 558 * hence we need to queue the rpc_call_done() etc to a workqueue 559 */ 560 static inline void nfs_local_pgio_aio_complete(struct nfs_local_kiocb *iocb) 561 { 562 INIT_WORK(&iocb->work, iocb->aio_complete_work); 563 queue_work(nfsiod_workqueue, &iocb->work); 564 } 565 566 static void nfs_local_read_done(struct nfs_local_kiocb *iocb) 567 { 568 struct nfs_pgio_header *hdr = iocb->hdr; 569 struct file *filp = iocb->kiocb.ki_filp; 570 long status = hdr->task.tk_status; 571 572 if ((iocb->kiocb.ki_flags & IOCB_DIRECT) && status == -EINVAL) { 573 /* Underlying FS will return -EINVAL if misaligned DIO is attempted. */ 574 pr_info_ratelimited("nfs: Unexpected direct I/O read alignment failure\n"); 575 } 576 577 /* 578 * Must clear replen otherwise NFSv3 data corruption will occur 579 * if/when switching from LOCALIO back to using normal RPC. 580 */ 581 hdr->res.replen = 0; 582 583 /* nfs_readpage_result() handles short read */ 584 585 if (hdr->args.offset + hdr->res.count >= i_size_read(file_inode(filp))) 586 hdr->res.eof = true; 587 588 dprintk("%s: read %ld bytes eof %d.\n", __func__, 589 status > 0 ? status : 0, hdr->res.eof); 590 } 591 592 static inline void nfs_local_read_iocb_done(struct nfs_local_kiocb *iocb) 593 { 594 nfs_local_read_done(iocb); 595 nfs_local_pgio_release(iocb); 596 } 597 598 static void nfs_local_read_aio_complete_work(struct work_struct *work) 599 { 600 struct nfs_local_kiocb *iocb = 601 container_of(work, struct nfs_local_kiocb, work); 602 603 nfs_local_read_iocb_done(iocb); 604 } 605 606 static void nfs_local_read_aio_complete(struct kiocb *kiocb, long ret) 607 { 608 struct nfs_local_kiocb *iocb = 609 container_of(kiocb, struct nfs_local_kiocb, kiocb); 610 611 /* AIO completion of DIO read should always be last to complete */ 612 if (unlikely(!nfs_local_pgio_done(iocb, ret, false))) 613 return; 614 615 nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_read_aio_complete_work */ 616 } 617 618 static void nfs_local_call_read(struct work_struct *work) 619 { 620 struct nfs_local_kiocb *iocb = 621 container_of(work, struct nfs_local_kiocb, work); 622 struct file *filp = iocb->kiocb.ki_filp; 623 const struct cred *save_cred; 624 bool force_done = false; 625 ssize_t status; 626 int n_iters; 627 628 save_cred = override_creds(filp->f_cred); 629 630 n_iters = atomic_read(&iocb->n_iters); 631 for (int i = 0; i < n_iters ; i++) { 632 if (iocb->iter_is_dio_aligned[i]) { 633 iocb->kiocb.ki_flags |= IOCB_DIRECT; 634 /* Only use AIO completion if DIO-aligned segment is last */ 635 if (i == iocb->end_iter_index) { 636 iocb->kiocb.ki_complete = nfs_local_read_aio_complete; 637 iocb->aio_complete_work = nfs_local_read_aio_complete_work; 638 } 639 } else 640 iocb->kiocb.ki_flags &= ~IOCB_DIRECT; 641 642 status = filp->f_op->read_iter(&iocb->kiocb, &iocb->iters[i]); 643 if (status != -EIOCBQUEUED) { 644 if (unlikely(status >= 0 && status < iocb->iters[i].count)) 645 force_done = true; /* Partial read */ 646 if (nfs_local_pgio_done(iocb, status, force_done)) { 647 nfs_local_read_iocb_done(iocb); 648 break; 649 } 650 } 651 } 652 653 revert_creds(save_cred); 654 } 655 656 static int 657 nfs_local_do_read(struct nfs_local_kiocb *iocb, 658 const struct rpc_call_ops *call_ops) 659 { 660 struct nfs_pgio_header *hdr = iocb->hdr; 661 662 dprintk("%s: vfs_read count=%u pos=%llu\n", 663 __func__, hdr->args.count, hdr->args.offset); 664 665 nfs_local_pgio_init(hdr, call_ops); 666 hdr->res.eof = false; 667 668 INIT_WORK(&iocb->work, nfs_local_call_read); 669 queue_work(nfslocaliod_workqueue, &iocb->work); 670 671 return 0; 672 } 673 674 static void 675 nfs_copy_boot_verifier(struct nfs_write_verifier *verifier, struct inode *inode) 676 { 677 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; 678 u32 *verf = (u32 *)verifier->data; 679 unsigned int seq; 680 681 do { 682 seq = read_seqbegin(&clp->cl_boot_lock); 683 verf[0] = (u32)clp->cl_nfssvc_boot.tv_sec; 684 verf[1] = (u32)clp->cl_nfssvc_boot.tv_nsec; 685 } while (read_seqretry(&clp->cl_boot_lock, seq)); 686 } 687 688 static void 689 nfs_reset_boot_verifier(struct inode *inode) 690 { 691 struct nfs_client *clp = NFS_SERVER(inode)->nfs_client; 692 693 write_seqlock(&clp->cl_boot_lock); 694 ktime_get_real_ts64(&clp->cl_nfssvc_boot); 695 write_sequnlock(&clp->cl_boot_lock); 696 } 697 698 static void 699 nfs_set_local_verifier(struct inode *inode, 700 struct nfs_writeverf *verf, 701 enum nfs3_stable_how how) 702 { 703 nfs_copy_boot_verifier(&verf->verifier, inode); 704 verf->committed = how; 705 } 706 707 /* Factored out from fs/nfsd/vfs.h:fh_getattr() */ 708 static int __vfs_getattr(const struct path *p, struct kstat *stat, int version) 709 { 710 u32 request_mask = STATX_BASIC_STATS; 711 712 if (version == 4) 713 request_mask |= (STATX_BTIME | STATX_CHANGE_COOKIE); 714 return vfs_getattr(p, stat, request_mask, AT_STATX_SYNC_AS_STAT); 715 } 716 717 /* Copied from fs/nfsd/nfsfh.c:nfsd4_change_attribute() */ 718 static u64 __nfsd4_change_attribute(const struct kstat *stat, 719 const struct inode *inode) 720 { 721 u64 chattr; 722 723 if (stat->result_mask & STATX_CHANGE_COOKIE) { 724 chattr = stat->change_cookie; 725 if (S_ISREG(inode->i_mode) && 726 !(stat->attributes & STATX_ATTR_CHANGE_MONOTONIC)) { 727 chattr += (u64)stat->ctime.tv_sec << 30; 728 chattr += stat->ctime.tv_nsec; 729 } 730 } else { 731 chattr = time_to_chattr(&stat->ctime); 732 } 733 return chattr; 734 } 735 736 static void nfs_local_vfs_getattr(struct nfs_local_kiocb *iocb) 737 { 738 struct kstat stat; 739 struct file *filp = iocb->kiocb.ki_filp; 740 struct nfs_pgio_header *hdr = iocb->hdr; 741 struct nfs_fattr *fattr = hdr->res.fattr; 742 int version = NFS_PROTO(hdr->inode)->version; 743 744 if (unlikely(!fattr) || __vfs_getattr(&filp->f_path, &stat, version)) 745 return; 746 747 fattr->valid = (NFS_ATTR_FATTR_FILEID | 748 NFS_ATTR_FATTR_CHANGE | 749 NFS_ATTR_FATTR_SIZE | 750 NFS_ATTR_FATTR_ATIME | 751 NFS_ATTR_FATTR_MTIME | 752 NFS_ATTR_FATTR_CTIME | 753 NFS_ATTR_FATTR_SPACE_USED); 754 755 fattr->fileid = stat.ino; 756 fattr->size = stat.size; 757 fattr->atime = stat.atime; 758 fattr->mtime = stat.mtime; 759 fattr->ctime = stat.ctime; 760 if (version == 4) { 761 fattr->change_attr = 762 __nfsd4_change_attribute(&stat, file_inode(filp)); 763 } else 764 fattr->change_attr = nfs_timespec_to_change_attr(&fattr->ctime); 765 fattr->du.nfs3.used = stat.blocks << 9; 766 } 767 768 static void nfs_local_write_done(struct nfs_local_kiocb *iocb) 769 { 770 struct nfs_pgio_header *hdr = iocb->hdr; 771 long status = hdr->task.tk_status; 772 773 dprintk("%s: wrote %ld bytes.\n", __func__, status > 0 ? status : 0); 774 775 if ((iocb->kiocb.ki_flags & IOCB_DIRECT) && status == -EINVAL) { 776 /* Underlying FS will return -EINVAL if misaligned DIO is attempted. */ 777 pr_info_ratelimited("nfs: Unexpected direct I/O write alignment failure\n"); 778 } 779 780 /* Handle short writes as if they are ENOSPC */ 781 status = hdr->res.count; 782 if (status > 0 && status < hdr->args.count) { 783 hdr->mds_offset += status; 784 hdr->args.offset += status; 785 hdr->args.pgbase += status; 786 hdr->args.count -= status; 787 nfs_set_pgio_error(hdr, -ENOSPC, hdr->args.offset); 788 status = -ENOSPC; 789 /* record -ENOSPC in terms of nfs_local_pgio_done */ 790 (void) nfs_local_pgio_done(iocb, status, true); 791 } 792 if (hdr->task.tk_status < 0) 793 nfs_reset_boot_verifier(hdr->inode); 794 } 795 796 static inline void nfs_local_write_iocb_done(struct nfs_local_kiocb *iocb) 797 { 798 nfs_local_write_done(iocb); 799 nfs_local_vfs_getattr(iocb); 800 nfs_local_pgio_release(iocb); 801 } 802 803 static void nfs_local_write_aio_complete_work(struct work_struct *work) 804 { 805 struct nfs_local_kiocb *iocb = 806 container_of(work, struct nfs_local_kiocb, work); 807 808 nfs_local_write_iocb_done(iocb); 809 } 810 811 static void nfs_local_write_aio_complete(struct kiocb *kiocb, long ret) 812 { 813 struct nfs_local_kiocb *iocb = 814 container_of(kiocb, struct nfs_local_kiocb, kiocb); 815 816 /* AIO completion of DIO write should always be last to complete */ 817 if (unlikely(!nfs_local_pgio_done(iocb, ret, false))) 818 return; 819 820 nfs_local_pgio_aio_complete(iocb); /* Calls nfs_local_write_aio_complete_work */ 821 } 822 823 static void nfs_local_call_write(struct work_struct *work) 824 { 825 struct nfs_local_kiocb *iocb = 826 container_of(work, struct nfs_local_kiocb, work); 827 struct file *filp = iocb->kiocb.ki_filp; 828 unsigned long old_flags = current->flags; 829 const struct cred *save_cred; 830 bool force_done = false; 831 ssize_t status; 832 int n_iters; 833 834 current->flags |= PF_LOCAL_THROTTLE | PF_MEMALLOC_NOIO; 835 save_cred = override_creds(filp->f_cred); 836 837 file_start_write(filp); 838 n_iters = atomic_read(&iocb->n_iters); 839 for (int i = 0; i < n_iters ; i++) { 840 if (iocb->iter_is_dio_aligned[i]) { 841 iocb->kiocb.ki_flags |= IOCB_DIRECT; 842 /* Only use AIO completion if DIO-aligned segment is last */ 843 if (i == iocb->end_iter_index) { 844 iocb->kiocb.ki_complete = nfs_local_write_aio_complete; 845 iocb->aio_complete_work = nfs_local_write_aio_complete_work; 846 } 847 } else 848 iocb->kiocb.ki_flags &= ~IOCB_DIRECT; 849 850 status = filp->f_op->write_iter(&iocb->kiocb, &iocb->iters[i]); 851 if (status != -EIOCBQUEUED) { 852 if (unlikely(status >= 0 && status < iocb->iters[i].count)) 853 force_done = true; /* Partial write */ 854 if (nfs_local_pgio_done(iocb, status, force_done)) { 855 nfs_local_write_iocb_done(iocb); 856 break; 857 } 858 } 859 } 860 file_end_write(filp); 861 862 revert_creds(save_cred); 863 current->flags = old_flags; 864 } 865 866 static int 867 nfs_local_do_write(struct nfs_local_kiocb *iocb, 868 const struct rpc_call_ops *call_ops) 869 { 870 struct nfs_pgio_header *hdr = iocb->hdr; 871 872 dprintk("%s: vfs_write count=%u pos=%llu %s\n", 873 __func__, hdr->args.count, hdr->args.offset, 874 (hdr->args.stable == NFS_UNSTABLE) ? "unstable" : "stable"); 875 876 switch (hdr->args.stable) { 877 default: 878 break; 879 case NFS_DATA_SYNC: 880 iocb->kiocb.ki_flags |= IOCB_DSYNC; 881 break; 882 case NFS_FILE_SYNC: 883 iocb->kiocb.ki_flags |= IOCB_DSYNC|IOCB_SYNC; 884 } 885 886 nfs_local_pgio_init(hdr, call_ops); 887 888 nfs_set_local_verifier(hdr->inode, hdr->res.verf, hdr->args.stable); 889 890 INIT_WORK(&iocb->work, nfs_local_call_write); 891 queue_work(nfslocaliod_workqueue, &iocb->work); 892 893 return 0; 894 } 895 896 static struct nfs_local_kiocb * 897 nfs_local_iocb_init(struct nfs_pgio_header *hdr, struct nfsd_file *localio) 898 { 899 struct file *file = nfs_to->nfsd_file_file(localio); 900 struct nfs_local_kiocb *iocb; 901 gfp_t gfp_mask; 902 int rw; 903 904 if (hdr->rw_mode & FMODE_READ) { 905 if (!file->f_op->read_iter) 906 return ERR_PTR(-EOPNOTSUPP); 907 gfp_mask = GFP_KERNEL; 908 rw = ITER_DEST; 909 } else { 910 if (!file->f_op->write_iter) 911 return ERR_PTR(-EOPNOTSUPP); 912 gfp_mask = GFP_NOIO; 913 rw = ITER_SOURCE; 914 } 915 916 iocb = nfs_local_iocb_alloc(hdr, file, gfp_mask); 917 if (iocb == NULL) 918 return ERR_PTR(-ENOMEM); 919 iocb->hdr = hdr; 920 iocb->localio = localio; 921 922 nfs_local_iters_init(iocb, rw); 923 924 return iocb; 925 } 926 927 int nfs_local_doio(struct nfs_client *clp, struct nfsd_file *localio, 928 struct nfs_pgio_header *hdr, 929 const struct rpc_call_ops *call_ops) 930 { 931 struct nfs_local_kiocb *iocb; 932 int status = 0; 933 934 if (!hdr->args.count) 935 return 0; 936 937 iocb = nfs_local_iocb_init(hdr, localio); 938 if (IS_ERR(iocb)) 939 return PTR_ERR(iocb); 940 941 switch (hdr->rw_mode) { 942 case FMODE_READ: 943 status = nfs_local_do_read(iocb, call_ops); 944 break; 945 case FMODE_WRITE: 946 status = nfs_local_do_write(iocb, call_ops); 947 break; 948 default: 949 dprintk("%s: invalid mode: %d\n", __func__, 950 hdr->rw_mode); 951 status = -EOPNOTSUPP; 952 } 953 954 if (status != 0) { 955 if (status == -EAGAIN) 956 nfs_localio_disable_client(clp); 957 nfs_local_iocb_release(iocb); 958 hdr->task.tk_status = status; 959 nfs_local_hdr_release(hdr, call_ops); 960 } 961 return status; 962 } 963 964 static void 965 nfs_local_init_commit(struct nfs_commit_data *data, 966 const struct rpc_call_ops *call_ops) 967 { 968 data->task.tk_ops = call_ops; 969 } 970 971 static int 972 nfs_local_run_commit(struct file *filp, struct nfs_commit_data *data) 973 { 974 loff_t start = data->args.offset; 975 loff_t end = LLONG_MAX; 976 977 if (data->args.count > 0) { 978 end = start + data->args.count - 1; 979 if (end < start) 980 end = LLONG_MAX; 981 } 982 983 dprintk("%s: commit %llu - %llu\n", __func__, start, end); 984 return vfs_fsync_range(filp, start, end, 0); 985 } 986 987 static void 988 nfs_local_commit_done(struct nfs_commit_data *data, int status) 989 { 990 if (status >= 0) { 991 nfs_set_local_verifier(data->inode, 992 data->res.verf, 993 NFS_FILE_SYNC); 994 data->res.op_status = NFS4_OK; 995 data->task.tk_status = 0; 996 } else { 997 nfs_reset_boot_verifier(data->inode); 998 data->res.op_status = nfs_localio_errno_to_nfs4_stat(status); 999 data->task.tk_status = status; 1000 } 1001 } 1002 1003 static void 1004 nfs_local_release_commit_data(struct nfsd_file *localio, 1005 struct nfs_commit_data *data, 1006 const struct rpc_call_ops *call_ops) 1007 { 1008 nfs_local_file_put(localio); 1009 call_ops->rpc_call_done(&data->task, data); 1010 call_ops->rpc_release(data); 1011 } 1012 1013 static void 1014 nfs_local_fsync_ctx_free(struct nfs_local_fsync_ctx *ctx) 1015 { 1016 nfs_local_release_commit_data(ctx->localio, ctx->data, 1017 ctx->data->task.tk_ops); 1018 kfree(ctx); 1019 } 1020 1021 static void 1022 nfs_local_fsync_work(struct work_struct *work) 1023 { 1024 struct nfs_local_fsync_ctx *ctx; 1025 int status; 1026 1027 ctx = container_of(work, struct nfs_local_fsync_ctx, work); 1028 1029 status = nfs_local_run_commit(nfs_to->nfsd_file_file(ctx->localio), 1030 ctx->data); 1031 nfs_local_commit_done(ctx->data, status); 1032 if (ctx->done != NULL) 1033 complete(ctx->done); 1034 nfs_local_fsync_ctx_free(ctx); 1035 } 1036 1037 static struct nfs_local_fsync_ctx * 1038 nfs_local_fsync_ctx_alloc(struct nfs_commit_data *data, 1039 struct nfsd_file *localio, gfp_t flags) 1040 { 1041 struct nfs_local_fsync_ctx *ctx = kmalloc(sizeof(*ctx), flags); 1042 1043 if (ctx != NULL) { 1044 ctx->localio = localio; 1045 ctx->data = data; 1046 INIT_WORK(&ctx->work, nfs_local_fsync_work); 1047 ctx->done = NULL; 1048 } 1049 return ctx; 1050 } 1051 1052 int nfs_local_commit(struct nfsd_file *localio, 1053 struct nfs_commit_data *data, 1054 const struct rpc_call_ops *call_ops, int how) 1055 { 1056 struct nfs_local_fsync_ctx *ctx; 1057 1058 ctx = nfs_local_fsync_ctx_alloc(data, localio, GFP_KERNEL); 1059 if (!ctx) { 1060 nfs_local_commit_done(data, -ENOMEM); 1061 nfs_local_release_commit_data(localio, data, call_ops); 1062 return -ENOMEM; 1063 } 1064 1065 nfs_local_init_commit(data, call_ops); 1066 1067 if (how & FLUSH_SYNC) { 1068 DECLARE_COMPLETION_ONSTACK(done); 1069 ctx->done = &done; 1070 queue_work(nfsiod_workqueue, &ctx->work); 1071 wait_for_completion(&done); 1072 } else 1073 queue_work(nfsiod_workqueue, &ctx->work); 1074 1075 return 0; 1076 } 1077