/*
 * linux/fs/nfs/read.c
 *
 * Block I/O for NFS
 *
 * Partial copy of Linus' read cache modifications to fs/nfs/file.c
 * modified for async RPC by okir@monad.swb.de
 */

#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/module.h>

#include "pnfs.h"

#include "nfs4_fs.h"
#include "internal.h"
#include "iostat.h"
#include "fscache.h"

#define NFSDBG_FACILITY		NFSDBG_PAGECACHE

static const struct nfs_pageio_ops nfs_pageio_read_ops;
static const struct rpc_call_ops nfs_read_common_ops;
static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops;

static struct kmem_cache *nfs_rdata_cachep;

struct nfs_read_header *nfs_readhdr_alloc(void)
{
	struct nfs_read_header *rhdr;

	rhdr = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL);
	if (rhdr) {
		struct nfs_pgio_header *hdr = &rhdr->header;

		INIT_LIST_HEAD(&hdr->pages);
		INIT_LIST_HEAD(&hdr->rpc_list);
		spin_lock_init(&hdr->lock);
		atomic_set(&hdr->refcnt, 0);
	}
	return rhdr;
}

static struct nfs_read_data *nfs_readdata_alloc(struct nfs_pgio_header *hdr,
						unsigned int pagecount)
{
	struct nfs_read_data *data, *prealloc;

	prealloc = &container_of(hdr, struct nfs_read_header, header)->rpc_data;
	if (prealloc->header == NULL)
		data = prealloc;
	else
		data = kzalloc(sizeof(*data), GFP_KERNEL);
	if (!data)
		goto out;

	if (nfs_pgarray_set(&data->pages, pagecount)) {
		data->header = hdr;
		atomic_inc(&hdr->refcnt);
	} else {
		if (data != prealloc)
			kfree(data);
		data = NULL;
	}
out:
	return data;
}

void nfs_readhdr_free(struct nfs_pgio_header *hdr)
{
	struct nfs_read_header *rhdr = container_of(hdr, struct nfs_read_header, header);

	kmem_cache_free(nfs_rdata_cachep, rhdr);
}

void nfs_readdata_release(struct nfs_read_data *rdata)
{
	struct nfs_pgio_header *hdr = rdata->header;
	struct nfs_read_header *read_header = container_of(hdr, struct nfs_read_header, header);

	put_nfs_open_context(rdata->args.context);
	if (rdata->pages.pagevec != rdata->pages.page_array)
		kfree(rdata->pages.pagevec);
	if (rdata != &read_header->rpc_data)
		kfree(rdata);
	else
		rdata->header = NULL;
	if (atomic_dec_and_test(&hdr->refcnt))
		hdr->completion_ops->completion(hdr);
}

static
int nfs_return_empty_page(struct page *page)
{
	zero_user(page, 0, PAGE_CACHE_SIZE);
	SetPageUptodate(page);
	unlock_page(page);
	return 0;
}

void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
		struct inode *inode,
		const struct nfs_pgio_completion_ops *compl_ops)
{
	nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, compl_ops,
			NFS_SERVER(inode)->rsize, 0);
}

void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
{
	pgio->pg_ops = &nfs_pageio_read_ops;
	pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize;
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);

void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
			  struct inode *inode,
			  const struct nfs_pgio_completion_ops *compl_ops)
{
	if (!pnfs_pageio_init_read(pgio, inode, compl_ops))
		nfs_pageio_init_read_mds(pgio, inode, compl_ops);
}

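/*
 * Read a single page asynchronously.  The tail of the page beyond the
 * cached file length is zero-filled locally rather than requested from
 * the server.
 */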
int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
		       struct page *page)
{
	struct nfs_page	*new;
	unsigned int len;
	struct nfs_pageio_descriptor pgio;

	len = nfs_page_length(page);
	if (len == 0)
		return nfs_return_empty_page(page);
	new = nfs_create_request(ctx, inode, page, 0, len);
	if (IS_ERR(new)) {
		unlock_page(page);
		return PTR_ERR(new);
	}
	if (len < PAGE_CACHE_SIZE)
		zero_user_segment(page, len, PAGE_CACHE_SIZE);

	nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops);
	nfs_pageio_add_request(&pgio, new);
	nfs_pageio_complete(&pgio);
	NFS_I(inode)->read_io += pgio.pg_bytes_written;
	return 0;
}

static void nfs_readpage_release(struct nfs_page *req)
{
	struct inode *d_inode = req->wb_context->dentry->d_inode;

	if (PageUptodate(req->wb_page))
		nfs_readpage_to_fscache(d_inode, req->wb_page, 0);

	unlock_page(req->wb_page);

	dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
			req->wb_context->dentry->d_inode->i_sb->s_id,
			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
			req->wb_bytes,
			(long long)req_offset(req));
	nfs_release_request(req);
}

/* Note io was page aligned */
static void nfs_read_completion(struct nfs_pgio_header *hdr)
{
	unsigned long bytes = 0;

	if (test_bit(NFS_IOHDR_REDO, &hdr->flags))
		goto out;
	while (!list_empty(&hdr->pages)) {
		struct nfs_page *req = nfs_list_entry(hdr->pages.next);
		struct page *page = req->wb_page;

		if (test_bit(NFS_IOHDR_EOF, &hdr->flags)) {
			if (bytes > hdr->good_bytes)
				zero_user(page, 0, PAGE_SIZE);
			else if (hdr->good_bytes - bytes < PAGE_SIZE)
				zero_user_segment(page,
					hdr->good_bytes & ~PAGE_MASK,
					PAGE_SIZE);
		}
		bytes += req->wb_bytes;
		if (test_bit(NFS_IOHDR_ERROR, &hdr->flags)) {
			if (bytes <= hdr->good_bytes)
				SetPageUptodate(page);
		} else
			SetPageUptodate(page);
		nfs_list_remove_request(req);
		nfs_readpage_release(req);
	}
out:
	hdr->release(hdr);
}

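/*
 * Issue the READ RPC described by @data asynchronously on the nfsiod
 * workqueue.  Returns 0 once the task has been started, or a negative
 * errno if the task could not be set up.
 */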
int nfs_initiate_read(struct rpc_clnt *clnt,
		      struct nfs_read_data *data,
		      const struct rpc_call_ops *call_ops, int flags)
{
	struct inode *inode = data->header->inode;
	int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
	struct rpc_task *task;
	struct rpc_message msg = {
		.rpc_argp = &data->args,
		.rpc_resp = &data->res,
		.rpc_cred = data->header->cred,
	};
	struct rpc_task_setup task_setup_data = {
		.task = &data->task,
		.rpc_client = clnt,
		.rpc_message = &msg,
		.callback_ops = call_ops,
		.callback_data = data,
		.workqueue = nfsiod_workqueue,
		.flags = RPC_TASK_ASYNC | swap_flags | flags,
	};

	/* Set up the initial task struct. */
	NFS_PROTO(inode)->read_setup(data, &msg);

	dprintk("NFS: %5u initiated read call (req %s/%lld, %u bytes @ "
			"offset %llu)\n",
			data->task.tk_pid,
			inode->i_sb->s_id,
			(long long)NFS_FILEID(inode),
			data->args.count,
			(unsigned long long)data->args.offset);

	task = rpc_run_task(&task_setup_data);
	if (IS_ERR(task))
		return PTR_ERR(task);
	rpc_put_task(task);
	return 0;
}
EXPORT_SYMBOL_GPL(nfs_initiate_read);

/*
 * Set up the NFS read request struct
 */
static void nfs_read_rpcsetup(struct nfs_read_data *data,
		unsigned int count, unsigned int offset)
{
	struct nfs_page *req = data->header->req;

	data->args.fh = NFS_FH(data->header->inode);
	data->args.offset = req_offset(req) + offset;
	data->args.pgbase = req->wb_pgbase + offset;
	data->args.pages = data->pages.pagevec;
	data->args.count = count;
	data->args.context = get_nfs_open_context(req->wb_context);
	data->args.lock_context = req->wb_lock_context;

	data->res.fattr = &data->fattr;
	data->res.count = count;
	data->res.eof = 0;
	nfs_fattr_init(&data->fattr);
}

static int nfs_do_read(struct nfs_read_data *data,
		const struct rpc_call_ops *call_ops)
{
	struct inode *inode = data->header->inode;

	return nfs_initiate_read(NFS_CLIENT(inode), data, call_ops, 0);
}

static int
nfs_do_multiple_reads(struct list_head *head,
		const struct rpc_call_ops *call_ops)
{
	struct nfs_read_data *data;
	int ret = 0;

	while (!list_empty(head)) {
		int ret2;

		data = list_first_entry(head, struct nfs_read_data, list);
		list_del_init(&data->list);

		ret2 = nfs_do_read(data, call_ops);
		if (ret == 0)
			ret = ret2;
	}
	return ret;
}

static void
nfs_async_read_error(struct list_head *head)
{
	struct nfs_page	*req;

	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_readpage_release(req);
	}
}

static const struct nfs_pgio_completion_ops nfs_async_read_completion_ops = {
	.error_cleanup = nfs_async_read_error,
	.completion = nfs_read_completion,
};

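/*
 * Undo a failed pagein setup: mark the header NFS_IOHDR_REDO so that
 * nfs_read_completion() skips the page updates, drop any nfs_read_data
 * already queued on the header's rpc_list, and hand the remaining
 * requests back to the completion ops for cleanup.
 */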
static void nfs_pagein_error(struct nfs_pageio_descriptor *desc,
		struct nfs_pgio_header *hdr)
{
	set_bit(NFS_IOHDR_REDO, &hdr->flags);
	while (!list_empty(&hdr->rpc_list)) {
		struct nfs_read_data *data = list_first_entry(&hdr->rpc_list,
				struct nfs_read_data, list);
		list_del(&data->list);
		nfs_readdata_release(data);
	}
	desc->pg_completion_ops->error_cleanup(&desc->pg_list);
}

/*
 * Generate multiple requests to fill a single page.
 *
 * We optimize to reduce the number of read operations on the wire.  If we
 * detect that we're reading a page, or an area of a page, that is past the
 * end of file, we do not generate NFS read operations but just clear the
 * parts of the page that would have come back zero from the server anyway.
 *
 * We rely on the cached value of i_size to make this determination; another
 * client can fill pages on the server past our cached end-of-file, but we
 * won't see the new data until our attribute cache is updated.  This is more
 * or less conventional NFS client behavior.
 */
static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc,
			    struct nfs_pgio_header *hdr)
{
	struct nfs_page *req = hdr->req;
	struct page *page = req->wb_page;
	struct nfs_read_data *data;
	size_t rsize = desc->pg_bsize, nbytes;
	unsigned int offset;

	offset = 0;
	nbytes = desc->pg_count;
	do {
		size_t len = min(nbytes, rsize);

		data = nfs_readdata_alloc(hdr, 1);
		if (!data) {
			nfs_pagein_error(desc, hdr);
			return -ENOMEM;
		}
		data->pages.pagevec[0] = page;
		nfs_read_rpcsetup(data, len, offset);
		list_add(&data->list, &hdr->rpc_list);
		nbytes -= len;
		offset += len;
	} while (nbytes != 0);

	nfs_list_remove_request(req);
	nfs_list_add_request(req, &hdr->pages);
	desc->pg_rpc_callops = &nfs_read_common_ops;
	return 0;
}

static int nfs_pagein_one(struct nfs_pageio_descriptor *desc,
			  struct nfs_pgio_header *hdr)
{
	struct nfs_page *req;
	struct page **pages;
	struct nfs_read_data *data;
	struct list_head *head = &desc->pg_list;

	data = nfs_readdata_alloc(hdr, nfs_page_array_len(desc->pg_base,
							  desc->pg_count));
	if (!data) {
		nfs_pagein_error(desc, hdr);
		return -ENOMEM;
	}

	pages = data->pages.pagevec;
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_list_add_request(req, &hdr->pages);
		*pages++ = req->wb_page;
	}

	nfs_read_rpcsetup(data, desc->pg_count, 0);
	list_add(&data->list, &hdr->rpc_list);
	desc->pg_rpc_callops = &nfs_read_common_ops;
	return 0;
}

int nfs_generic_pagein(struct nfs_pageio_descriptor *desc,
		       struct nfs_pgio_header *hdr)
{
	if (desc->pg_bsize < PAGE_CACHE_SIZE)
		return nfs_pagein_multi(desc, hdr);
	return nfs_pagein_one(desc, hdr);
}

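/*
 * Turn the requests queued on @desc into one or more READ RPCs and send
 * them.  The header's refcount is held across setup so that the
 * completion callback cannot run until every RPC has been dispatched.
 */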
static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
{
	struct nfs_read_header *rhdr;
	struct nfs_pgio_header *hdr;
	int ret;

	rhdr = nfs_readhdr_alloc();
	if (!rhdr) {
		desc->pg_completion_ops->error_cleanup(&desc->pg_list);
		return -ENOMEM;
	}
	hdr = &rhdr->header;
	nfs_pgheader_init(desc, hdr, nfs_readhdr_free);
	atomic_inc(&hdr->refcnt);
	ret = nfs_generic_pagein(desc, hdr);
	if (ret == 0)
		ret = nfs_do_multiple_reads(&hdr->rpc_list,
					    desc->pg_rpc_callops);
	if (atomic_dec_and_test(&hdr->refcnt))
		hdr->completion_ops->completion(hdr);
	return ret;
}

static const struct nfs_pageio_ops nfs_pageio_read_ops = {
	.pg_test = nfs_generic_pg_test,
	.pg_doio = nfs_generic_pg_readpages,
};

/*
 * This is the callback from RPC telling us whether a reply was
 * received or some error occurred (timeout or socket shutdown).
 */
int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
{
	struct inode *inode = data->header->inode;
	int status;

	dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid,
			task->tk_status);

	status = NFS_PROTO(inode)->read_done(task, data);
	if (status != 0)
		return status;

	nfs_add_stats(inode, NFSIOS_SERVERREADBYTES, data->res.count);

	if (task->tk_status == -ESTALE) {
		set_bit(NFS_INO_STALE, &NFS_I(inode)->flags);
		nfs_mark_for_revalidate(inode);
	}
	return 0;
}

static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
{
	struct nfs_readargs *argp = &data->args;
	struct nfs_readres *resp = &data->res;

	/* This is a short read! */
	nfs_inc_stats(data->header->inode, NFSIOS_SHORTREAD);
	/* Has the server at least made some progress? */
	if (resp->count == 0) {
		nfs_set_pgio_error(data->header, -EIO, argp->offset);
		return;
	}
	/* Yes, so retry the read at the end of the data */
	data->mds_offset += resp->count;
	argp->offset += resp->count;
	argp->pgbase += resp->count;
	argp->count -= resp->count;
	rpc_restart_call_prepare(task);
}

static void nfs_readpage_result_common(struct rpc_task *task, void *calldata)
{
	struct nfs_read_data *data = calldata;
	struct nfs_pgio_header *hdr = data->header;

	/* Note the only returns of nfs_readpage_result are 0 and -EAGAIN */
	if (nfs_readpage_result(task, data) != 0)
		return;
	if (task->tk_status < 0)
		nfs_set_pgio_error(hdr, task->tk_status, data->args.offset);
	else if (data->res.eof) {
		loff_t bound;

		bound = data->args.offset + data->res.count;
		spin_lock(&hdr->lock);
		if (bound < hdr->io_start + hdr->good_bytes) {
			set_bit(NFS_IOHDR_EOF, &hdr->flags);
			clear_bit(NFS_IOHDR_ERROR, &hdr->flags);
			hdr->good_bytes = bound - hdr->io_start;
		}
		spin_unlock(&hdr->lock);
	} else if (data->res.count != data->args.count)
		nfs_readpage_retry(task, data);
}

static void nfs_readpage_release_common(void *calldata)
{
	nfs_readdata_release(calldata);
}

void nfs_read_prepare(struct rpc_task *task, void *calldata)
{
	struct nfs_read_data *data = calldata;
	NFS_PROTO(data->header->inode)->read_rpc_prepare(task, data);
}

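/*
 * rpc_call_ops shared by the READ RPCs built in nfs_pagein_multi() and
 * nfs_pagein_one() above.
 */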
static const struct rpc_call_ops nfs_read_common_ops = {
	.rpc_call_prepare = nfs_read_prepare,
	.rpc_call_done = nfs_readpage_result_common,
	.rpc_release = nfs_readpage_release_common,
};

/*
 * Read a page over NFS.
 * We read the page synchronously in the following case:
 *  -	The error flag is set for this page. This happens only when a
 *	previous async read operation failed.
 */
int nfs_readpage(struct file *file, struct page *page)
{
	struct nfs_open_context *ctx;
	struct inode *inode = page->mapping->host;
	int error;

	dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
		page, PAGE_CACHE_SIZE, page->index);
	nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
	nfs_add_stats(inode, NFSIOS_READPAGES, 1);

	/*
	 * Try to flush any pending writes to the file..
	 *
	 * NOTE! Because we own the page lock, there cannot
	 * be any new pending writes generated at this point
	 * for this page (other pages can be written to).
	 */
	error = nfs_wb_page(inode, page);
	if (error)
		goto out_unlock;
	if (PageUptodate(page))
		goto out_unlock;

	error = -ESTALE;
	if (NFS_STALE(inode))
		goto out_unlock;

	if (file == NULL) {
		error = -EBADF;
		ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
		if (ctx == NULL)
			goto out_unlock;
	} else
		ctx = get_nfs_open_context(nfs_file_open_context(file));

	if (!IS_SYNC(inode)) {
		error = nfs_readpage_from_fscache(ctx, inode, page);
		if (error == 0)
			goto out;
	}

	error = nfs_readpage_async(ctx, inode, page);

out:
	put_nfs_open_context(ctx);
	return error;
out_unlock:
	unlock_page(page);
	return error;
}

struct nfs_readdesc {
	struct nfs_pageio_descriptor *pgio;
	struct nfs_open_context *ctx;
};

static int
readpage_async_filler(void *data, struct page *page)
{
	struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
	struct inode *inode = page->mapping->host;
	struct nfs_page *new;
	unsigned int len;
	int error;

	len = nfs_page_length(page);
	if (len == 0)
		return nfs_return_empty_page(page);

	new = nfs_create_request(desc->ctx, inode, page, 0, len);
	if (IS_ERR(new))
		goto out_error;

	if (len < PAGE_CACHE_SIZE)
		zero_user_segment(page, len, PAGE_CACHE_SIZE);
	if (!nfs_pageio_add_request(desc->pgio, new)) {
		error = desc->pgio->pg_error;
		goto out_unlock;
	}
	return 0;
out_error:
	error = PTR_ERR(new);
out_unlock:
	unlock_page(page);
	return error;
}

int nfs_readpages(struct file *filp, struct address_space *mapping,
		struct list_head *pages, unsigned nr_pages)
{
	struct nfs_pageio_descriptor pgio;
	struct nfs_readdesc desc = {
		.pgio = &pgio,
	};
	struct inode *inode = mapping->host;
	unsigned long npages;
	int ret = -ESTALE;

	dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
			inode->i_sb->s_id,
			(long long)NFS_FILEID(inode),
			nr_pages);
	nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);

	if (NFS_STALE(inode))
		goto out;

	if (filp == NULL) {
		desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
		if (desc.ctx == NULL)
			return -EBADF;
	} else
		desc.ctx = get_nfs_open_context(nfs_file_open_context(filp));

	/* attempt to read as many of the pages as possible from the cache
	 * - this returns -ENOBUFS immediately if the cookie is negative
	 */
	ret = nfs_readpages_from_fscache(desc.ctx, inode, mapping,
					 pages, &nr_pages);
	if (ret == 0)
		goto read_complete; /* all pages were read */

	nfs_pageio_init_read(&pgio, inode, &nfs_async_read_completion_ops);

	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);

	nfs_pageio_complete(&pgio);
	NFS_I(inode)->read_io += pgio.pg_bytes_written;
	npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	nfs_add_stats(inode, NFSIOS_READPAGES, npages);
read_complete:
	put_nfs_open_context(desc.ctx);
out:
	return ret;
}

int __init nfs_init_readpagecache(void)
{
	nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
					     sizeof(struct nfs_read_header),
					     0, SLAB_HWCACHE_ALIGN,
					     NULL);
	if (nfs_rdata_cachep == NULL)
		return -ENOMEM;

	return 0;
}

void nfs_destroy_readpagecache(void)
{
	kmem_cache_destroy(nfs_rdata_cachep);
}