1 /* 2 * linux/fs/nfs/read.c 3 * 4 * Block I/O for NFS 5 * 6 * Partial copy of Linus' read cache modifications to fs/nfs/file.c 7 * modified for async RPC by okir@monad.swb.de 8 */ 9 10 #include <linux/time.h> 11 #include <linux/kernel.h> 12 #include <linux/errno.h> 13 #include <linux/fcntl.h> 14 #include <linux/stat.h> 15 #include <linux/mm.h> 16 #include <linux/slab.h> 17 #include <linux/pagemap.h> 18 #include <linux/sunrpc/clnt.h> 19 #include <linux/nfs_fs.h> 20 #include <linux/nfs_page.h> 21 #include <linux/module.h> 22 23 #include "pnfs.h" 24 25 #include "nfs4_fs.h" 26 #include "internal.h" 27 #include "iostat.h" 28 #include "fscache.h" 29 30 #define NFSDBG_FACILITY NFSDBG_PAGECACHE 31 32 static const struct nfs_pageio_ops nfs_pageio_read_ops; 33 static const struct rpc_call_ops nfs_read_partial_ops; 34 static const struct rpc_call_ops nfs_read_full_ops; 35 36 static struct kmem_cache *nfs_rdata_cachep; 37 38 struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) 39 { 40 struct nfs_read_data *p; 41 42 p = kmem_cache_zalloc(nfs_rdata_cachep, GFP_KERNEL); 43 if (p) { 44 INIT_LIST_HEAD(&p->pages); 45 p->npages = pagecount; 46 if (pagecount <= ARRAY_SIZE(p->page_array)) 47 p->pagevec = p->page_array; 48 else { 49 p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL); 50 if (!p->pagevec) { 51 kmem_cache_free(nfs_rdata_cachep, p); 52 p = NULL; 53 } 54 } 55 } 56 return p; 57 } 58 59 void nfs_readdata_free(struct nfs_read_data *p) 60 { 61 if (p && (p->pagevec != &p->page_array[0])) 62 kfree(p->pagevec); 63 kmem_cache_free(nfs_rdata_cachep, p); 64 } 65 66 void nfs_readdata_release(struct nfs_read_data *rdata) 67 { 68 put_nfs_open_context(rdata->args.context); 69 nfs_readdata_free(rdata); 70 } 71 72 static 73 int nfs_return_empty_page(struct page *page) 74 { 75 zero_user(page, 0, PAGE_CACHE_SIZE); 76 SetPageUptodate(page); 77 unlock_page(page); 78 return 0; 79 } 80 81 static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) 82 { 83 unsigned int remainder = data->args.count - data->res.count; 84 unsigned int base = data->args.pgbase + data->res.count; 85 unsigned int pglen; 86 struct page **pages; 87 88 if (data->res.eof == 0 || remainder == 0) 89 return; 90 /* 91 * Note: "remainder" can never be negative, since we check for 92 * this in the XDR code. 93 */ 94 pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; 95 base &= ~PAGE_CACHE_MASK; 96 pglen = PAGE_CACHE_SIZE - base; 97 for (;;) { 98 if (remainder <= pglen) { 99 zero_user(*pages, base, remainder); 100 break; 101 } 102 zero_user(*pages, base, pglen); 103 pages++; 104 remainder -= pglen; 105 pglen = PAGE_CACHE_SIZE; 106 base = 0; 107 } 108 } 109 110 void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio, 111 struct inode *inode) 112 { 113 nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops, 114 NFS_SERVER(inode)->rsize, 0); 115 } 116 117 void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio) 118 { 119 pgio->pg_ops = &nfs_pageio_read_ops; 120 pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize; 121 } 122 EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds); 123 124 static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio, 125 struct inode *inode) 126 { 127 if (!pnfs_pageio_init_read(pgio, inode)) 128 nfs_pageio_init_read_mds(pgio, inode); 129 } 130 131 int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, 132 struct page *page) 133 { 134 struct nfs_page *new; 135 unsigned int len; 136 struct nfs_pageio_descriptor pgio; 137 138 len = nfs_page_length(page); 139 if (len == 0) 140 return nfs_return_empty_page(page); 141 new = nfs_create_request(ctx, inode, page, 0, len); 142 if (IS_ERR(new)) { 143 unlock_page(page); 144 return PTR_ERR(new); 145 } 146 if (len < PAGE_CACHE_SIZE) 147 zero_user_segment(page, len, PAGE_CACHE_SIZE); 148 149 nfs_pageio_init_read(&pgio, inode); 150 nfs_pageio_add_request(&pgio, new); 151 nfs_pageio_complete(&pgio); 152 return 0; 153 } 154 155 static void nfs_readpage_release(struct nfs_page *req) 156 { 157 struct inode *d_inode = req->wb_context->dentry->d_inode; 158 159 if (PageUptodate(req->wb_page)) 160 nfs_readpage_to_fscache(d_inode, req->wb_page, 0); 161 162 unlock_page(req->wb_page); 163 164 dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", 165 req->wb_context->dentry->d_inode->i_sb->s_id, 166 (long long)NFS_FILEID(req->wb_context->dentry->d_inode), 167 req->wb_bytes, 168 (long long)req_offset(req)); 169 nfs_release_request(req); 170 } 171 172 int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt, 173 const struct rpc_call_ops *call_ops) 174 { 175 struct inode *inode = data->inode; 176 int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; 177 struct rpc_task *task; 178 struct rpc_message msg = { 179 .rpc_argp = &data->args, 180 .rpc_resp = &data->res, 181 .rpc_cred = data->cred, 182 }; 183 struct rpc_task_setup task_setup_data = { 184 .task = &data->task, 185 .rpc_client = clnt, 186 .rpc_message = &msg, 187 .callback_ops = call_ops, 188 .callback_data = data, 189 .workqueue = nfsiod_workqueue, 190 .flags = RPC_TASK_ASYNC | swap_flags, 191 }; 192 193 /* Set up the initial task struct. */ 194 NFS_PROTO(inode)->read_setup(data, &msg); 195 196 dprintk("NFS: %5u initiated read call (req %s/%lld, %u bytes @ " 197 "offset %llu)\n", 198 data->task.tk_pid, 199 inode->i_sb->s_id, 200 (long long)NFS_FILEID(inode), 201 data->args.count, 202 (unsigned long long)data->args.offset); 203 204 task = rpc_run_task(&task_setup_data); 205 if (IS_ERR(task)) 206 return PTR_ERR(task); 207 rpc_put_task(task); 208 return 0; 209 } 210 EXPORT_SYMBOL_GPL(nfs_initiate_read); 211 212 /* 213 * Set up the NFS read request struct 214 */ 215 static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, 216 unsigned int count, unsigned int offset) 217 { 218 struct inode *inode = req->wb_context->dentry->d_inode; 219 220 data->req = req; 221 data->inode = inode; 222 data->cred = req->wb_context->cred; 223 224 data->args.fh = NFS_FH(inode); 225 data->args.offset = req_offset(req) + offset; 226 data->args.pgbase = req->wb_pgbase + offset; 227 data->args.pages = data->pagevec; 228 data->args.count = count; 229 data->args.context = get_nfs_open_context(req->wb_context); 230 data->args.lock_context = req->wb_lock_context; 231 232 data->res.fattr = &data->fattr; 233 data->res.count = count; 234 data->res.eof = 0; 235 nfs_fattr_init(&data->fattr); 236 } 237 238 static int nfs_do_read(struct nfs_read_data *data, 239 const struct rpc_call_ops *call_ops) 240 { 241 struct inode *inode = data->args.context->dentry->d_inode; 242 243 return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops); 244 } 245 246 static int 247 nfs_do_multiple_reads(struct list_head *head, 248 const struct rpc_call_ops *call_ops) 249 { 250 struct nfs_read_data *data; 251 int ret = 0; 252 253 while (!list_empty(head)) { 254 int ret2; 255 256 data = list_entry(head->next, struct nfs_read_data, list); 257 list_del_init(&data->list); 258 259 ret2 = nfs_do_read(data, call_ops); 260 if (ret == 0) 261 ret = ret2; 262 } 263 return ret; 264 } 265 266 static void 267 nfs_async_read_error(struct list_head *head) 268 { 269 struct nfs_page *req; 270 271 while (!list_empty(head)) { 272 req = nfs_list_entry(head->next); 273 nfs_list_remove_request(req); 274 nfs_readpage_release(req); 275 } 276 } 277 278 /* 279 * Generate multiple requests to fill a single page. 280 * 281 * We optimize to reduce the number of read operations on the wire. If we 282 * detect that we're reading a page, or an area of a page, that is past the 283 * end of file, we do not generate NFS read operations but just clear the 284 * parts of the page that would have come back zero from the server anyway. 285 * 286 * We rely on the cached value of i_size to make this determination; another 287 * client can fill pages on the server past our cached end-of-file, but we 288 * won't see the new data until our attribute cache is updated. This is more 289 * or less conventional NFS client behavior. 290 */ 291 static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res) 292 { 293 struct nfs_page *req = nfs_list_entry(desc->pg_list.next); 294 struct page *page = req->wb_page; 295 struct nfs_read_data *data; 296 size_t rsize = desc->pg_bsize, nbytes; 297 unsigned int offset; 298 int requests = 0; 299 int ret = 0; 300 301 nfs_list_remove_request(req); 302 303 offset = 0; 304 nbytes = desc->pg_count; 305 do { 306 size_t len = min(nbytes,rsize); 307 308 data = nfs_readdata_alloc(1); 309 if (!data) 310 goto out_bad; 311 data->pagevec[0] = page; 312 nfs_read_rpcsetup(req, data, len, offset); 313 list_add(&data->list, res); 314 requests++; 315 nbytes -= len; 316 offset += len; 317 } while(nbytes != 0); 318 atomic_set(&req->wb_complete, requests); 319 desc->pg_rpc_callops = &nfs_read_partial_ops; 320 return ret; 321 out_bad: 322 while (!list_empty(res)) { 323 data = list_entry(res->next, struct nfs_read_data, list); 324 list_del(&data->list); 325 nfs_readdata_release(data); 326 } 327 nfs_readpage_release(req); 328 return -ENOMEM; 329 } 330 331 static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res) 332 { 333 struct nfs_page *req; 334 struct page **pages; 335 struct nfs_read_data *data; 336 struct list_head *head = &desc->pg_list; 337 int ret = 0; 338 339 data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base, 340 desc->pg_count)); 341 if (!data) { 342 nfs_async_read_error(head); 343 ret = -ENOMEM; 344 goto out; 345 } 346 347 pages = data->pagevec; 348 while (!list_empty(head)) { 349 req = nfs_list_entry(head->next); 350 nfs_list_remove_request(req); 351 nfs_list_add_request(req, &data->pages); 352 *pages++ = req->wb_page; 353 } 354 req = nfs_list_entry(data->pages.next); 355 356 nfs_read_rpcsetup(req, data, desc->pg_count, 0); 357 list_add(&data->list, res); 358 desc->pg_rpc_callops = &nfs_read_full_ops; 359 out: 360 return ret; 361 } 362 363 int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head) 364 { 365 if (desc->pg_bsize < PAGE_CACHE_SIZE) 366 return nfs_pagein_multi(desc, head); 367 return nfs_pagein_one(desc, head); 368 } 369 370 static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc) 371 { 372 LIST_HEAD(head); 373 int ret; 374 375 ret = nfs_generic_pagein(desc, &head); 376 if (ret == 0) 377 ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops); 378 return ret; 379 } 380 381 static const struct nfs_pageio_ops nfs_pageio_read_ops = { 382 .pg_test = nfs_generic_pg_test, 383 .pg_doio = nfs_generic_pg_readpages, 384 }; 385 386 /* 387 * This is the callback from RPC telling us whether a reply was 388 * received or some error occurred (timeout or socket shutdown). 389 */ 390 int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) 391 { 392 int status; 393 394 dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid, 395 task->tk_status); 396 397 status = NFS_PROTO(data->inode)->read_done(task, data); 398 if (status != 0) 399 return status; 400 401 nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count); 402 403 if (task->tk_status == -ESTALE) { 404 set_bit(NFS_INO_STALE, &NFS_I(data->inode)->flags); 405 nfs_mark_for_revalidate(data->inode); 406 } 407 return 0; 408 } 409 410 static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data) 411 { 412 struct nfs_readargs *argp = &data->args; 413 struct nfs_readres *resp = &data->res; 414 415 if (resp->eof || resp->count == argp->count) 416 return; 417 418 /* This is a short read! */ 419 nfs_inc_stats(data->inode, NFSIOS_SHORTREAD); 420 /* Has the server at least made some progress? */ 421 if (resp->count == 0) 422 return; 423 424 /* Yes, so retry the read at the end of the data */ 425 data->mds_offset += resp->count; 426 argp->offset += resp->count; 427 argp->pgbase += resp->count; 428 argp->count -= resp->count; 429 rpc_restart_call_prepare(task); 430 } 431 432 /* 433 * Handle a read reply that fills part of a page. 434 */ 435 static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata) 436 { 437 struct nfs_read_data *data = calldata; 438 439 if (nfs_readpage_result(task, data) != 0) 440 return; 441 if (task->tk_status < 0) 442 return; 443 444 nfs_readpage_truncate_uninitialised_page(data); 445 nfs_readpage_retry(task, data); 446 } 447 448 static void nfs_readpage_release_partial(void *calldata) 449 { 450 struct nfs_read_data *data = calldata; 451 struct nfs_page *req = data->req; 452 struct page *page = req->wb_page; 453 int status = data->task.tk_status; 454 455 if (status < 0) 456 set_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags); 457 458 if (atomic_dec_and_test(&req->wb_complete)) { 459 if (!test_bit(PG_PARTIAL_READ_FAILED, &req->wb_flags)) 460 SetPageUptodate(page); 461 nfs_readpage_release(req); 462 } 463 nfs_readdata_release(calldata); 464 } 465 466 void nfs_read_prepare(struct rpc_task *task, void *calldata) 467 { 468 struct nfs_read_data *data = calldata; 469 NFS_PROTO(data->inode)->read_rpc_prepare(task, data); 470 } 471 472 static const struct rpc_call_ops nfs_read_partial_ops = { 473 .rpc_call_prepare = nfs_read_prepare, 474 .rpc_call_done = nfs_readpage_result_partial, 475 .rpc_release = nfs_readpage_release_partial, 476 }; 477 478 static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data) 479 { 480 unsigned int count = data->res.count; 481 unsigned int base = data->args.pgbase; 482 struct page **pages; 483 484 if (data->res.eof) 485 count = data->args.count; 486 if (unlikely(count == 0)) 487 return; 488 pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; 489 base &= ~PAGE_CACHE_MASK; 490 count += base; 491 for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++) 492 SetPageUptodate(*pages); 493 if (count == 0) 494 return; 495 /* Was this a short read? */ 496 if (data->res.eof || data->res.count == data->args.count) 497 SetPageUptodate(*pages); 498 } 499 500 /* 501 * This is the callback from RPC telling us whether a reply was 502 * received or some error occurred (timeout or socket shutdown). 503 */ 504 static void nfs_readpage_result_full(struct rpc_task *task, void *calldata) 505 { 506 struct nfs_read_data *data = calldata; 507 508 if (nfs_readpage_result(task, data) != 0) 509 return; 510 if (task->tk_status < 0) 511 return; 512 /* 513 * Note: nfs_readpage_retry may change the values of 514 * data->args. In the multi-page case, we therefore need 515 * to ensure that we call nfs_readpage_set_pages_uptodate() 516 * first. 517 */ 518 nfs_readpage_truncate_uninitialised_page(data); 519 nfs_readpage_set_pages_uptodate(data); 520 nfs_readpage_retry(task, data); 521 } 522 523 static void nfs_readpage_release_full(void *calldata) 524 { 525 struct nfs_read_data *data = calldata; 526 527 while (!list_empty(&data->pages)) { 528 struct nfs_page *req = nfs_list_entry(data->pages.next); 529 530 nfs_list_remove_request(req); 531 nfs_readpage_release(req); 532 } 533 nfs_readdata_release(calldata); 534 } 535 536 static const struct rpc_call_ops nfs_read_full_ops = { 537 .rpc_call_prepare = nfs_read_prepare, 538 .rpc_call_done = nfs_readpage_result_full, 539 .rpc_release = nfs_readpage_release_full, 540 }; 541 542 /* 543 * Read a page over NFS. 544 * We read the page synchronously in the following case: 545 * - The error flag is set for this page. This happens only when a 546 * previous async read operation failed. 547 */ 548 int nfs_readpage(struct file *file, struct page *page) 549 { 550 struct nfs_open_context *ctx; 551 struct inode *inode = page->mapping->host; 552 int error; 553 554 dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", 555 page, PAGE_CACHE_SIZE, page->index); 556 nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); 557 nfs_add_stats(inode, NFSIOS_READPAGES, 1); 558 559 /* 560 * Try to flush any pending writes to the file.. 561 * 562 * NOTE! Because we own the page lock, there cannot 563 * be any new pending writes generated at this point 564 * for this page (other pages can be written to). 565 */ 566 error = nfs_wb_page(inode, page); 567 if (error) 568 goto out_unlock; 569 if (PageUptodate(page)) 570 goto out_unlock; 571 572 error = -ESTALE; 573 if (NFS_STALE(inode)) 574 goto out_unlock; 575 576 if (file == NULL) { 577 error = -EBADF; 578 ctx = nfs_find_open_context(inode, NULL, FMODE_READ); 579 if (ctx == NULL) 580 goto out_unlock; 581 } else 582 ctx = get_nfs_open_context(nfs_file_open_context(file)); 583 584 if (!IS_SYNC(inode)) { 585 error = nfs_readpage_from_fscache(ctx, inode, page); 586 if (error == 0) 587 goto out; 588 } 589 590 error = nfs_readpage_async(ctx, inode, page); 591 592 out: 593 put_nfs_open_context(ctx); 594 return error; 595 out_unlock: 596 unlock_page(page); 597 return error; 598 } 599 600 struct nfs_readdesc { 601 struct nfs_pageio_descriptor *pgio; 602 struct nfs_open_context *ctx; 603 }; 604 605 static int 606 readpage_async_filler(void *data, struct page *page) 607 { 608 struct nfs_readdesc *desc = (struct nfs_readdesc *)data; 609 struct inode *inode = page->mapping->host; 610 struct nfs_page *new; 611 unsigned int len; 612 int error; 613 614 len = nfs_page_length(page); 615 if (len == 0) 616 return nfs_return_empty_page(page); 617 618 new = nfs_create_request(desc->ctx, inode, page, 0, len); 619 if (IS_ERR(new)) 620 goto out_error; 621 622 if (len < PAGE_CACHE_SIZE) 623 zero_user_segment(page, len, PAGE_CACHE_SIZE); 624 if (!nfs_pageio_add_request(desc->pgio, new)) { 625 error = desc->pgio->pg_error; 626 goto out_unlock; 627 } 628 return 0; 629 out_error: 630 error = PTR_ERR(new); 631 out_unlock: 632 unlock_page(page); 633 return error; 634 } 635 636 int nfs_readpages(struct file *filp, struct address_space *mapping, 637 struct list_head *pages, unsigned nr_pages) 638 { 639 struct nfs_pageio_descriptor pgio; 640 struct nfs_readdesc desc = { 641 .pgio = &pgio, 642 }; 643 struct inode *inode = mapping->host; 644 unsigned long npages; 645 int ret = -ESTALE; 646 647 dprintk("NFS: nfs_readpages (%s/%Ld %d)\n", 648 inode->i_sb->s_id, 649 (long long)NFS_FILEID(inode), 650 nr_pages); 651 nfs_inc_stats(inode, NFSIOS_VFSREADPAGES); 652 653 if (NFS_STALE(inode)) 654 goto out; 655 656 if (filp == NULL) { 657 desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ); 658 if (desc.ctx == NULL) 659 return -EBADF; 660 } else 661 desc.ctx = get_nfs_open_context(nfs_file_open_context(filp)); 662 663 /* attempt to read as many of the pages as possible from the cache 664 * - this returns -ENOBUFS immediately if the cookie is negative 665 */ 666 ret = nfs_readpages_from_fscache(desc.ctx, inode, mapping, 667 pages, &nr_pages); 668 if (ret == 0) 669 goto read_complete; /* all pages were read */ 670 671 nfs_pageio_init_read(&pgio, inode); 672 673 ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); 674 675 nfs_pageio_complete(&pgio); 676 npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 677 nfs_add_stats(inode, NFSIOS_READPAGES, npages); 678 read_complete: 679 put_nfs_open_context(desc.ctx); 680 out: 681 return ret; 682 } 683 684 int __init nfs_init_readpagecache(void) 685 { 686 nfs_rdata_cachep = kmem_cache_create("nfs_read_data", 687 sizeof(struct nfs_read_data), 688 0, SLAB_HWCACHE_ALIGN, 689 NULL); 690 if (nfs_rdata_cachep == NULL) 691 return -ENOMEM; 692 693 return 0; 694 } 695 696 void nfs_destroy_readpagecache(void) 697 { 698 kmem_cache_destroy(nfs_rdata_cachep); 699 } 700