/*
 * linux/fs/nfs/read.c
 *
 * Block I/O for NFS
 *
 * Partial copy of Linus' read cache modifications to fs/nfs/file.c
 * modified for async RPC by okir@monad.swb.de
 */

#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/module.h>

#include <asm/system.h>
#include "pnfs.h"

#include "nfs4_fs.h"
#include "internal.h"
#include "iostat.h"
#include "fscache.h"

#define NFSDBG_FACILITY		NFSDBG_PAGECACHE

static const struct nfs_pageio_ops nfs_pageio_read_ops;
static const struct rpc_call_ops nfs_read_partial_ops;
static const struct rpc_call_ops nfs_read_full_ops;

static struct kmem_cache *nfs_rdata_cachep;
static mempool_t *nfs_rdata_mempool;

#define MIN_POOL_READ	(32)

struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount)
{
	struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_KERNEL);

	if (p) {
		memset(p, 0, sizeof(*p));
		INIT_LIST_HEAD(&p->pages);
		p->npages = pagecount;
		if (pagecount <= ARRAY_SIZE(p->page_array))
			p->pagevec = p->page_array;
		else {
			p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL);
			if (!p->pagevec) {
				mempool_free(p, nfs_rdata_mempool);
				p = NULL;
			}
		}
	}
	return p;
}

void nfs_readdata_free(struct nfs_read_data *p)
{
	if (p && (p->pagevec != &p->page_array[0]))
		kfree(p->pagevec);
	mempool_free(p, nfs_rdata_mempool);
}

void nfs_readdata_release(struct nfs_read_data *rdata)
{
	put_lseg(rdata->lseg);
	put_nfs_open_context(rdata->args.context);
	nfs_readdata_free(rdata);
}

static
int nfs_return_empty_page(struct page *page)
{
	zero_user(page, 0, PAGE_CACHE_SIZE);
	SetPageUptodate(page);
	unlock_page(page);
	return 0;
}

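/*
 * Zero out whatever part of the page range the server did not fill on
 * a short read that hit end-of-file, so that no stale data is left
 * visible past the server's EOF.
 */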
static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
{
	unsigned int remainder = data->args.count - data->res.count;
	unsigned int base = data->args.pgbase + data->res.count;
	unsigned int pglen;
	struct page **pages;

	if (data->res.eof == 0 || remainder == 0)
		return;
	/*
	 * Note: "remainder" can never be negative, since we check for
	 * this in the XDR code.
	 */
	pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
	base &= ~PAGE_CACHE_MASK;
	pglen = PAGE_CACHE_SIZE - base;
	for (;;) {
		if (remainder <= pglen) {
			zero_user(*pages, base, remainder);
			break;
		}
		zero_user(*pages, base, pglen);
		pages++;
		remainder -= pglen;
		pglen = PAGE_CACHE_SIZE;
		base = 0;
	}
}

static void nfs_pageio_init_read_mds(struct nfs_pageio_descriptor *pgio,
		struct inode *inode)
{
	nfs_pageio_init(pgio, inode, &nfs_pageio_read_ops,
			NFS_SERVER(inode)->rsize, 0);
}

void nfs_pageio_reset_read_mds(struct nfs_pageio_descriptor *pgio)
{
	pgio->pg_ops = &nfs_pageio_read_ops;
	pgio->pg_bsize = NFS_SERVER(pgio->pg_inode)->rsize;
}
EXPORT_SYMBOL_GPL(nfs_pageio_reset_read_mds);

static void nfs_pageio_init_read(struct nfs_pageio_descriptor *pgio,
		struct inode *inode)
{
	if (!pnfs_pageio_init_read(pgio, inode))
		nfs_pageio_init_read_mds(pgio, inode);
}

int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
		struct page *page)
{
	struct nfs_page *new;
	unsigned int len;
	struct nfs_pageio_descriptor pgio;

	len = nfs_page_length(page);
	if (len == 0)
		return nfs_return_empty_page(page);
	new = nfs_create_request(ctx, inode, page, 0, len);
	if (IS_ERR(new)) {
		unlock_page(page);
		return PTR_ERR(new);
	}
	if (len < PAGE_CACHE_SIZE)
		zero_user_segment(page, len, PAGE_CACHE_SIZE);

	nfs_pageio_init_read(&pgio, inode);
	nfs_pageio_add_request(&pgio, new);
	nfs_pageio_complete(&pgio);
	return 0;
}

static void nfs_readpage_release(struct nfs_page *req)
{
	struct inode *d_inode = req->wb_context->dentry->d_inode;

	if (PageUptodate(req->wb_page))
		nfs_readpage_to_fscache(d_inode, req->wb_page, 0);

	unlock_page(req->wb_page);

	dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
			req->wb_context->dentry->d_inode->i_sb->s_id,
			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
			req->wb_bytes,
			(long long)req_offset(req));
	nfs_release_request(req);
}

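/*
 * Set up and launch an asynchronous READ RPC.  A return of 0 means the
 * task was handed off to the RPC layer; completion and errors are then
 * reported through the supplied call_ops.
 */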
int nfs_initiate_read(struct nfs_read_data *data, struct rpc_clnt *clnt,
		      const struct rpc_call_ops *call_ops)
{
	struct inode *inode = data->inode;
	int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0;
	struct rpc_task *task;
	struct rpc_message msg = {
		.rpc_argp = &data->args,
		.rpc_resp = &data->res,
		.rpc_cred = data->cred,
	};
	struct rpc_task_setup task_setup_data = {
		.task = &data->task,
		.rpc_client = clnt,
		.rpc_message = &msg,
		.callback_ops = call_ops,
		.callback_data = data,
		.workqueue = nfsiod_workqueue,
		.flags = RPC_TASK_ASYNC | swap_flags,
	};

	/* Set up the initial task struct. */
	NFS_PROTO(inode)->read_setup(data, &msg);

	dprintk("NFS: %5u initiated read call (req %s/%lld, %u bytes @ "
			"offset %llu)\n",
			data->task.tk_pid,
			inode->i_sb->s_id,
			(long long)NFS_FILEID(inode),
			data->args.count,
			(unsigned long long)data->args.offset);

	task = rpc_run_task(&task_setup_data);
	if (IS_ERR(task))
		return PTR_ERR(task);
	rpc_put_task(task);
	return 0;
}
EXPORT_SYMBOL_GPL(nfs_initiate_read);

/*
 * Set up the NFS read request struct
 */
static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
		unsigned int count, unsigned int offset)
{
	struct inode *inode = req->wb_context->dentry->d_inode;

	data->req = req;
	data->inode = inode;
	data->cred = req->wb_context->cred;

	data->args.fh = NFS_FH(inode);
	data->args.offset = req_offset(req) + offset;
	data->args.pgbase = req->wb_pgbase + offset;
	data->args.pages = data->pagevec;
	data->args.count = count;
	data->args.context = get_nfs_open_context(req->wb_context);
	data->args.lock_context = req->wb_lock_context;

	data->res.fattr = &data->fattr;
	data->res.count = count;
	data->res.eof = 0;
	nfs_fattr_init(&data->fattr);
}

static int nfs_do_read(struct nfs_read_data *data,
		const struct rpc_call_ops *call_ops)
{
	struct inode *inode = data->args.context->dentry->d_inode;

	return nfs_initiate_read(data, NFS_CLIENT(inode), call_ops);
}

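/*
 * Submit every prepared read on the list.  Each request is issued even
 * if an earlier submission failed; the first error encountered is the
 * one returned to the caller.
 */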
static int
nfs_do_multiple_reads(struct list_head *head,
		const struct rpc_call_ops *call_ops)
{
	struct nfs_read_data *data;
	int ret = 0;

	while (!list_empty(head)) {
		int ret2;

		data = list_entry(head->next, struct nfs_read_data, list);
		list_del_init(&data->list);

		ret2 = nfs_do_read(data, call_ops);
		if (ret == 0)
			ret = ret2;
	}
	return ret;
}

static void
nfs_async_read_error(struct list_head *head)
{
	struct nfs_page *req;

	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		SetPageError(req->wb_page);
		nfs_readpage_release(req);
	}
}

/*
 * Generate multiple requests to fill a single page.
 *
 * We optimize to reduce the number of read operations on the wire.  If we
 * detect that we're reading a page, or an area of a page, that is past the
 * end of file, we do not generate NFS read operations but just clear the
 * parts of the page that would have come back zero from the server anyway.
 *
 * We rely on the cached value of i_size to make this determination; another
 * client can fill pages on the server past our cached end-of-file, but we
 * won't see the new data until our attribute cache is updated.  This is more
 * or less conventional NFS client behavior.
 */
static int nfs_pagein_multi(struct nfs_pageio_descriptor *desc, struct list_head *res)
{
	struct nfs_page *req = nfs_list_entry(desc->pg_list.next);
	struct page *page = req->wb_page;
	struct nfs_read_data *data;
	size_t rsize = desc->pg_bsize, nbytes;
	unsigned int offset;
	int requests = 0;
	int ret = 0;

	nfs_list_remove_request(req);

	offset = 0;
	nbytes = desc->pg_count;
	do {
		size_t len = min(nbytes, rsize);

		data = nfs_readdata_alloc(1);
		if (!data)
			goto out_bad;
		data->pagevec[0] = page;
		nfs_read_rpcsetup(req, data, len, offset);
		list_add(&data->list, res);
		requests++;
		nbytes -= len;
		offset += len;
	} while (nbytes != 0);
	atomic_set(&req->wb_complete, requests);
	ClearPageError(page);
	desc->pg_rpc_callops = &nfs_read_partial_ops;
	return ret;
out_bad:
	while (!list_empty(res)) {
		data = list_entry(res->next, struct nfs_read_data, list);
		list_del(&data->list);
		nfs_readdata_free(data);
	}
	SetPageError(page);
	nfs_readpage_release(req);
	return -ENOMEM;
}

static int nfs_pagein_one(struct nfs_pageio_descriptor *desc, struct list_head *res)
{
	struct nfs_page *req;
	struct page **pages;
	struct nfs_read_data *data;
	struct list_head *head = &desc->pg_list;
	int ret = 0;

	data = nfs_readdata_alloc(nfs_page_array_len(desc->pg_base,
						     desc->pg_count));
	if (!data) {
		nfs_async_read_error(head);
		ret = -ENOMEM;
		goto out;
	}

	pages = data->pagevec;
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_list_add_request(req, &data->pages);
		ClearPageError(req->wb_page);
		*pages++ = req->wb_page;
	}
	req = nfs_list_entry(data->pages.next);

	nfs_read_rpcsetup(req, data, desc->pg_count, 0);
	list_add(&data->list, res);
	desc->pg_rpc_callops = &nfs_read_full_ops;
out:
	return ret;
}

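/*
 * Choose the wire strategy: if the server's rsize is smaller than a
 * page, each page has to be filled by several READ calls
 * (nfs_pagein_multi); otherwise a single READ can cover the whole page
 * list (nfs_pagein_one).  With 4k pages and an rsize of 1k, for
 * example, nfs_pagein_multi prepares four requests per page.
 */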
int nfs_generic_pagein(struct nfs_pageio_descriptor *desc, struct list_head *head)
{
	if (desc->pg_bsize < PAGE_CACHE_SIZE)
		return nfs_pagein_multi(desc, head);
	return nfs_pagein_one(desc, head);
}

static int nfs_generic_pg_readpages(struct nfs_pageio_descriptor *desc)
{
	LIST_HEAD(head);
	int ret;

	ret = nfs_generic_pagein(desc, &head);
	if (ret == 0)
		ret = nfs_do_multiple_reads(&head, desc->pg_rpc_callops);
	return ret;
}

static const struct nfs_pageio_ops nfs_pageio_read_ops = {
	.pg_test = nfs_generic_pg_test,
	.pg_doio = nfs_generic_pg_readpages,
};

/*
 * This is the callback from RPC telling us whether a reply was
 * received or some error occurred (timeout or socket shutdown).
 */
int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
{
	int status;

	dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid,
			task->tk_status);

	status = NFS_PROTO(data->inode)->read_done(task, data);
	if (status != 0)
		return status;

	nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count);

	if (task->tk_status == -ESTALE) {
		set_bit(NFS_INO_STALE, &NFS_I(data->inode)->flags);
		nfs_mark_for_revalidate(data->inode);
	}
	return 0;
}

static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
{
	struct nfs_readargs *argp = &data->args;
	struct nfs_readres *resp = &data->res;

	if (resp->eof || resp->count == argp->count)
		return;

	/* This is a short read! */
	nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
	/* Has the server at least made some progress? */
	if (resp->count == 0)
		return;

	/* Yes, so retry the read at the end of the data */
	data->mds_offset += resp->count;
	argp->offset += resp->count;
	argp->pgbase += resp->count;
	argp->count -= resp->count;
	nfs_restart_rpc(task, NFS_SERVER(data->inode)->nfs_client);
}

/*
 * Handle a read reply that fills part of a page.
 */
static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
{
	struct nfs_read_data *data = calldata;

	if (nfs_readpage_result(task, data) != 0)
		return;
	if (task->tk_status < 0)
		return;

	nfs_readpage_truncate_uninitialised_page(data);
	nfs_readpage_retry(task, data);
}

static void nfs_readpage_release_partial(void *calldata)
{
	struct nfs_read_data *data = calldata;
	struct nfs_page *req = data->req;
	struct page *page = req->wb_page;
	int status = data->task.tk_status;

	if (status < 0)
		SetPageError(page);

	if (atomic_dec_and_test(&req->wb_complete)) {
		if (!PageError(page))
			SetPageUptodate(page);
		nfs_readpage_release(req);
	}
	nfs_readdata_release(calldata);
}

#if defined(CONFIG_NFS_V4_1)
void nfs_read_prepare(struct rpc_task *task, void *calldata)
{
	struct nfs_read_data *data = calldata;

	if (nfs4_setup_sequence(NFS_SERVER(data->inode),
				&data->args.seq_args, &data->res.seq_res,
				0, task))
		return;
	rpc_call_start(task);
}
#endif /* CONFIG_NFS_V4_1 */

static const struct rpc_call_ops nfs_read_partial_ops = {
#if defined(CONFIG_NFS_V4_1)
	.rpc_call_prepare = nfs_read_prepare,
#endif /* CONFIG_NFS_V4_1 */
	.rpc_call_done = nfs_readpage_result_partial,
	.rpc_release = nfs_readpage_release_partial,
};

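/*
 * Mark every page that the reply filled completely as up to date.  The
 * final, possibly partial, page is marked only when the read is known
 * to be complete: the server either reported EOF or returned the full
 * byte count that was requested.
 */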
static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
{
	unsigned int count = data->res.count;
	unsigned int base = data->args.pgbase;
	struct page **pages;

	if (data->res.eof)
		count = data->args.count;
	if (unlikely(count == 0))
		return;
	pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
	base &= ~PAGE_CACHE_MASK;
	count += base;
	for (; count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
		SetPageUptodate(*pages);
	if (count == 0)
		return;
	/* Was this a short read? */
	if (data->res.eof || data->res.count == data->args.count)
		SetPageUptodate(*pages);
}

/*
 * This is the callback from RPC telling us whether a reply was
 * received or some error occurred (timeout or socket shutdown).
 */
static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
{
	struct nfs_read_data *data = calldata;

	if (nfs_readpage_result(task, data) != 0)
		return;
	if (task->tk_status < 0)
		return;
	/*
	 * Note: nfs_readpage_retry may change the values of
	 * data->args.  In the multi-page case, we therefore need
	 * to ensure that we call nfs_readpage_set_pages_uptodate()
	 * first.
	 */
	nfs_readpage_truncate_uninitialised_page(data);
	nfs_readpage_set_pages_uptodate(data);
	nfs_readpage_retry(task, data);
}

static void nfs_readpage_release_full(void *calldata)
{
	struct nfs_read_data *data = calldata;

	while (!list_empty(&data->pages)) {
		struct nfs_page *req = nfs_list_entry(data->pages.next);

		nfs_list_remove_request(req);
		nfs_readpage_release(req);
	}
	nfs_readdata_release(calldata);
}

static const struct rpc_call_ops nfs_read_full_ops = {
#if defined(CONFIG_NFS_V4_1)
	.rpc_call_prepare = nfs_read_prepare,
#endif /* CONFIG_NFS_V4_1 */
	.rpc_call_done = nfs_readpage_result_full,
	.rpc_release = nfs_readpage_release_full,
};

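/*
 * nfs_readpage() and nfs_readpages() below are the VFS entry points;
 * they are normally wired up through the NFS address_space_operations
 * (.readpage/.readpages) in fs/nfs/file.c.
 */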
/*
 * Read a page over NFS.
 * We read the page synchronously in the following case:
 *  -	The error flag is set for this page.  This happens only when a
 *	previous async read operation failed.
 */
int nfs_readpage(struct file *file, struct page *page)
{
	struct nfs_open_context *ctx;
	struct inode *inode = page->mapping->host;
	int error;

	dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
		page, PAGE_CACHE_SIZE, page->index);
	nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
	nfs_add_stats(inode, NFSIOS_READPAGES, 1);

	/*
	 * Try to flush any pending writes to the file.
	 *
	 * NOTE! Because we own the page lock, there cannot
	 * be any new pending writes generated at this point
	 * for this page (other pages can be written to).
	 */
	error = nfs_wb_page(inode, page);
	if (error)
		goto out_unlock;
	if (PageUptodate(page))
		goto out_unlock;

	error = -ESTALE;
	if (NFS_STALE(inode))
		goto out_unlock;

	if (file == NULL) {
		error = -EBADF;
		ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
		if (ctx == NULL)
			goto out_unlock;
	} else
		ctx = get_nfs_open_context(nfs_file_open_context(file));

	if (!IS_SYNC(inode)) {
		error = nfs_readpage_from_fscache(ctx, inode, page);
		if (error == 0)
			goto out;
	}

	error = nfs_readpage_async(ctx, inode, page);

out:
	put_nfs_open_context(ctx);
	return error;
out_unlock:
	unlock_page(page);
	return error;
}

struct nfs_readdesc {
	struct nfs_pageio_descriptor *pgio;
	struct nfs_open_context *ctx;
};

static int
readpage_async_filler(void *data, struct page *page)
{
	struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
	struct inode *inode = page->mapping->host;
	struct nfs_page *new;
	unsigned int len;
	int error;

	len = nfs_page_length(page);
	if (len == 0)
		return nfs_return_empty_page(page);

	new = nfs_create_request(desc->ctx, inode, page, 0, len);
	if (IS_ERR(new))
		goto out_error;

	if (len < PAGE_CACHE_SIZE)
		zero_user_segment(page, len, PAGE_CACHE_SIZE);
	if (!nfs_pageio_add_request(desc->pgio, new)) {
		error = desc->pgio->pg_error;
		goto out_unlock;
	}
	return 0;
out_error:
	error = PTR_ERR(new);
	SetPageError(page);
out_unlock:
	unlock_page(page);
	return error;
}

int nfs_readpages(struct file *filp, struct address_space *mapping,
		struct list_head *pages, unsigned nr_pages)
{
	struct nfs_pageio_descriptor pgio;
	struct nfs_readdesc desc = {
		.pgio = &pgio,
	};
	struct inode *inode = mapping->host;
	unsigned long npages;
	int ret = -ESTALE;

	dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
			inode->i_sb->s_id,
			(long long)NFS_FILEID(inode),
			nr_pages);
	nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);

	if (NFS_STALE(inode))
		goto out;

	if (filp == NULL) {
		desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
		if (desc.ctx == NULL)
			return -EBADF;
	} else
		desc.ctx = get_nfs_open_context(nfs_file_open_context(filp));

	/* attempt to read as many of the pages as possible from the cache
	 * - this returns -ENOBUFS immediately if the cookie is negative
	 */
	ret = nfs_readpages_from_fscache(desc.ctx, inode, mapping,
					 pages, &nr_pages);
	if (ret == 0)
		goto read_complete; /* all pages were read */

	nfs_pageio_init_read(&pgio, inode);

	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);

	nfs_pageio_complete(&pgio);
	npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT;
	nfs_add_stats(inode, NFSIOS_READPAGES, npages);
read_complete:
	put_nfs_open_context(desc.ctx);
out:
	return ret;
}

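/*
 * Module setup: nfs_read_data allocations are backed by a slab cache
 * plus a mempool.  The mempool keeps MIN_POOL_READ elements in reserve
 * so that reads can still make forward progress under memory pressure.
 */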
int __init nfs_init_readpagecache(void)
{
	nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
					     sizeof(struct nfs_read_data),
					     0, SLAB_HWCACHE_ALIGN,
					     NULL);
	if (nfs_rdata_cachep == NULL)
		return -ENOMEM;

	nfs_rdata_mempool = mempool_create_slab_pool(MIN_POOL_READ,
						     nfs_rdata_cachep);
	if (nfs_rdata_mempool == NULL)
		return -ENOMEM;

	return 0;
}

void nfs_destroy_readpagecache(void)
{
	mempool_destroy(nfs_rdata_mempool);
	kmem_cache_destroy(nfs_rdata_cachep);
}