1 /* 2 * linux/fs/nfs/read.c 3 * 4 * Block I/O for NFS 5 * 6 * Partial copy of Linus' read cache modifications to fs/nfs/file.c 7 * modified for async RPC by okir@monad.swb.de 8 */ 9 10 #include <linux/time.h> 11 #include <linux/kernel.h> 12 #include <linux/errno.h> 13 #include <linux/fcntl.h> 14 #include <linux/stat.h> 15 #include <linux/mm.h> 16 #include <linux/slab.h> 17 #include <linux/pagemap.h> 18 #include <linux/sunrpc/clnt.h> 19 #include <linux/nfs_fs.h> 20 #include <linux/nfs_page.h> 21 22 #include <asm/system.h> 23 24 #include "nfs4_fs.h" 25 #include "internal.h" 26 #include "iostat.h" 27 #include "fscache.h" 28 #include "pnfs.h" 29 30 #define NFSDBG_FACILITY NFSDBG_PAGECACHE 31 32 static int nfs_pagein_multi(struct inode *, struct list_head *, unsigned int, size_t, int); 33 static int nfs_pagein_one(struct inode *, struct list_head *, unsigned int, size_t, int); 34 static const struct rpc_call_ops nfs_read_partial_ops; 35 static const struct rpc_call_ops nfs_read_full_ops; 36 37 static struct kmem_cache *nfs_rdata_cachep; 38 static mempool_t *nfs_rdata_mempool; 39 40 #define MIN_POOL_READ (32) 41 42 struct nfs_read_data *nfs_readdata_alloc(unsigned int pagecount) 43 { 44 struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_KERNEL); 45 46 if (p) { 47 memset(p, 0, sizeof(*p)); 48 INIT_LIST_HEAD(&p->pages); 49 p->npages = pagecount; 50 if (pagecount <= ARRAY_SIZE(p->page_array)) 51 p->pagevec = p->page_array; 52 else { 53 p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_KERNEL); 54 if (!p->pagevec) { 55 mempool_free(p, nfs_rdata_mempool); 56 p = NULL; 57 } 58 } 59 } 60 return p; 61 } 62 63 void nfs_readdata_free(struct nfs_read_data *p) 64 { 65 if (p && (p->pagevec != &p->page_array[0])) 66 kfree(p->pagevec); 67 mempool_free(p, nfs_rdata_mempool); 68 } 69 70 static void nfs_readdata_release(struct nfs_read_data *rdata) 71 { 72 put_nfs_open_context(rdata->args.context); 73 nfs_readdata_free(rdata); 74 } 75 76 static 77 int nfs_return_empty_page(struct page *page) 78 { 79 zero_user(page, 0, PAGE_CACHE_SIZE); 80 SetPageUptodate(page); 81 unlock_page(page); 82 return 0; 83 } 84 85 static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data) 86 { 87 unsigned int remainder = data->args.count - data->res.count; 88 unsigned int base = data->args.pgbase + data->res.count; 89 unsigned int pglen; 90 struct page **pages; 91 92 if (data->res.eof == 0 || remainder == 0) 93 return; 94 /* 95 * Note: "remainder" can never be negative, since we check for 96 * this in the XDR code. 97 */ 98 pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; 99 base &= ~PAGE_CACHE_MASK; 100 pglen = PAGE_CACHE_SIZE - base; 101 for (;;) { 102 if (remainder <= pglen) { 103 zero_user(*pages, base, remainder); 104 break; 105 } 106 zero_user(*pages, base, pglen); 107 pages++; 108 remainder -= pglen; 109 pglen = PAGE_CACHE_SIZE; 110 base = 0; 111 } 112 } 113 114 int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, 115 struct page *page) 116 { 117 LIST_HEAD(one_request); 118 struct nfs_page *new; 119 unsigned int len; 120 121 len = nfs_page_length(page); 122 if (len == 0) 123 return nfs_return_empty_page(page); 124 pnfs_update_layout(inode, ctx, IOMODE_READ); 125 new = nfs_create_request(ctx, inode, page, 0, len); 126 if (IS_ERR(new)) { 127 unlock_page(page); 128 return PTR_ERR(new); 129 } 130 if (len < PAGE_CACHE_SIZE) 131 zero_user_segment(page, len, PAGE_CACHE_SIZE); 132 133 nfs_list_add_request(new, &one_request); 134 if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) 135 nfs_pagein_multi(inode, &one_request, 1, len, 0); 136 else 137 nfs_pagein_one(inode, &one_request, 1, len, 0); 138 return 0; 139 } 140 141 static void nfs_readpage_release(struct nfs_page *req) 142 { 143 struct inode *d_inode = req->wb_context->path.dentry->d_inode; 144 145 if (PageUptodate(req->wb_page)) 146 nfs_readpage_to_fscache(d_inode, req->wb_page, 0); 147 148 unlock_page(req->wb_page); 149 150 dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", 151 req->wb_context->path.dentry->d_inode->i_sb->s_id, 152 (long long)NFS_FILEID(req->wb_context->path.dentry->d_inode), 153 req->wb_bytes, 154 (long long)req_offset(req)); 155 nfs_clear_request(req); 156 nfs_release_request(req); 157 } 158 159 /* 160 * Set up the NFS read request struct 161 */ 162 static int nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, 163 const struct rpc_call_ops *call_ops, 164 unsigned int count, unsigned int offset) 165 { 166 struct inode *inode = req->wb_context->path.dentry->d_inode; 167 int swap_flags = IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0; 168 struct rpc_task *task; 169 struct rpc_message msg = { 170 .rpc_argp = &data->args, 171 .rpc_resp = &data->res, 172 .rpc_cred = req->wb_context->cred, 173 }; 174 struct rpc_task_setup task_setup_data = { 175 .task = &data->task, 176 .rpc_client = NFS_CLIENT(inode), 177 .rpc_message = &msg, 178 .callback_ops = call_ops, 179 .callback_data = data, 180 .workqueue = nfsiod_workqueue, 181 .flags = RPC_TASK_ASYNC | swap_flags, 182 }; 183 184 data->req = req; 185 data->inode = inode; 186 data->cred = msg.rpc_cred; 187 188 data->args.fh = NFS_FH(inode); 189 data->args.offset = req_offset(req) + offset; 190 data->args.pgbase = req->wb_pgbase + offset; 191 data->args.pages = data->pagevec; 192 data->args.count = count; 193 data->args.context = get_nfs_open_context(req->wb_context); 194 data->args.lock_context = req->wb_lock_context; 195 196 data->res.fattr = &data->fattr; 197 data->res.count = count; 198 data->res.eof = 0; 199 nfs_fattr_init(&data->fattr); 200 201 /* Set up the initial task struct. */ 202 NFS_PROTO(inode)->read_setup(data, &msg); 203 204 dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n", 205 data->task.tk_pid, 206 inode->i_sb->s_id, 207 (long long)NFS_FILEID(inode), 208 count, 209 (unsigned long long)data->args.offset); 210 211 task = rpc_run_task(&task_setup_data); 212 if (IS_ERR(task)) 213 return PTR_ERR(task); 214 rpc_put_task(task); 215 return 0; 216 } 217 218 static void 219 nfs_async_read_error(struct list_head *head) 220 { 221 struct nfs_page *req; 222 223 while (!list_empty(head)) { 224 req = nfs_list_entry(head->next); 225 nfs_list_remove_request(req); 226 SetPageError(req->wb_page); 227 nfs_readpage_release(req); 228 } 229 } 230 231 /* 232 * Generate multiple requests to fill a single page. 233 * 234 * We optimize to reduce the number of read operations on the wire. If we 235 * detect that we're reading a page, or an area of a page, that is past the 236 * end of file, we do not generate NFS read operations but just clear the 237 * parts of the page that would have come back zero from the server anyway. 238 * 239 * We rely on the cached value of i_size to make this determination; another 240 * client can fill pages on the server past our cached end-of-file, but we 241 * won't see the new data until our attribute cache is updated. This is more 242 * or less conventional NFS client behavior. 243 */ 244 static int nfs_pagein_multi(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags) 245 { 246 struct nfs_page *req = nfs_list_entry(head->next); 247 struct page *page = req->wb_page; 248 struct nfs_read_data *data; 249 size_t rsize = NFS_SERVER(inode)->rsize, nbytes; 250 unsigned int offset; 251 int requests = 0; 252 int ret = 0; 253 LIST_HEAD(list); 254 255 nfs_list_remove_request(req); 256 257 nbytes = count; 258 do { 259 size_t len = min(nbytes,rsize); 260 261 data = nfs_readdata_alloc(1); 262 if (!data) 263 goto out_bad; 264 list_add(&data->pages, &list); 265 requests++; 266 nbytes -= len; 267 } while(nbytes != 0); 268 atomic_set(&req->wb_complete, requests); 269 270 ClearPageError(page); 271 offset = 0; 272 nbytes = count; 273 do { 274 int ret2; 275 276 data = list_entry(list.next, struct nfs_read_data, pages); 277 list_del_init(&data->pages); 278 279 data->pagevec[0] = page; 280 281 if (nbytes < rsize) 282 rsize = nbytes; 283 ret2 = nfs_read_rpcsetup(req, data, &nfs_read_partial_ops, 284 rsize, offset); 285 if (ret == 0) 286 ret = ret2; 287 offset += rsize; 288 nbytes -= rsize; 289 } while (nbytes != 0); 290 291 return ret; 292 293 out_bad: 294 while (!list_empty(&list)) { 295 data = list_entry(list.next, struct nfs_read_data, pages); 296 list_del(&data->pages); 297 nfs_readdata_free(data); 298 } 299 SetPageError(page); 300 nfs_readpage_release(req); 301 return -ENOMEM; 302 } 303 304 static int nfs_pagein_one(struct inode *inode, struct list_head *head, unsigned int npages, size_t count, int flags) 305 { 306 struct nfs_page *req; 307 struct page **pages; 308 struct nfs_read_data *data; 309 int ret = -ENOMEM; 310 311 data = nfs_readdata_alloc(npages); 312 if (!data) 313 goto out_bad; 314 315 pages = data->pagevec; 316 while (!list_empty(head)) { 317 req = nfs_list_entry(head->next); 318 nfs_list_remove_request(req); 319 nfs_list_add_request(req, &data->pages); 320 ClearPageError(req->wb_page); 321 *pages++ = req->wb_page; 322 } 323 req = nfs_list_entry(data->pages.next); 324 325 return nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0); 326 out_bad: 327 nfs_async_read_error(head); 328 return ret; 329 } 330 331 /* 332 * This is the callback from RPC telling us whether a reply was 333 * received or some error occurred (timeout or socket shutdown). 334 */ 335 int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data) 336 { 337 int status; 338 339 dprintk("NFS: %s: %5u, (status %d)\n", __func__, task->tk_pid, 340 task->tk_status); 341 342 status = NFS_PROTO(data->inode)->read_done(task, data); 343 if (status != 0) 344 return status; 345 346 nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count); 347 348 if (task->tk_status == -ESTALE) { 349 set_bit(NFS_INO_STALE, &NFS_I(data->inode)->flags); 350 nfs_mark_for_revalidate(data->inode); 351 } 352 return 0; 353 } 354 355 static void nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data) 356 { 357 struct nfs_readargs *argp = &data->args; 358 struct nfs_readres *resp = &data->res; 359 360 if (resp->eof || resp->count == argp->count) 361 return; 362 363 /* This is a short read! */ 364 nfs_inc_stats(data->inode, NFSIOS_SHORTREAD); 365 /* Has the server at least made some progress? */ 366 if (resp->count == 0) 367 return; 368 369 /* Yes, so retry the read at the end of the data */ 370 argp->offset += resp->count; 371 argp->pgbase += resp->count; 372 argp->count -= resp->count; 373 nfs_restart_rpc(task, NFS_SERVER(data->inode)->nfs_client); 374 } 375 376 /* 377 * Handle a read reply that fills part of a page. 378 */ 379 static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata) 380 { 381 struct nfs_read_data *data = calldata; 382 383 if (nfs_readpage_result(task, data) != 0) 384 return; 385 if (task->tk_status < 0) 386 return; 387 388 nfs_readpage_truncate_uninitialised_page(data); 389 nfs_readpage_retry(task, data); 390 } 391 392 static void nfs_readpage_release_partial(void *calldata) 393 { 394 struct nfs_read_data *data = calldata; 395 struct nfs_page *req = data->req; 396 struct page *page = req->wb_page; 397 int status = data->task.tk_status; 398 399 if (status < 0) 400 SetPageError(page); 401 402 if (atomic_dec_and_test(&req->wb_complete)) { 403 if (!PageError(page)) 404 SetPageUptodate(page); 405 nfs_readpage_release(req); 406 } 407 nfs_readdata_release(calldata); 408 } 409 410 #if defined(CONFIG_NFS_V4_1) 411 void nfs_read_prepare(struct rpc_task *task, void *calldata) 412 { 413 struct nfs_read_data *data = calldata; 414 415 if (nfs4_setup_sequence(NFS_SERVER(data->inode), 416 &data->args.seq_args, &data->res.seq_res, 417 0, task)) 418 return; 419 rpc_call_start(task); 420 } 421 #endif /* CONFIG_NFS_V4_1 */ 422 423 static const struct rpc_call_ops nfs_read_partial_ops = { 424 #if defined(CONFIG_NFS_V4_1) 425 .rpc_call_prepare = nfs_read_prepare, 426 #endif /* CONFIG_NFS_V4_1 */ 427 .rpc_call_done = nfs_readpage_result_partial, 428 .rpc_release = nfs_readpage_release_partial, 429 }; 430 431 static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data) 432 { 433 unsigned int count = data->res.count; 434 unsigned int base = data->args.pgbase; 435 struct page **pages; 436 437 if (data->res.eof) 438 count = data->args.count; 439 if (unlikely(count == 0)) 440 return; 441 pages = &data->args.pages[base >> PAGE_CACHE_SHIFT]; 442 base &= ~PAGE_CACHE_MASK; 443 count += base; 444 for (;count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++) 445 SetPageUptodate(*pages); 446 if (count == 0) 447 return; 448 /* Was this a short read? */ 449 if (data->res.eof || data->res.count == data->args.count) 450 SetPageUptodate(*pages); 451 } 452 453 /* 454 * This is the callback from RPC telling us whether a reply was 455 * received or some error occurred (timeout or socket shutdown). 456 */ 457 static void nfs_readpage_result_full(struct rpc_task *task, void *calldata) 458 { 459 struct nfs_read_data *data = calldata; 460 461 if (nfs_readpage_result(task, data) != 0) 462 return; 463 if (task->tk_status < 0) 464 return; 465 /* 466 * Note: nfs_readpage_retry may change the values of 467 * data->args. In the multi-page case, we therefore need 468 * to ensure that we call nfs_readpage_set_pages_uptodate() 469 * first. 470 */ 471 nfs_readpage_truncate_uninitialised_page(data); 472 nfs_readpage_set_pages_uptodate(data); 473 nfs_readpage_retry(task, data); 474 } 475 476 static void nfs_readpage_release_full(void *calldata) 477 { 478 struct nfs_read_data *data = calldata; 479 480 while (!list_empty(&data->pages)) { 481 struct nfs_page *req = nfs_list_entry(data->pages.next); 482 483 nfs_list_remove_request(req); 484 nfs_readpage_release(req); 485 } 486 nfs_readdata_release(calldata); 487 } 488 489 static const struct rpc_call_ops nfs_read_full_ops = { 490 #if defined(CONFIG_NFS_V4_1) 491 .rpc_call_prepare = nfs_read_prepare, 492 #endif /* CONFIG_NFS_V4_1 */ 493 .rpc_call_done = nfs_readpage_result_full, 494 .rpc_release = nfs_readpage_release_full, 495 }; 496 497 /* 498 * Read a page over NFS. 499 * We read the page synchronously in the following case: 500 * - The error flag is set for this page. This happens only when a 501 * previous async read operation failed. 502 */ 503 int nfs_readpage(struct file *file, struct page *page) 504 { 505 struct nfs_open_context *ctx; 506 struct inode *inode = page->mapping->host; 507 int error; 508 509 dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", 510 page, PAGE_CACHE_SIZE, page->index); 511 nfs_inc_stats(inode, NFSIOS_VFSREADPAGE); 512 nfs_add_stats(inode, NFSIOS_READPAGES, 1); 513 514 /* 515 * Try to flush any pending writes to the file.. 516 * 517 * NOTE! Because we own the page lock, there cannot 518 * be any new pending writes generated at this point 519 * for this page (other pages can be written to). 520 */ 521 error = nfs_wb_page(inode, page); 522 if (error) 523 goto out_unlock; 524 if (PageUptodate(page)) 525 goto out_unlock; 526 527 error = -ESTALE; 528 if (NFS_STALE(inode)) 529 goto out_unlock; 530 531 if (file == NULL) { 532 error = -EBADF; 533 ctx = nfs_find_open_context(inode, NULL, FMODE_READ); 534 if (ctx == NULL) 535 goto out_unlock; 536 } else 537 ctx = get_nfs_open_context(nfs_file_open_context(file)); 538 539 if (!IS_SYNC(inode)) { 540 error = nfs_readpage_from_fscache(ctx, inode, page); 541 if (error == 0) 542 goto out; 543 } 544 545 error = nfs_readpage_async(ctx, inode, page); 546 547 out: 548 put_nfs_open_context(ctx); 549 return error; 550 out_unlock: 551 unlock_page(page); 552 return error; 553 } 554 555 struct nfs_readdesc { 556 struct nfs_pageio_descriptor *pgio; 557 struct nfs_open_context *ctx; 558 }; 559 560 static int 561 readpage_async_filler(void *data, struct page *page) 562 { 563 struct nfs_readdesc *desc = (struct nfs_readdesc *)data; 564 struct inode *inode = page->mapping->host; 565 struct nfs_page *new; 566 unsigned int len; 567 int error; 568 569 len = nfs_page_length(page); 570 if (len == 0) 571 return nfs_return_empty_page(page); 572 573 new = nfs_create_request(desc->ctx, inode, page, 0, len); 574 if (IS_ERR(new)) 575 goto out_error; 576 577 if (len < PAGE_CACHE_SIZE) 578 zero_user_segment(page, len, PAGE_CACHE_SIZE); 579 if (!nfs_pageio_add_request(desc->pgio, new)) { 580 error = desc->pgio->pg_error; 581 goto out_unlock; 582 } 583 return 0; 584 out_error: 585 error = PTR_ERR(new); 586 SetPageError(page); 587 out_unlock: 588 unlock_page(page); 589 return error; 590 } 591 592 int nfs_readpages(struct file *filp, struct address_space *mapping, 593 struct list_head *pages, unsigned nr_pages) 594 { 595 struct nfs_pageio_descriptor pgio; 596 struct nfs_readdesc desc = { 597 .pgio = &pgio, 598 }; 599 struct inode *inode = mapping->host; 600 struct nfs_server *server = NFS_SERVER(inode); 601 size_t rsize = server->rsize; 602 unsigned long npages; 603 int ret = -ESTALE; 604 605 dprintk("NFS: nfs_readpages (%s/%Ld %d)\n", 606 inode->i_sb->s_id, 607 (long long)NFS_FILEID(inode), 608 nr_pages); 609 nfs_inc_stats(inode, NFSIOS_VFSREADPAGES); 610 611 if (NFS_STALE(inode)) 612 goto out; 613 614 if (filp == NULL) { 615 desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ); 616 if (desc.ctx == NULL) 617 return -EBADF; 618 } else 619 desc.ctx = get_nfs_open_context(nfs_file_open_context(filp)); 620 621 /* attempt to read as many of the pages as possible from the cache 622 * - this returns -ENOBUFS immediately if the cookie is negative 623 */ 624 ret = nfs_readpages_from_fscache(desc.ctx, inode, mapping, 625 pages, &nr_pages); 626 if (ret == 0) 627 goto read_complete; /* all pages were read */ 628 629 pnfs_update_layout(inode, desc.ctx, IOMODE_READ); 630 if (rsize < PAGE_CACHE_SIZE) 631 nfs_pageio_init(&pgio, inode, nfs_pagein_multi, rsize, 0); 632 else 633 nfs_pageio_init(&pgio, inode, nfs_pagein_one, rsize, 0); 634 635 ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); 636 637 nfs_pageio_complete(&pgio); 638 npages = (pgio.pg_bytes_written + PAGE_CACHE_SIZE - 1) >> PAGE_CACHE_SHIFT; 639 nfs_add_stats(inode, NFSIOS_READPAGES, npages); 640 read_complete: 641 put_nfs_open_context(desc.ctx); 642 out: 643 return ret; 644 } 645 646 int __init nfs_init_readpagecache(void) 647 { 648 nfs_rdata_cachep = kmem_cache_create("nfs_read_data", 649 sizeof(struct nfs_read_data), 650 0, SLAB_HWCACHE_ALIGN, 651 NULL); 652 if (nfs_rdata_cachep == NULL) 653 return -ENOMEM; 654 655 nfs_rdata_mempool = mempool_create_slab_pool(MIN_POOL_READ, 656 nfs_rdata_cachep); 657 if (nfs_rdata_mempool == NULL) 658 return -ENOMEM; 659 660 return 0; 661 } 662 663 void nfs_destroy_readpagecache(void) 664 { 665 mempool_destroy(nfs_rdata_mempool); 666 kmem_cache_destroy(nfs_rdata_cachep); 667 } 668