/*
 * linux/fs/nfs/read.c
 *
 * Block I/O for NFS
 *
 * Partial copy of Linus' read cache modifications to fs/nfs/file.c
 * modified for async RPC by okir@monad.swb.de
 */

#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/smp_lock.h>

#include <asm/system.h>

#include "internal.h"
#include "iostat.h"

#define NFSDBG_FACILITY		NFSDBG_PAGECACHE

static int nfs_pagein_one(struct list_head *, struct inode *);
static const struct rpc_call_ops nfs_read_partial_ops;
static const struct rpc_call_ops nfs_read_full_ops;

static struct kmem_cache *nfs_rdata_cachep;
static mempool_t *nfs_rdata_mempool;

#define MIN_POOL_READ	(32)

struct nfs_read_data *nfs_readdata_alloc(size_t len)
{
	unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_NOFS);

	if (p) {
		memset(p, 0, sizeof(*p));
		INIT_LIST_HEAD(&p->pages);
		p->npages = pagecount;
		if (pagecount <= ARRAY_SIZE(p->page_array))
			p->pagevec = p->page_array;
		else {
			p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
			if (!p->pagevec) {
				mempool_free(p, nfs_rdata_mempool);
				p = NULL;
			}
		}
	}
	return p;
}

static void nfs_readdata_rcu_free(struct rcu_head *head)
{
	struct nfs_read_data *p = container_of(head, struct nfs_read_data, task.u.tk_rcu);
	if (p && (p->pagevec != &p->page_array[0]))
		kfree(p->pagevec);
	mempool_free(p, nfs_rdata_mempool);
}

static void nfs_readdata_free(struct nfs_read_data *rdata)
{
	call_rcu_bh(&rdata->task.u.tk_rcu, nfs_readdata_rcu_free);
}

void nfs_readdata_release(void *data)
{
	nfs_readdata_free(data);
}

static
int nfs_return_empty_page(struct page *page)
{
	memclear_highpage_flush(page, 0, PAGE_CACHE_SIZE);
	SetPageUptodate(page);
	unlock_page(page);
	return 0;
}
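/*
 * Zero out any part of the requested range that the server did not
 * return. This only matters when the reply stopped short because it
 * hit the end of file: the unreturned tail must read back as zeroes
 * rather than whatever stale data the pages happened to contain.
 */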
static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
{
	unsigned int remainder = data->args.count - data->res.count;
	unsigned int base = data->args.pgbase + data->res.count;
	unsigned int pglen;
	struct page **pages;

	if (data->res.eof == 0 || remainder == 0)
		return;
	/*
	 * Note: "remainder" can never be negative, since we check for
	 * this in the XDR code.
	 */
	pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
	base &= ~PAGE_CACHE_MASK;
	pglen = PAGE_CACHE_SIZE - base;
	for (;;) {
		if (remainder <= pglen) {
			memclear_highpage_flush(*pages, base, remainder);
			break;
		}
		memclear_highpage_flush(*pages, base, pglen);
		pages++;
		remainder -= pglen;
		pglen = PAGE_CACHE_SIZE;
		base = 0;
	}
}

static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
		struct page *page)
{
	LIST_HEAD(one_request);
	struct nfs_page	*new;
	unsigned int len;

	len = nfs_page_length(page);
	if (len == 0)
		return nfs_return_empty_page(page);
	new = nfs_create_request(ctx, inode, page, 0, len);
	if (IS_ERR(new)) {
		unlock_page(page);
		return PTR_ERR(new);
	}
	if (len < PAGE_CACHE_SIZE)
		memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);

	nfs_list_add_request(new, &one_request);
	nfs_pagein_one(&one_request, inode);
	return 0;
}

static void nfs_readpage_release(struct nfs_page *req)
{
	unlock_page(req->wb_page);

	dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
			req->wb_context->dentry->d_inode->i_sb->s_id,
			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
			req->wb_bytes,
			(long long)req_offset(req));
	nfs_clear_request(req);
	nfs_release_request(req);
}

/*
 * Set up the NFS read request struct
 */
static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
		const struct rpc_call_ops *call_ops,
		unsigned int count, unsigned int offset)
{
	struct inode *inode;
	int flags;

	data->req	  = req;
	data->inode	  = inode = req->wb_context->dentry->d_inode;
	data->cred	  = req->wb_context->cred;

	data->args.fh     = NFS_FH(inode);
	data->args.offset = req_offset(req) + offset;
	data->args.pgbase = req->wb_pgbase + offset;
	data->args.pages  = data->pagevec;
	data->args.count  = count;
	data->args.context = req->wb_context;

	data->res.fattr   = &data->fattr;
	data->res.count   = count;
	data->res.eof     = 0;
	nfs_fattr_init(&data->fattr);

	/* Set up the initial task struct. */
	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0);
	rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data);
	NFS_PROTO(inode)->read_setup(data);

	data->task.tk_cookie = (unsigned long)inode;

	dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
			data->task.tk_pid,
			inode->i_sb->s_id,
			(long long)NFS_FILEID(inode),
			count,
			(unsigned long long)data->args.offset);
}
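/*
 * Error path for an async read batch: flag every page still on the
 * list as being in error and release its request, leaving the pages
 * unlocked so that a later nfs_readpage() can retry them.
 */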
static void
nfs_async_read_error(struct list_head *head)
{
	struct nfs_page	*req;

	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		SetPageError(req->wb_page);
		nfs_readpage_release(req);
	}
}

/*
 * Start an async read operation
 */
static void nfs_execute_read(struct nfs_read_data *data)
{
	struct rpc_clnt *clnt = NFS_CLIENT(data->inode);
	sigset_t oldset;

	rpc_clnt_sigmask(clnt, &oldset);
	rpc_execute(&data->task);
	rpc_clnt_sigunmask(clnt, &oldset);
}

/*
 * Generate multiple requests to fill a single page.
 *
 * We optimize to reduce the number of read operations on the wire.  If we
 * detect that we're reading a page, or an area of a page, that is past the
 * end of file, we do not generate NFS read operations but just clear the
 * parts of the page that would have come back zero from the server anyway.
 *
 * We rely on the cached value of i_size to make this determination; another
 * client can fill pages on the server past our cached end-of-file, but we
 * won't see the new data until our attribute cache is updated.  This is more
 * or less conventional NFS client behavior.
 */
static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
{
	struct nfs_page *req = nfs_list_entry(head->next);
	struct page *page = req->wb_page;
	struct nfs_read_data *data;
	size_t rsize = NFS_SERVER(inode)->rsize, nbytes;
	unsigned int offset;
	int requests = 0;
	LIST_HEAD(list);

	nfs_list_remove_request(req);

	nbytes = req->wb_bytes;
	do {
		size_t len = min(nbytes, rsize);

		data = nfs_readdata_alloc(len);
		if (!data)
			goto out_bad;
		INIT_LIST_HEAD(&data->pages);
		list_add(&data->pages, &list);
		requests++;
		nbytes -= len;
	} while (nbytes != 0);
	atomic_set(&req->wb_complete, requests);

	ClearPageError(page);
	offset = 0;
	nbytes = req->wb_bytes;
	do {
		data = list_entry(list.next, struct nfs_read_data, pages);
		list_del_init(&data->pages);

		data->pagevec[0] = page;

		if (nbytes > rsize) {
			nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
					rsize, offset);
			offset += rsize;
			nbytes -= rsize;
		} else {
			nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
					nbytes, offset);
			nbytes = 0;
		}
		nfs_execute_read(data);
	} while (nbytes != 0);

	return 0;

out_bad:
	while (!list_empty(&list)) {
		data = list_entry(list.next, struct nfs_read_data, pages);
		list_del(&data->pages);
		nfs_readdata_free(data);
	}
	SetPageError(page);
	nfs_readpage_release(req);
	return -ENOMEM;
}
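/*
 * Read the whole list of requests with a single READ call when the
 * server's rsize covers at least a full page; otherwise fall back to
 * nfs_pagein_multi(), which splits each page across several calls.
 */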
static int nfs_pagein_one(struct list_head *head, struct inode *inode)
{
	struct nfs_page		*req;
	struct page		**pages;
	struct nfs_read_data	*data;
	unsigned int		count;

	if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
		return nfs_pagein_multi(head, inode);

	data = nfs_readdata_alloc(NFS_SERVER(inode)->rsize);
	if (!data)
		goto out_bad;

	INIT_LIST_HEAD(&data->pages);
	pages = data->pagevec;
	count = 0;
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_list_add_request(req, &data->pages);
		ClearPageError(req->wb_page);
		*pages++ = req->wb_page;
		count += req->wb_bytes;
	}
	req = nfs_list_entry(data->pages.next);

	nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0);

	nfs_execute_read(data);
	return 0;
out_bad:
	nfs_async_read_error(head);
	return -ENOMEM;
}

static int
nfs_pagein_list(struct list_head *head, int rpages)
{
	LIST_HEAD(one_request);
	struct nfs_page		*req;
	int			error = 0;
	unsigned int		pages = 0;

	while (!list_empty(head)) {
		pages += nfs_coalesce_requests(head, &one_request, rpages);
		req = nfs_list_entry(one_request.next);
		error = nfs_pagein_one(&one_request, req->wb_context->dentry->d_inode);
		if (error < 0)
			break;
	}
	if (error >= 0)
		return pages;

	nfs_async_read_error(head);
	return error;
}

/*
 * This is the callback from RPC telling us whether a reply was
 * received or some error occurred (timeout or socket shutdown).
 */
int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
{
	int status;

	dprintk("NFS: %s: %5u, (status %d)\n", __FUNCTION__, task->tk_pid,
			task->tk_status);

	status = NFS_PROTO(data->inode)->read_done(task, data);
	if (status != 0)
		return status;

	nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count);

	if (task->tk_status == -ESTALE) {
		set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode));
		nfs_mark_for_revalidate(data->inode);
	}
	spin_lock(&data->inode->i_lock);
	NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
	spin_unlock(&data->inode->i_lock);
	return 0;
}
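/*
 * The server returned fewer bytes than requested without reaching
 * EOF. Advance the request past the data already received and
 * restart the RPC call. Returns -EAGAIN if the call was restarted,
 * 0 if the reply was complete or the server made no progress.
 */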
static int nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
{
	struct nfs_readargs *argp = &data->args;
	struct nfs_readres *resp = &data->res;

	if (resp->eof || resp->count == argp->count)
		return 0;

	/* This is a short read! */
	nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
	/* Has the server at least made some progress? */
	if (resp->count == 0)
		return 0;

	/* Yes, so retry the read at the end of the data */
	argp->offset += resp->count;
	argp->pgbase += resp->count;
	argp->count -= resp->count;
	rpc_restart_call(task);
	return -EAGAIN;
}

/*
 * Handle a read reply that fills part of a page.
 */
static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
{
	struct nfs_read_data *data = calldata;
	struct nfs_page *req = data->req;
	struct page *page = req->wb_page;

	if (nfs_readpage_result(task, data) != 0)
		return;

	if (likely(task->tk_status >= 0)) {
		nfs_readpage_truncate_uninitialised_page(data);
		if (nfs_readpage_retry(task, data) != 0)
			return;
	}
	if (unlikely(task->tk_status < 0))
		SetPageError(page);
	if (atomic_dec_and_test(&req->wb_complete)) {
		if (!PageError(page))
			SetPageUptodate(page);
		nfs_readpage_release(req);
	}
}

static const struct rpc_call_ops nfs_read_partial_ops = {
	.rpc_call_done = nfs_readpage_result_partial,
	.rpc_release = nfs_readdata_release,
};

static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
{
	unsigned int count = data->res.count;
	unsigned int base = data->args.pgbase;
	struct page **pages;

	if (data->res.eof)
		count = data->args.count;
	if (unlikely(count == 0))
		return;
	pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
	base &= ~PAGE_CACHE_MASK;
	count += base;
	for (; count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
		SetPageUptodate(*pages);
	if (count == 0)
		return;
	/* Was this a short read? */
	if (data->res.eof || data->res.count == data->args.count)
		SetPageUptodate(*pages);
}

/*
 * This is the callback from RPC telling us whether a reply was
 * received or some error occurred (timeout or socket shutdown).
 */
static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
{
	struct nfs_read_data *data = calldata;

	if (nfs_readpage_result(task, data) != 0)
		return;
	/*
	 * Note: nfs_readpage_retry may change the values of
	 * data->args. In the multi-page case, we therefore need
	 * to ensure that we call nfs_readpage_set_pages_uptodate()
	 * first.
	 */
	if (likely(task->tk_status >= 0)) {
		nfs_readpage_truncate_uninitialised_page(data);
		nfs_readpage_set_pages_uptodate(data);
		if (nfs_readpage_retry(task, data) != 0)
			return;
	}
	while (!list_empty(&data->pages)) {
		struct nfs_page *req = nfs_list_entry(data->pages.next);

		nfs_list_remove_request(req);
		nfs_readpage_release(req);
	}
}

static const struct rpc_call_ops nfs_read_full_ops = {
	.rpc_call_done = nfs_readpage_result_full,
	.rpc_release = nfs_readdata_release,
};

/*
 * Read a page over NFS.
 * We read the page synchronously in the following case:
 *  -	The error flag is set for this page. This happens only when a
 *	previous async read operation failed.
 */
int nfs_readpage(struct file *file, struct page *page)
{
	struct nfs_open_context *ctx;
	struct inode *inode = page->mapping->host;
	int		error;

	dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
		page, PAGE_CACHE_SIZE, page->index);
	nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
	nfs_add_stats(inode, NFSIOS_READPAGES, 1);

	/*
	 * Try to flush any pending writes to the file...
	 *
	 * NOTE! Because we own the page lock, there cannot
	 * be any new pending writes generated at this point
	 * for this page (other pages can be written to).
	 */
	error = nfs_wb_page(inode, page);
	if (error)
		goto out_error;

	error = -ESTALE;
	if (NFS_STALE(inode))
		goto out_error;

	if (file == NULL) {
		error = -EBADF;
		ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
		if (ctx == NULL)
			goto out_error;
	} else
		ctx = get_nfs_open_context((struct nfs_open_context *)
				file->private_data);

	error = nfs_readpage_async(ctx, inode, page);

	put_nfs_open_context(ctx);
	return error;

out_error:
	unlock_page(page);
	return error;
}

struct nfs_readdesc {
	struct list_head *head;
	struct nfs_open_context *ctx;
};

static int
readpage_async_filler(void *data, struct page *page)
{
	struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
	struct inode *inode = page->mapping->host;
	struct nfs_page *new;
	unsigned int len;

	nfs_wb_page(inode, page);
	len = nfs_page_length(page);
	if (len == 0)
		return nfs_return_empty_page(page);
	new = nfs_create_request(desc->ctx, inode, page, 0, len);
	if (IS_ERR(new)) {
		SetPageError(page);
		unlock_page(page);
		return PTR_ERR(new);
	}
	if (len < PAGE_CACHE_SIZE)
		memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
	nfs_list_add_request(new, desc->head);
	return 0;
}
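/*
 * Readahead entry point: queue up each page in the readahead window
 * with readpage_async_filler(), then send the whole batch to the
 * server via nfs_pagein_list().
 */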
int nfs_readpages(struct file *filp, struct address_space *mapping,
		struct list_head *pages, unsigned nr_pages)
{
	LIST_HEAD(head);
	struct nfs_readdesc desc = {
		.head		= &head,
	};
	struct inode *inode = mapping->host;
	struct nfs_server *server = NFS_SERVER(inode);
	int ret = -ESTALE;

	dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
			inode->i_sb->s_id,
			(long long)NFS_FILEID(inode),
			nr_pages);
	nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);

	if (NFS_STALE(inode))
		goto out;

	if (filp == NULL) {
		desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
		if (desc.ctx == NULL)
			return -EBADF;
	} else
		desc.ctx = get_nfs_open_context((struct nfs_open_context *)
				filp->private_data);
	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
	if (!list_empty(&head)) {
		int err = nfs_pagein_list(&head, server->rpages);
		if (!ret)
			nfs_add_stats(inode, NFSIOS_READPAGES, err);
		ret = err;
	}
	put_nfs_open_context(desc.ctx);
out:
	return ret;
}

int __init nfs_init_readpagecache(void)
{
	nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
					     sizeof(struct nfs_read_data),
					     0, SLAB_HWCACHE_ALIGN,
					     NULL, NULL);
	if (nfs_rdata_cachep == NULL)
		return -ENOMEM;

	nfs_rdata_mempool = mempool_create_slab_pool(MIN_POOL_READ,
						     nfs_rdata_cachep);
	if (nfs_rdata_mempool == NULL)
		return -ENOMEM;

	return 0;
}

void nfs_destroy_readpagecache(void)
{
	mempool_destroy(nfs_rdata_mempool);
	kmem_cache_destroy(nfs_rdata_cachep);
}
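/*
 * Note: nfs_init_readpagecache() and nfs_destroy_readpagecache() are
 * expected to be called once each from the NFS client's module setup
 * and teardown paths. The mempool created above keeps at least
 * MIN_POOL_READ read descriptors in reserve so that reads can still
 * make progress under memory pressure.
 */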