/*
 * linux/fs/nfs/read.c
 *
 * Block I/O for NFS
 *
 * Partial copy of Linus' read cache modifications to fs/nfs/file.c
 * modified for async RPC by okir@monad.swb.de
 *
 * We do an ugly hack here in order to return proper error codes to the
 * user program when a read request failed: since generic_file_read
 * only checks the return value of inode->i_op->readpage() which is always 0
 * for async RPC, we set the error bit of the page to 1 when an error occurs,
 * and make nfs_readpage transmit requests synchronously when encountering this.
 * This is only a small problem, though, since we now retry all operations
 * within the RPC code when root squashing is suspected.
 */

#include <linux/config.h>
#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/smp_lock.h>

#include <asm/system.h>

#define NFSDBG_FACILITY		NFSDBG_PAGECACHE

static int nfs_pagein_one(struct list_head *, struct inode *);
static void nfs_readpage_result_partial(struct nfs_read_data *, int);
static void nfs_readpage_result_full(struct nfs_read_data *, int);

static kmem_cache_t *nfs_rdata_cachep;
mempool_t *nfs_rdata_mempool;

#define MIN_POOL_READ	(32)

void nfs_readdata_release(void *data)
{
	nfs_readdata_free(data);
}

/*
 * Return how many bytes of @page lie below the cached i_size:
 * a full page, the tail of a partial final page, or zero for
 * pages entirely past end-of-file.
 */
static
unsigned int nfs_page_length(struct inode *inode, struct page *page)
{
	loff_t i_size = i_size_read(inode);
	unsigned long idx;

	if (i_size <= 0)
		return 0;
	idx = (i_size - 1) >> PAGE_CACHE_SHIFT;
	if (page->index > idx)
		return 0;
	if (page->index != idx)
		return PAGE_CACHE_SIZE;
	return 1 + ((i_size - 1) & (PAGE_CACHE_SIZE - 1));
}
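/*
 * Worked example for nfs_page_length() (illustrative numbers, not from
 * any particular configuration): assume PAGE_CACHE_SIZE == 4096 and a
 * cached i_size of 10000.  The last valid page index is
 * (10000 - 1) >> 12 == 2, so pages 0 and 1 return a full 4096, page 2
 * returns 1 + ((10000 - 1) & 4095) == 1808 (and 2 * 4096 + 1808 ==
 * 10000), and any higher index returns 0 so the caller just zero-fills.
 */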
static
int nfs_return_empty_page(struct page *page)
{
	memclear_highpage_flush(page, 0, PAGE_CACHE_SIZE);
	SetPageUptodate(page);
	unlock_page(page);
	return 0;
}

/*
 * Read a page synchronously.
 */
static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
		struct page *page)
{
	unsigned int	rsize = NFS_SERVER(inode)->rsize;
	unsigned int	count = PAGE_CACHE_SIZE;
	int		result;
	struct nfs_read_data *rdata;

	rdata = nfs_readdata_alloc(1);
	if (!rdata)
		return -ENOMEM;

	memset(rdata, 0, sizeof(*rdata));
	rdata->flags = (IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0);
	rdata->cred = ctx->cred;
	rdata->inode = inode;
	INIT_LIST_HEAD(&rdata->pages);
	rdata->args.fh = NFS_FH(inode);
	rdata->args.context = ctx;
	rdata->args.pages = &page;
	rdata->args.pgbase = 0UL;
	rdata->args.count = rsize;
	rdata->res.fattr = &rdata->fattr;

	dprintk("NFS: nfs_readpage_sync(%p)\n", page);

	/*
	 * This works now because the socket layer never tries to DMA
	 * into this buffer directly.
	 */
	do {
		if (count < rsize)
			rdata->args.count = count;
		rdata->res.count = rdata->args.count;
		rdata->args.offset = page_offset(page) + rdata->args.pgbase;

		dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Lu, %u)\n",
			NFS_SERVER(inode)->hostname,
			inode->i_sb->s_id,
			(long long)NFS_FILEID(inode),
			(unsigned long long)rdata->args.pgbase,
			rdata->args.count);

		lock_kernel();
		result = NFS_PROTO(inode)->read(rdata);
		unlock_kernel();

		/*
		 * Even if we had a partial success we can't mark the page
		 * cache valid.
		 */
		if (result < 0) {
			if (result == -EISDIR)
				result = -EINVAL;
			goto io_error;
		}
		count -= result;
		rdata->args.pgbase += result;
		/* Note: result == 0 should only happen if we're caching
		 * a write that extends the file and punches a hole.
		 */
		if (rdata->res.eof != 0 || result == 0)
			break;
	} while (count);
	spin_lock(&inode->i_lock);
	NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
	spin_unlock(&inode->i_lock);

	if (count)
		memclear_highpage_flush(page, rdata->args.pgbase, count);
	SetPageUptodate(page);
	if (PageError(page))
		ClearPageError(page);
	result = 0;

io_error:
	unlock_page(page);
	nfs_readdata_free(rdata);
	return result;
}

static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
		struct page *page)
{
	LIST_HEAD(one_request);
	struct nfs_page	*new;
	unsigned int len;

	len = nfs_page_length(inode, page);
	if (len == 0)
		return nfs_return_empty_page(page);
	new = nfs_create_request(ctx, inode, page, 0, len);
	if (IS_ERR(new)) {
		unlock_page(page);
		return PTR_ERR(new);
	}
	if (len < PAGE_CACHE_SIZE)
		memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);

	nfs_list_add_request(new, &one_request);
	nfs_pagein_one(&one_request, inode);
	return 0;
}

static void nfs_readpage_release(struct nfs_page *req)
{
	unlock_page(req->wb_page);

	dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
			req->wb_context->dentry->d_inode->i_sb->s_id,
			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
			req->wb_bytes,
			(long long)req_offset(req));
	nfs_clear_request(req);
	nfs_release_request(req);
}

/*
 * Set up the NFS read request struct
 */
static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
		unsigned int count, unsigned int offset)
{
	struct inode *inode;

	data->req	  = req;
	data->inode	  = inode = req->wb_context->dentry->d_inode;
	data->cred	  = req->wb_context->cred;

	data->args.fh     = NFS_FH(inode);
	data->args.offset = req_offset(req) + offset;
	data->args.pgbase = req->wb_pgbase + offset;
	data->args.pages  = data->pagevec;
	data->args.count  = count;
	data->args.context = req->wb_context;

	data->res.fattr   = &data->fattr;
	data->res.count   = count;
	data->res.eof     = 0;
	nfs_fattr_init(&data->fattr);

	NFS_PROTO(inode)->read_setup(data);

	data->task.tk_cookie = (unsigned long)inode;

	dprintk("NFS: %4d initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
			data->task.tk_pid,
			inode->i_sb->s_id,
			(long long)NFS_FILEID(inode),
			count,
			(unsigned long long)data->args.offset);
}
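/*
 * Illustrative numbers for nfs_read_rpcsetup() (made up, assuming a
 * 4096-byte PAGE_CACHE_SIZE): for a request describing the whole of
 * page index 3, req_offset(req) is 3 * 4096 == 12288.  A child read of
 * 1024 bytes built with offset == 2048 therefore goes on the wire with
 * args.offset == 14336 and args.pgbase == wb_pgbase + 2048, so the
 * reply data lands at the matching position inside the page.
 */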
static void
nfs_async_read_error(struct list_head *head)
{
	struct nfs_page	*req;

	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		SetPageError(req->wb_page);
		nfs_readpage_release(req);
	}
}

/*
 * Start an async read operation
 */
static void nfs_execute_read(struct nfs_read_data *data)
{
	struct rpc_clnt *clnt = NFS_CLIENT(data->inode);
	sigset_t oldset;

	rpc_clnt_sigmask(clnt, &oldset);
	lock_kernel();
	rpc_execute(&data->task);
	unlock_kernel();
	rpc_clnt_sigunmask(clnt, &oldset);
}

/*
 * Generate multiple requests to fill a single page.
 *
 * We optimize to reduce the number of read operations on the wire.  If we
 * detect that we're reading a page, or an area of a page, that is past the
 * end of file, we do not generate NFS read operations but just clear the
 * parts of the page that would have come back zero from the server anyway.
 *
 * We rely on the cached value of i_size to make this determination; another
 * client can fill pages on the server past our cached end-of-file, but we
 * won't see the new data until our attribute cache is updated.  This is more
 * or less conventional NFS client behavior.
 */
static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
{
	struct nfs_page *req = nfs_list_entry(head->next);
	struct page *page = req->wb_page;
	struct nfs_read_data *data;
	unsigned int rsize = NFS_SERVER(inode)->rsize;
	unsigned int nbytes, offset;
	int requests = 0;
	LIST_HEAD(list);

	nfs_list_remove_request(req);

	nbytes = req->wb_bytes;
	for (;;) {
		data = nfs_readdata_alloc(1);
		if (!data)
			goto out_bad;
		INIT_LIST_HEAD(&data->pages);
		list_add(&data->pages, &list);
		requests++;
		if (nbytes <= rsize)
			break;
		nbytes -= rsize;
	}
	atomic_set(&req->wb_complete, requests);

	ClearPageError(page);
	offset = 0;
	nbytes = req->wb_bytes;
	do {
		data = list_entry(list.next, struct nfs_read_data, pages);
		list_del_init(&data->pages);

		data->pagevec[0] = page;
		data->complete = nfs_readpage_result_partial;

		if (nbytes > rsize) {
			nfs_read_rpcsetup(req, data, rsize, offset);
			offset += rsize;
			nbytes -= rsize;
		} else {
			nfs_read_rpcsetup(req, data, nbytes, offset);
			nbytes = 0;
		}
		nfs_execute_read(data);
	} while (nbytes != 0);

	return 0;

out_bad:
	while (!list_empty(&list)) {
		data = list_entry(list.next, struct nfs_read_data, pages);
		list_del(&data->pages);
		nfs_readdata_free(data);
	}
	SetPageError(page);
	nfs_readpage_release(req);
	return -ENOMEM;
}
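/*
 * Worked example for nfs_pagein_multi() above (hypothetical mount
 * options): with rsize == 1024 and a 4096-byte request, the sizing loop
 * allocates four nfs_read_data structures and sets wb_complete to 4;
 * the child reads then cover offsets 0, 1024, 2048 and 3072 within the
 * page, and nfs_readpage_result_partial() marks the page up to date
 * only when the fourth reply drops wb_complete to zero.
 */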
static int nfs_pagein_one(struct list_head *head, struct inode *inode)
{
	struct nfs_page		*req;
	struct page		**pages;
	struct nfs_read_data	*data;
	unsigned int		count;

	if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
		return nfs_pagein_multi(head, inode);

	data = nfs_readdata_alloc(NFS_SERVER(inode)->rpages);
	if (!data)
		goto out_bad;

	INIT_LIST_HEAD(&data->pages);
	pages = data->pagevec;
	count = 0;
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_list_add_request(req, &data->pages);
		ClearPageError(req->wb_page);
		*pages++ = req->wb_page;
		count += req->wb_bytes;
	}
	req = nfs_list_entry(data->pages.next);

	data->complete = nfs_readpage_result_full;
	nfs_read_rpcsetup(req, data, count, 0);

	nfs_execute_read(data);
	return 0;

out_bad:
	nfs_async_read_error(head);
	return -ENOMEM;
}

static int
nfs_pagein_list(struct list_head *head, int rpages)
{
	LIST_HEAD(one_request);
	struct nfs_page		*req;
	int			error = 0;
	unsigned int		pages = 0;

	while (!list_empty(head)) {
		pages += nfs_coalesce_requests(head, &one_request, rpages);
		req = nfs_list_entry(one_request.next);
		error = nfs_pagein_one(&one_request, req->wb_context->dentry->d_inode);
		if (error < 0)
			break;
	}
	if (error >= 0)
		return pages;

	nfs_async_read_error(head);
	return error;
}

/*
 * Handle a read reply that fills part of a page.
 */
static void nfs_readpage_result_partial(struct nfs_read_data *data, int status)
{
	struct nfs_page *req = data->req;
	struct page *page = req->wb_page;

	if (status >= 0) {
		unsigned int request = data->args.count;
		unsigned int result = data->res.count;

		if (result < request) {
			memclear_highpage_flush(page,
						data->args.pgbase + result,
						request - result);
		}
	} else
		SetPageError(page);

	if (atomic_dec_and_test(&req->wb_complete)) {
		if (!PageError(page))
			SetPageUptodate(page);
		nfs_readpage_release(req);
	}
}

/*
 * Handle a read reply that fills a whole list of pages.
 */
static void nfs_readpage_result_full(struct nfs_read_data *data, int status)
{
	unsigned int count = data->res.count;

	while (!list_empty(&data->pages)) {
		struct nfs_page *req = nfs_list_entry(data->pages.next);
		struct page *page = req->wb_page;
		nfs_list_remove_request(req);

		if (status >= 0) {
			if (count < PAGE_CACHE_SIZE) {
				if (count < req->wb_bytes)
					memclear_highpage_flush(page,
							req->wb_pgbase + count,
							req->wb_bytes - count);
				count = 0;
			} else
				count -= PAGE_CACHE_SIZE;
			SetPageUptodate(page);
		} else
			SetPageError(page);
		nfs_readpage_release(req);
	}
}

/*
 * This is the callback from RPC telling us whether a reply was
 * received or some error occurred (timeout or socket shutdown).
 */
void nfs_readpage_result(struct rpc_task *task, void *calldata)
{
	struct nfs_read_data *data = calldata;
	struct nfs_readargs *argp = &data->args;
	struct nfs_readres *resp = &data->res;
	int status = task->tk_status;

	dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
		task->tk_pid, status);

	/* Is this a short read? */
	if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) {
		/* Has the server at least made some progress? */
		if (resp->count != 0) {
			/* Yes, so retry the read at the end of the data */
			argp->offset += resp->count;
			argp->pgbase += resp->count;
			argp->count -= resp->count;
			rpc_restart_call(task);
			return;
		}
		task->tk_status = -EIO;
	}
	spin_lock(&data->inode->i_lock);
	NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
	spin_unlock(&data->inode->i_lock);
	data->complete(data, status);
}
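/*
 * A concrete (made-up) short-read case for the restart logic above:
 * if args.count was 16384 and the server replied with res.count == 4096
 * and eof unset, the call is restarted with offset advanced by 4096,
 * pgbase advanced by 4096, and count reduced to 12288, so the retry
 * continues exactly where the first reply left off.
 */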
/*
 * Read a page over NFS.
 * We read the page synchronously in the following case:
 *  -	The error flag is set for this page. This happens only when a
 *	previous async read operation failed.
 */
int nfs_readpage(struct file *file, struct page *page)
{
	struct nfs_open_context *ctx;
	struct inode *inode = page->mapping->host;
	int		error;

	dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
		page, PAGE_CACHE_SIZE, page->index);
	/*
	 * Try to flush any pending writes to the file..
	 *
	 * NOTE! Because we own the page lock, there cannot
	 * be any new pending writes generated at this point
	 * for this page (other pages can be written to).
	 */
	error = nfs_wb_page(inode, page);
	if (error)
		goto out_error;

	if (file == NULL) {
		ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
		if (ctx == NULL)
			return -EBADF;
	} else
		ctx = get_nfs_open_context((struct nfs_open_context *)
				file->private_data);
	if (!IS_SYNC(inode)) {
		error = nfs_readpage_async(ctx, inode, page);
		goto out;
	}

	error = nfs_readpage_sync(ctx, inode, page);
	if (error < 0 && IS_SWAPFILE(inode))
		printk("Aiee.. nfs swap-in of page failed!\n");
out:
	put_nfs_open_context(ctx);
	return error;

out_error:
	unlock_page(page);
	return error;
}

struct nfs_readdesc {
	struct list_head *head;
	struct nfs_open_context *ctx;
};

static int
readpage_async_filler(void *data, struct page *page)
{
	struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
	struct inode *inode = page->mapping->host;
	struct nfs_page *new;
	unsigned int len;

	nfs_wb_page(inode, page);
	len = nfs_page_length(inode, page);
	if (len == 0)
		return nfs_return_empty_page(page);
	new = nfs_create_request(desc->ctx, inode, page, 0, len);
	if (IS_ERR(new)) {
		SetPageError(page);
		unlock_page(page);
		return PTR_ERR(new);
	}
	if (len < PAGE_CACHE_SIZE)
		memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
	nfs_list_add_request(new, desc->head);
	return 0;
}

int nfs_readpages(struct file *filp, struct address_space *mapping,
		struct list_head *pages, unsigned nr_pages)
{
	LIST_HEAD(head);
	struct nfs_readdesc desc = {
		.head		= &head,
	};
	struct inode *inode = mapping->host;
	struct nfs_server *server = NFS_SERVER(inode);
	int ret;

	dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
			inode->i_sb->s_id,
			(long long)NFS_FILEID(inode),
			nr_pages);

	if (filp == NULL) {
		desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
		if (desc.ctx == NULL)
			return -EBADF;
	} else
		desc.ctx = get_nfs_open_context((struct nfs_open_context *)
				filp->private_data);
	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
	if (!list_empty(&head)) {
		int err = nfs_pagein_list(&head, server->rpages);
		if (!ret)
			ret = err;
	}
	put_nfs_open_context(desc.ctx);
	return ret;
}

int nfs_init_readpagecache(void)
{
	nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
					     sizeof(struct nfs_read_data),
					     0, SLAB_HWCACHE_ALIGN,
					     NULL, NULL);
	if (nfs_rdata_cachep == NULL)
		return -ENOMEM;

	nfs_rdata_mempool = mempool_create(MIN_POOL_READ,
					   mempool_alloc_slab,
					   mempool_free_slab,
					   nfs_rdata_cachep);
	if (nfs_rdata_mempool == NULL)
		return -ENOMEM;

	return 0;
}

void nfs_destroy_readpagecache(void)
{
	mempool_destroy(nfs_rdata_mempool);
	if (kmem_cache_destroy(nfs_rdata_cachep))
		printk(KERN_INFO "nfs_read_data: not all structures were freed\n");
}
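/*
 * A minimal sketch of how the cache setup above is expected to be used
 * (the real call sites live in the NFS module init path, e.g.
 * init_nfs_fs(); treat the exact placement as an assumption):
 *
 *	if (nfs_init_readpagecache() != 0)
 *		goto out_failed;
 *	...
 *	nfs_destroy_readpagecache();
 *
 * nfs_readdata_alloc()/nfs_readdata_free() then draw from
 * nfs_rdata_mempool, so at least MIN_POOL_READ (32) read requests can
 * make progress even under memory pressure.
 */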