/*
 * linux/fs/nfs/read.c
 *
 * Block I/O for NFS
 *
 * Partial copy of Linus' read cache modifications to fs/nfs/file.c
 * modified for async RPC by okir@monad.swb.de
 *
 * We do an ugly hack here in order to return proper error codes to the
 * user program when a read request fails: since generic_file_read
 * only checks the return value of inode->i_op->readpage(), which is always 0
 * for async RPC, we set the error bit of the page to 1 when an error occurs,
 * and make nfs_readpage transmit requests synchronously when encountering this.
 * This is only a small problem, though, since we now retry all operations
 * within the RPC code when root squashing is suspected.
 */

#include <linux/config.h>
#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/smp_lock.h>

#include <asm/system.h>

#define NFSDBG_FACILITY		NFSDBG_PAGECACHE

static int nfs_pagein_one(struct list_head *, struct inode *);
static void nfs_readpage_result_partial(struct nfs_read_data *, int);
static void nfs_readpage_result_full(struct nfs_read_data *, int);

static kmem_cache_t *nfs_rdata_cachep;
mempool_t *nfs_rdata_mempool;

#define MIN_POOL_READ	(32)

void nfs_readdata_release(struct rpc_task *task)
{
        struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata;
        nfs_readdata_free(data);
}

static
unsigned int nfs_page_length(struct inode *inode, struct page *page)
{
        loff_t i_size = i_size_read(inode);
        unsigned long idx;

        if (i_size <= 0)
                return 0;
        idx = (i_size - 1) >> PAGE_CACHE_SHIFT;
        if (page->index > idx)
                return 0;
        if (page->index != idx)
                return PAGE_CACHE_SIZE;
        return 1 + ((i_size - 1) & (PAGE_CACHE_SIZE - 1));
}

static
int nfs_return_empty_page(struct page *page)
{
        memclear_highpage_flush(page, 0, PAGE_CACHE_SIZE);
        SetPageUptodate(page);
        unlock_page(page);
        return 0;
}
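/*
 * Worked example (editor's illustration, not part of the original source)
 * of the nfs_page_length() arithmetic above, assuming PAGE_CACHE_SIZE is
 * 4096 (PAGE_CACHE_SHIFT == 12):
 *
 *	i_size = 10000  =>  idx = (10000 - 1) >> 12 = 2
 *	page->index 0 or 1  =>  return 4096 (whole page below EOF)
 *	page->index == 2    =>  return 1 + (9999 & 4095) = 1808
 *	page->index >= 3    =>  return 0 (wholly past EOF)
 *
 * The "1 +" form avoids returning 0 for a final page that is exactly
 * full: with i_size = 8192, idx = 1 and the last page yields
 * 1 + (8191 & 4095) = 4096.
 */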
/*
 * Read a page synchronously.
 */
static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode,
                struct page *page)
{
        unsigned int rsize = NFS_SERVER(inode)->rsize;
        unsigned int count = PAGE_CACHE_SIZE;
        int result;
        struct nfs_read_data *rdata;

        rdata = nfs_readdata_alloc();
        if (!rdata)
                return -ENOMEM;

        memset(rdata, 0, sizeof(*rdata));
        rdata->flags = (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0);
        rdata->cred = ctx->cred;
        rdata->inode = inode;
        INIT_LIST_HEAD(&rdata->pages);
        rdata->args.fh = NFS_FH(inode);
        rdata->args.context = ctx;
        rdata->args.pages = &page;
        rdata->args.pgbase = 0UL;
        rdata->args.count = rsize;
        rdata->res.fattr = &rdata->fattr;

        dprintk("NFS: nfs_readpage_sync(%p)\n", page);

        /*
         * This works now because the socket layer never tries to DMA
         * into this buffer directly.
         */
        do {
                if (count < rsize)
                        rdata->args.count = count;
                rdata->res.count = rdata->args.count;
                rdata->args.offset = page_offset(page) + rdata->args.pgbase;

                dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Lu, %u)\n",
                        NFS_SERVER(inode)->hostname,
                        inode->i_sb->s_id,
                        (long long)NFS_FILEID(inode),
                        (unsigned long long)rdata->args.offset,
                        rdata->args.count);

                lock_kernel();
                result = NFS_PROTO(inode)->read(rdata);
                unlock_kernel();

                /*
                 * Even if we had a partial success we can't mark the page
                 * cache valid.
                 */
                if (result < 0) {
                        if (result == -EISDIR)
                                result = -EINVAL;
                        goto io_error;
                }
                count -= result;
                rdata->args.pgbase += result;
                /* Note: result == 0 should only happen if we're caching
                 * a write that extends the file and punches a hole.
                 */
                if (rdata->res.eof != 0 || result == 0)
                        break;
        } while (count);
        spin_lock(&inode->i_lock);
        NFS_I(inode)->cache_validity |= NFS_INO_INVALID_ATIME;
        spin_unlock(&inode->i_lock);

        if (count)
                memclear_highpage_flush(page, rdata->args.pgbase, count);
        SetPageUptodate(page);
        if (PageError(page))
                ClearPageError(page);
        result = 0;

io_error:
        unlock_page(page);
        nfs_readdata_free(rdata);
        return result;
}

static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
                struct page *page)
{
        LIST_HEAD(one_request);
        struct nfs_page *new;
        unsigned int len;

        len = nfs_page_length(inode, page);
        if (len == 0)
                return nfs_return_empty_page(page);
        new = nfs_create_request(ctx, inode, page, 0, len);
        if (IS_ERR(new)) {
                unlock_page(page);
                return PTR_ERR(new);
        }
        if (len < PAGE_CACHE_SIZE)
                memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);

        nfs_list_add_request(new, &one_request);
        nfs_pagein_one(&one_request, inode);
        return 0;
}

static void nfs_readpage_release(struct nfs_page *req)
{
        unlock_page(req->wb_page);

        dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
                        req->wb_context->dentry->d_inode->i_sb->s_id,
                        (long long)NFS_FILEID(req->wb_context->dentry->d_inode),
                        req->wb_bytes,
                        (long long)req_offset(req));
        nfs_clear_request(req);
        nfs_release_request(req);
}

/*
 * Set up the NFS read request struct
 */
static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
                unsigned int count, unsigned int offset)
{
        struct inode *inode;

        data->req = req;
        data->inode = inode = req->wb_context->dentry->d_inode;
        data->cred = req->wb_context->cred;

        data->args.fh = NFS_FH(inode);
        data->args.offset = req_offset(req) + offset;
        data->args.pgbase = req->wb_pgbase + offset;
        data->args.pages = data->pagevec;
        data->args.count = count;
        data->args.context = req->wb_context;

        data->res.fattr = &data->fattr;
        data->res.count = count;
        data->res.eof = 0;

        NFS_PROTO(inode)->read_setup(data);

        data->task.tk_cookie = (unsigned long)inode;
        data->task.tk_calldata = data;
        /* Release requests */
        data->task.tk_release = nfs_readdata_release;

        dprintk("NFS: %4d initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
                        data->task.tk_pid,
                        inode->i_sb->s_id,
                        (long long)NFS_FILEID(inode),
                        count,
                        (unsigned long long)data->args.offset);
}
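/*
 * Callback-wiring sketch (editor's illustration, not part of the original
 * source), for readers unfamiliar with the pre-rpc_call_ops RPC interface
 * this file uses.  On this kernel vintage, NFS_PROTO(inode)->read_setup()
 * is assumed to install nfs_readpage_result as the task's tk_exit callback;
 * only tk_calldata and tk_release are set here directly:
 *
 *	rpc_execute(&data->task)
 *	  -> RPC state machine sends the READ and awaits the reply
 *	  -> tk_exit (nfs_readpage_result) interprets the reply, possibly
 *	     restarts short reads, then calls data->complete(), i.e.
 *	     nfs_readpage_result_partial or nfs_readpage_result_full
 *	  -> tk_release (nfs_readdata_release) frees the nfs_read_data
 *	     once the RPC layer is finished with the task
 */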
static void
nfs_async_read_error(struct list_head *head)
{
        struct nfs_page *req;

        while (!list_empty(head)) {
                req = nfs_list_entry(head->next);
                nfs_list_remove_request(req);
                SetPageError(req->wb_page);
                nfs_readpage_release(req);
        }
}

/*
 * Start an async read operation
 */
static void nfs_execute_read(struct nfs_read_data *data)
{
        struct rpc_clnt *clnt = NFS_CLIENT(data->inode);
        sigset_t oldset;

        rpc_clnt_sigmask(clnt, &oldset);
        lock_kernel();
        rpc_execute(&data->task);
        unlock_kernel();
        rpc_clnt_sigunmask(clnt, &oldset);
}

/*
 * Generate multiple requests to fill a single page.
 *
 * We optimize to reduce the number of read operations on the wire.  If we
 * detect that we're reading a page, or an area of a page, that is past the
 * end of file, we do not generate NFS read operations but just clear the
 * parts of the page that would have come back zero from the server anyway.
 *
 * We rely on the cached value of i_size to make this determination; another
 * client can fill pages on the server past our cached end-of-file, but we
 * won't see the new data until our attribute cache is updated.  This is more
 * or less conventional NFS client behavior.
 */
static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
{
        struct nfs_page *req = nfs_list_entry(head->next);
        struct page *page = req->wb_page;
        struct nfs_read_data *data;
        unsigned int rsize = NFS_SERVER(inode)->rsize;
        unsigned int nbytes, offset;
        int requests = 0;
        LIST_HEAD(list);

        nfs_list_remove_request(req);

        nbytes = req->wb_bytes;
        for (;;) {
                data = nfs_readdata_alloc();
                if (!data)
                        goto out_bad;
                INIT_LIST_HEAD(&data->pages);
                list_add(&data->pages, &list);
                requests++;
                if (nbytes <= rsize)
                        break;
                nbytes -= rsize;
        }
        atomic_set(&req->wb_complete, requests);

        ClearPageError(page);
        offset = 0;
        nbytes = req->wb_bytes;
        do {
                data = list_entry(list.next, struct nfs_read_data, pages);
                list_del_init(&data->pages);

                data->pagevec[0] = page;
                data->complete = nfs_readpage_result_partial;

                if (nbytes > rsize) {
                        nfs_read_rpcsetup(req, data, rsize, offset);
                        offset += rsize;
                        nbytes -= rsize;
                } else {
                        nfs_read_rpcsetup(req, data, nbytes, offset);
                        nbytes = 0;
                }
                nfs_execute_read(data);
        } while (nbytes != 0);

        return 0;

out_bad:
        while (!list_empty(&list)) {
                data = list_entry(list.next, struct nfs_read_data, pages);
                list_del(&data->pages);
                nfs_readdata_free(data);
        }
        SetPageError(page);
        nfs_readpage_release(req);
        return -ENOMEM;
}
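/*
 * Worked example (editor's illustration, not part of the original source)
 * of the request splitting above, assuming PAGE_CACHE_SIZE == 4096 and a
 * server rsize of 1024.  A full-page request (wb_bytes == 4096) becomes
 * four wire READs into the same page:
 *
 *	nfs_read_rpcsetup(req, data, 1024, 0);
 *	nfs_read_rpcsetup(req, data, 1024, 1024);
 *	nfs_read_rpcsetup(req, data, 1024, 2048);
 *	nfs_read_rpcsetup(req, data, 1024, 3072);
 *
 * wb_complete is set to 4 beforehand, so nfs_readpage_result_partial()
 * marks the page uptodate (or in error) only after the last of the four
 * replies has counted the request down to zero.
 */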
static int nfs_pagein_one(struct list_head *head, struct inode *inode)
{
        struct nfs_page *req;
        struct page **pages;
        struct nfs_read_data *data;
        unsigned int count;

        if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
                return nfs_pagein_multi(head, inode);

        data = nfs_readdata_alloc();
        if (!data)
                goto out_bad;

        INIT_LIST_HEAD(&data->pages);
        pages = data->pagevec;
        count = 0;
        while (!list_empty(head)) {
                req = nfs_list_entry(head->next);
                nfs_list_remove_request(req);
                nfs_list_add_request(req, &data->pages);
                ClearPageError(req->wb_page);
                *pages++ = req->wb_page;
                count += req->wb_bytes;
        }
        req = nfs_list_entry(data->pages.next);

        data->complete = nfs_readpage_result_full;
        nfs_read_rpcsetup(req, data, count, 0);

        nfs_execute_read(data);
        return 0;
out_bad:
        nfs_async_read_error(head);
        return -ENOMEM;
}

static int
nfs_pagein_list(struct list_head *head, int rpages)
{
        LIST_HEAD(one_request);
        struct nfs_page *req;
        int error = 0;
        unsigned int pages = 0;

        while (!list_empty(head)) {
                pages += nfs_coalesce_requests(head, &one_request, rpages);
                req = nfs_list_entry(one_request.next);
                error = nfs_pagein_one(&one_request, req->wb_context->dentry->d_inode);
                if (error < 0)
                        break;
        }
        if (error >= 0)
                return pages;

        nfs_async_read_error(head);
        return error;
}

/*
 * Handle a read reply that fills part of a page.
 */
static void nfs_readpage_result_partial(struct nfs_read_data *data, int status)
{
        struct nfs_page *req = data->req;
        struct page *page = req->wb_page;

        if (status >= 0) {
                unsigned int request = data->args.count;
                unsigned int result = data->res.count;

                if (result < request) {
                        memclear_highpage_flush(page,
                                        data->args.pgbase + result,
                                        request - result);
                }
        } else
                SetPageError(page);

        if (atomic_dec_and_test(&req->wb_complete)) {
                if (!PageError(page))
                        SetPageUptodate(page);
                nfs_readpage_release(req);
        }
}

/*
 * This is the callback from RPC telling us whether a reply was
 * received or some error occurred (timeout or socket shutdown).
 */
static void nfs_readpage_result_full(struct nfs_read_data *data, int status)
{
        unsigned int count = data->res.count;

        while (!list_empty(&data->pages)) {
                struct nfs_page *req = nfs_list_entry(data->pages.next);
                struct page *page = req->wb_page;
                nfs_list_remove_request(req);

                if (status >= 0) {
                        if (count < PAGE_CACHE_SIZE) {
                                if (count < req->wb_bytes)
                                        memclear_highpage_flush(page,
                                                        req->wb_pgbase + count,
                                                        req->wb_bytes - count);
                                count = 0;
                        } else
                                count -= PAGE_CACHE_SIZE;
                        SetPageUptodate(page);
                } else
                        SetPageError(page);
                nfs_readpage_release(req);
        }
}

/*
 * This is the callback from RPC telling us whether a reply was
 * received or some error occurred (timeout or socket shutdown).
 */
void nfs_readpage_result(struct rpc_task *task)
{
        struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata;
        struct nfs_readargs *argp = &data->args;
        struct nfs_readres *resp = &data->res;
        int status = task->tk_status;

        dprintk("NFS: %4d nfs_readpage_result, (status %d)\n",
                task->tk_pid, status);

        /* Is this a short read? */
        if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) {
                /* Has the server at least made some progress? */
                if (resp->count != 0) {
                        /* Yes, so retry the read at the end of the data */
                        argp->offset += resp->count;
                        argp->pgbase += resp->count;
                        argp->count -= resp->count;
                        rpc_restart_call(task);
                        return;
                }
                /* No progress and no EOF: report an I/O error, and make
                 * sure the completion handler sees it too, so it does not
                 * cache a zero-filled page as valid data. */
                status = task->tk_status = -EIO;
        }
        spin_lock(&data->inode->i_lock);
        NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
        spin_unlock(&data->inode->i_lock);
        data->complete(data, status);
}
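/*
 * Worked example (editor's illustration, not part of the original source)
 * of the short-read handling above.  Suppose a 4096-byte READ comes back
 * with only 1024 bytes and no EOF flag:
 *
 *	before restart:  argp->offset = O,        argp->pgbase = P,
 *	                 argp->count  = 4096,     resp->count  = 1024
 *	after restart:   argp->offset = O + 1024, argp->pgbase = P + 1024,
 *	                 argp->count  = 3072
 *
 * rpc_restart_call() then re-transmits the READ for the remaining 3072
 * bytes into the tail of the same pages.  Only a zero-byte reply without
 * EOF is treated as an I/O error, since no forward progress is possible.
 */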
/*
 * Read a page over NFS.
 * We read the page synchronously in the following case:
 *  -	The error flag is set for this page. This happens only when a
 *	previous async read operation failed.
 */
int nfs_readpage(struct file *file, struct page *page)
{
        struct nfs_open_context *ctx;
        struct inode *inode = page->mapping->host;
        int error;

        dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
                page, PAGE_CACHE_SIZE, page->index);
        /*
         * Try to flush any pending writes to the file..
         *
         * NOTE! Because we own the page lock, there cannot
         * be any new pending writes generated at this point
         * for this page (other pages can be written to).
         */
        error = nfs_wb_page(inode, page);
        if (error)
                goto out_error;

        if (file == NULL) {
                ctx = nfs_find_open_context(inode, FMODE_READ);
                if (ctx == NULL)
                        return -EBADF;
        } else
                ctx = get_nfs_open_context((struct nfs_open_context *)
                                file->private_data);
        if (!IS_SYNC(inode)) {
                error = nfs_readpage_async(ctx, inode, page);
                goto out;
        }

        error = nfs_readpage_sync(ctx, inode, page);
        if (error < 0 && IS_SWAPFILE(inode))
                printk("Aiee.. nfs swap-in of page failed!\n");
out:
        put_nfs_open_context(ctx);
        return error;

out_error:
        unlock_page(page);
        return error;
}

struct nfs_readdesc {
        struct list_head *head;
        struct nfs_open_context *ctx;
};

static int
readpage_async_filler(void *data, struct page *page)
{
        struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
        struct inode *inode = page->mapping->host;
        struct nfs_page *new;
        unsigned int len;

        nfs_wb_page(inode, page);
        len = nfs_page_length(inode, page);
        if (len == 0)
                return nfs_return_empty_page(page);
        new = nfs_create_request(desc->ctx, inode, page, 0, len);
        if (IS_ERR(new)) {
                SetPageError(page);
                unlock_page(page);
                return PTR_ERR(new);
        }
        if (len < PAGE_CACHE_SIZE)
                memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
        nfs_list_add_request(new, desc->head);
        return 0;
}

int nfs_readpages(struct file *filp, struct address_space *mapping,
                struct list_head *pages, unsigned nr_pages)
{
        LIST_HEAD(head);
        struct nfs_readdesc desc = {
                .head = &head,
        };
        struct inode *inode = mapping->host;
        struct nfs_server *server = NFS_SERVER(inode);
        int ret;

        dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
                        inode->i_sb->s_id,
                        (long long)NFS_FILEID(inode),
                        nr_pages);

        if (filp == NULL) {
                desc.ctx = nfs_find_open_context(inode, FMODE_READ);
                if (desc.ctx == NULL)
                        return -EBADF;
        } else
                desc.ctx = get_nfs_open_context((struct nfs_open_context *)
                                filp->private_data);
        ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
        if (!list_empty(&head)) {
                int err = nfs_pagein_list(&head, server->rpages);
                if (!ret)
                        ret = err;
        }
        put_nfs_open_context(desc.ctx);
        return ret;
}

int nfs_init_readpagecache(void)
{
        nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
                                             sizeof(struct nfs_read_data),
                                             0, SLAB_HWCACHE_ALIGN,
                                             NULL, NULL);
        if (nfs_rdata_cachep == NULL)
                return -ENOMEM;

        nfs_rdata_mempool = mempool_create(MIN_POOL_READ,
                                           mempool_alloc_slab,
                                           mempool_free_slab,
                                           nfs_rdata_cachep);
        if (nfs_rdata_mempool == NULL)
                return -ENOMEM;

        return 0;
}

void nfs_destroy_readpagecache(void)
{
        mempool_destroy(nfs_rdata_mempool);
        if (kmem_cache_destroy(nfs_rdata_cachep))
                printk(KERN_INFO "nfs_read_data: not all structures were freed\n");
}
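/*
 * Note (editor's illustration, not part of the original source):
 * nfs_rdata_mempool keeps a reserve of MIN_POOL_READ (32) preallocated
 * nfs_read_data structures so reads can make progress under memory
 * pressure; this matters for the swap-over-NFS case flagged with
 * NFS_RPC_SWAPFLAGS above, where completing the read may itself be
 * needed to free memory.  nfs_init_readpagecache() is assumed to be
 * called from NFS module initialization, with the module init path
 * unwinding via nfs_destroy_readpagecache() on partial failure.
 */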