1 /* 2 * linux/fs/nfs/read.c 3 * 4 * Block I/O for NFS 5 * 6 * Partial copy of Linus' read cache modifications to fs/nfs/file.c 7 * modified for async RPC by okir@monad.swb.de 8 * 9 * We do an ugly hack here in order to return proper error codes to the 10 * user program when a read request failed: since generic_file_read 11 * only checks the return value of inode->i_op->readpage() which is always 0 12 * for async RPC, we set the error bit of the page to 1 when an error occurs, 13 * and make nfs_readpage transmit requests synchronously when encountering this. 14 * This is only a small problem, though, since we now retry all operations 15 * within the RPC code when root squashing is suspected. 16 */ 17 18 #include <linux/config.h> 19 #include <linux/time.h> 20 #include <linux/kernel.h> 21 #include <linux/errno.h> 22 #include <linux/fcntl.h> 23 #include <linux/stat.h> 24 #include <linux/mm.h> 25 #include <linux/slab.h> 26 #include <linux/pagemap.h> 27 #include <linux/sunrpc/clnt.h> 28 #include <linux/nfs_fs.h> 29 #include <linux/nfs_page.h> 30 #include <linux/smp_lock.h> 31 32 #include <asm/system.h> 33 34 #define NFSDBG_FACILITY NFSDBG_PAGECACHE 35 36 static int nfs_pagein_one(struct list_head *, struct inode *); 37 static void nfs_readpage_result_partial(struct nfs_read_data *, int); 38 static void nfs_readpage_result_full(struct nfs_read_data *, int); 39 40 static kmem_cache_t *nfs_rdata_cachep; 41 mempool_t *nfs_rdata_mempool; 42 43 #define MIN_POOL_READ (32) 44 45 void nfs_readdata_release(struct rpc_task *task) 46 { 47 struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata; 48 nfs_readdata_free(data); 49 } 50 51 static 52 unsigned int nfs_page_length(struct inode *inode, struct page *page) 53 { 54 loff_t i_size = i_size_read(inode); 55 unsigned long idx; 56 57 if (i_size <= 0) 58 return 0; 59 idx = (i_size - 1) >> PAGE_CACHE_SHIFT; 60 if (page->index > idx) 61 return 0; 62 if (page->index != idx) 63 return PAGE_CACHE_SIZE; 64 return 1 + ((i_size - 1) & (PAGE_CACHE_SIZE - 1)); 65 } 66 67 static 68 int nfs_return_empty_page(struct page *page) 69 { 70 memclear_highpage_flush(page, 0, PAGE_CACHE_SIZE); 71 SetPageUptodate(page); 72 unlock_page(page); 73 return 0; 74 } 75 76 /* 77 * Read a page synchronously. 78 */ 79 static int nfs_readpage_sync(struct nfs_open_context *ctx, struct inode *inode, 80 struct page *page) 81 { 82 unsigned int rsize = NFS_SERVER(inode)->rsize; 83 unsigned int count = PAGE_CACHE_SIZE; 84 int result; 85 struct nfs_read_data *rdata; 86 87 rdata = nfs_readdata_alloc(); 88 if (!rdata) 89 return -ENOMEM; 90 91 memset(rdata, 0, sizeof(*rdata)); 92 rdata->flags = (IS_SWAPFILE(inode)? NFS_RPC_SWAPFLAGS : 0); 93 rdata->cred = ctx->cred; 94 rdata->inode = inode; 95 INIT_LIST_HEAD(&rdata->pages); 96 rdata->args.fh = NFS_FH(inode); 97 rdata->args.context = ctx; 98 rdata->args.pages = &page; 99 rdata->args.pgbase = 0UL; 100 rdata->args.count = rsize; 101 rdata->res.fattr = &rdata->fattr; 102 103 dprintk("NFS: nfs_readpage_sync(%p)\n", page); 104 105 /* 106 * This works now because the socket layer never tries to DMA 107 * into this buffer directly. 108 */ 109 do { 110 if (count < rsize) 111 rdata->args.count = count; 112 rdata->res.count = rdata->args.count; 113 rdata->args.offset = page_offset(page) + rdata->args.pgbase; 114 115 dprintk("NFS: nfs_proc_read(%s, (%s/%Ld), %Lu, %u)\n", 116 NFS_SERVER(inode)->hostname, 117 inode->i_sb->s_id, 118 (long long)NFS_FILEID(inode), 119 (unsigned long long)rdata->args.pgbase, 120 rdata->args.count); 121 122 lock_kernel(); 123 result = NFS_PROTO(inode)->read(rdata); 124 unlock_kernel(); 125 126 /* 127 * Even if we had a partial success we can't mark the page 128 * cache valid. 129 */ 130 if (result < 0) { 131 if (result == -EISDIR) 132 result = -EINVAL; 133 goto io_error; 134 } 135 count -= result; 136 rdata->args.pgbase += result; 137 /* Note: result == 0 should only happen if we're caching 138 * a write that extends the file and punches a hole. 139 */ 140 if (rdata->res.eof != 0 || result == 0) 141 break; 142 } while (count); 143 NFS_FLAGS(inode) |= NFS_INO_INVALID_ATIME; 144 145 if (count) 146 memclear_highpage_flush(page, rdata->args.pgbase, count); 147 SetPageUptodate(page); 148 if (PageError(page)) 149 ClearPageError(page); 150 result = 0; 151 152 io_error: 153 unlock_page(page); 154 nfs_readdata_free(rdata); 155 return result; 156 } 157 158 static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode, 159 struct page *page) 160 { 161 LIST_HEAD(one_request); 162 struct nfs_page *new; 163 unsigned int len; 164 165 len = nfs_page_length(inode, page); 166 if (len == 0) 167 return nfs_return_empty_page(page); 168 new = nfs_create_request(ctx, inode, page, 0, len); 169 if (IS_ERR(new)) { 170 unlock_page(page); 171 return PTR_ERR(new); 172 } 173 if (len < PAGE_CACHE_SIZE) 174 memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len); 175 176 nfs_list_add_request(new, &one_request); 177 nfs_pagein_one(&one_request, inode); 178 return 0; 179 } 180 181 static void nfs_readpage_release(struct nfs_page *req) 182 { 183 unlock_page(req->wb_page); 184 185 nfs_clear_request(req); 186 nfs_release_request(req); 187 188 dprintk("NFS: read done (%s/%Ld %d@%Ld)\n", 189 req->wb_context->dentry->d_inode->i_sb->s_id, 190 (long long)NFS_FILEID(req->wb_context->dentry->d_inode), 191 req->wb_bytes, 192 (long long)req_offset(req)); 193 } 194 195 /* 196 * Set up the NFS read request struct 197 */ 198 static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data, 199 unsigned int count, unsigned int offset) 200 { 201 struct inode *inode; 202 203 data->req = req; 204 data->inode = inode = req->wb_context->dentry->d_inode; 205 data->cred = req->wb_context->cred; 206 207 data->args.fh = NFS_FH(inode); 208 data->args.offset = req_offset(req) + offset; 209 data->args.pgbase = req->wb_pgbase + offset; 210 data->args.pages = data->pagevec; 211 data->args.count = count; 212 data->args.context = req->wb_context; 213 214 data->res.fattr = &data->fattr; 215 data->res.count = count; 216 data->res.eof = 0; 217 218 NFS_PROTO(inode)->read_setup(data); 219 220 data->task.tk_cookie = (unsigned long)inode; 221 data->task.tk_calldata = data; 222 /* Release requests */ 223 data->task.tk_release = nfs_readdata_release; 224 225 dprintk("NFS: %4d initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n", 226 data->task.tk_pid, 227 inode->i_sb->s_id, 228 (long long)NFS_FILEID(inode), 229 count, 230 (unsigned long long)data->args.offset); 231 } 232 233 static void 234 nfs_async_read_error(struct list_head *head) 235 { 236 struct nfs_page *req; 237 238 while (!list_empty(head)) { 239 req = nfs_list_entry(head->next); 240 nfs_list_remove_request(req); 241 SetPageError(req->wb_page); 242 nfs_readpage_release(req); 243 } 244 } 245 246 /* 247 * Start an async read operation 248 */ 249 static void nfs_execute_read(struct nfs_read_data *data) 250 { 251 struct rpc_clnt *clnt = NFS_CLIENT(data->inode); 252 sigset_t oldset; 253 254 rpc_clnt_sigmask(clnt, &oldset); 255 lock_kernel(); 256 rpc_execute(&data->task); 257 unlock_kernel(); 258 rpc_clnt_sigunmask(clnt, &oldset); 259 } 260 261 /* 262 * Generate multiple requests to fill a single page. 263 * 264 * We optimize to reduce the number of read operations on the wire. If we 265 * detect that we're reading a page, or an area of a page, that is past the 266 * end of file, we do not generate NFS read operations but just clear the 267 * parts of the page that would have come back zero from the server anyway. 268 * 269 * We rely on the cached value of i_size to make this determination; another 270 * client can fill pages on the server past our cached end-of-file, but we 271 * won't see the new data until our attribute cache is updated. This is more 272 * or less conventional NFS client behavior. 273 */ 274 static int nfs_pagein_multi(struct list_head *head, struct inode *inode) 275 { 276 struct nfs_page *req = nfs_list_entry(head->next); 277 struct page *page = req->wb_page; 278 struct nfs_read_data *data; 279 unsigned int rsize = NFS_SERVER(inode)->rsize; 280 unsigned int nbytes, offset; 281 int requests = 0; 282 LIST_HEAD(list); 283 284 nfs_list_remove_request(req); 285 286 nbytes = req->wb_bytes; 287 for(;;) { 288 data = nfs_readdata_alloc(); 289 if (!data) 290 goto out_bad; 291 INIT_LIST_HEAD(&data->pages); 292 list_add(&data->pages, &list); 293 requests++; 294 if (nbytes <= rsize) 295 break; 296 nbytes -= rsize; 297 } 298 atomic_set(&req->wb_complete, requests); 299 300 ClearPageError(page); 301 offset = 0; 302 nbytes = req->wb_bytes; 303 do { 304 data = list_entry(list.next, struct nfs_read_data, pages); 305 list_del_init(&data->pages); 306 307 data->pagevec[0] = page; 308 data->complete = nfs_readpage_result_partial; 309 310 if (nbytes > rsize) { 311 nfs_read_rpcsetup(req, data, rsize, offset); 312 offset += rsize; 313 nbytes -= rsize; 314 } else { 315 nfs_read_rpcsetup(req, data, nbytes, offset); 316 nbytes = 0; 317 } 318 nfs_execute_read(data); 319 } while (nbytes != 0); 320 321 return 0; 322 323 out_bad: 324 while (!list_empty(&list)) { 325 data = list_entry(list.next, struct nfs_read_data, pages); 326 list_del(&data->pages); 327 nfs_readdata_free(data); 328 } 329 SetPageError(page); 330 nfs_readpage_release(req); 331 return -ENOMEM; 332 } 333 334 static int nfs_pagein_one(struct list_head *head, struct inode *inode) 335 { 336 struct nfs_page *req; 337 struct page **pages; 338 struct nfs_read_data *data; 339 unsigned int count; 340 341 if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE) 342 return nfs_pagein_multi(head, inode); 343 344 data = nfs_readdata_alloc(); 345 if (!data) 346 goto out_bad; 347 348 INIT_LIST_HEAD(&data->pages); 349 pages = data->pagevec; 350 count = 0; 351 while (!list_empty(head)) { 352 req = nfs_list_entry(head->next); 353 nfs_list_remove_request(req); 354 nfs_list_add_request(req, &data->pages); 355 ClearPageError(req->wb_page); 356 *pages++ = req->wb_page; 357 count += req->wb_bytes; 358 } 359 req = nfs_list_entry(data->pages.next); 360 361 data->complete = nfs_readpage_result_full; 362 nfs_read_rpcsetup(req, data, count, 0); 363 364 nfs_execute_read(data); 365 return 0; 366 out_bad: 367 nfs_async_read_error(head); 368 return -ENOMEM; 369 } 370 371 static int 372 nfs_pagein_list(struct list_head *head, int rpages) 373 { 374 LIST_HEAD(one_request); 375 struct nfs_page *req; 376 int error = 0; 377 unsigned int pages = 0; 378 379 while (!list_empty(head)) { 380 pages += nfs_coalesce_requests(head, &one_request, rpages); 381 req = nfs_list_entry(one_request.next); 382 error = nfs_pagein_one(&one_request, req->wb_context->dentry->d_inode); 383 if (error < 0) 384 break; 385 } 386 if (error >= 0) 387 return pages; 388 389 nfs_async_read_error(head); 390 return error; 391 } 392 393 /* 394 * Handle a read reply that fills part of a page. 395 */ 396 static void nfs_readpage_result_partial(struct nfs_read_data *data, int status) 397 { 398 struct nfs_page *req = data->req; 399 struct page *page = req->wb_page; 400 401 if (status >= 0) { 402 unsigned int request = data->args.count; 403 unsigned int result = data->res.count; 404 405 if (result < request) { 406 memclear_highpage_flush(page, 407 data->args.pgbase + result, 408 request - result); 409 } 410 } else 411 SetPageError(page); 412 413 if (atomic_dec_and_test(&req->wb_complete)) { 414 if (!PageError(page)) 415 SetPageUptodate(page); 416 nfs_readpage_release(req); 417 } 418 } 419 420 /* 421 * This is the callback from RPC telling us whether a reply was 422 * received or some error occurred (timeout or socket shutdown). 423 */ 424 static void nfs_readpage_result_full(struct nfs_read_data *data, int status) 425 { 426 unsigned int count = data->res.count; 427 428 while (!list_empty(&data->pages)) { 429 struct nfs_page *req = nfs_list_entry(data->pages.next); 430 struct page *page = req->wb_page; 431 nfs_list_remove_request(req); 432 433 if (status >= 0) { 434 if (count < PAGE_CACHE_SIZE) { 435 if (count < req->wb_bytes) 436 memclear_highpage_flush(page, 437 req->wb_pgbase + count, 438 req->wb_bytes - count); 439 count = 0; 440 } else 441 count -= PAGE_CACHE_SIZE; 442 SetPageUptodate(page); 443 } else 444 SetPageError(page); 445 nfs_readpage_release(req); 446 } 447 } 448 449 /* 450 * This is the callback from RPC telling us whether a reply was 451 * received or some error occurred (timeout or socket shutdown). 452 */ 453 void nfs_readpage_result(struct rpc_task *task) 454 { 455 struct nfs_read_data *data = (struct nfs_read_data *)task->tk_calldata; 456 struct nfs_readargs *argp = &data->args; 457 struct nfs_readres *resp = &data->res; 458 int status = task->tk_status; 459 460 dprintk("NFS: %4d nfs_readpage_result, (status %d)\n", 461 task->tk_pid, status); 462 463 /* Is this a short read? */ 464 if (task->tk_status >= 0 && resp->count < argp->count && !resp->eof) { 465 /* Has the server at least made some progress? */ 466 if (resp->count != 0) { 467 /* Yes, so retry the read at the end of the data */ 468 argp->offset += resp->count; 469 argp->pgbase += resp->count; 470 argp->count -= resp->count; 471 rpc_restart_call(task); 472 return; 473 } 474 task->tk_status = -EIO; 475 } 476 NFS_FLAGS(data->inode) |= NFS_INO_INVALID_ATIME; 477 data->complete(data, status); 478 } 479 480 /* 481 * Read a page over NFS. 482 * We read the page synchronously in the following case: 483 * - The error flag is set for this page. This happens only when a 484 * previous async read operation failed. 485 */ 486 int nfs_readpage(struct file *file, struct page *page) 487 { 488 struct nfs_open_context *ctx; 489 struct inode *inode = page->mapping->host; 490 int error; 491 492 dprintk("NFS: nfs_readpage (%p %ld@%lu)\n", 493 page, PAGE_CACHE_SIZE, page->index); 494 /* 495 * Try to flush any pending writes to the file.. 496 * 497 * NOTE! Because we own the page lock, there cannot 498 * be any new pending writes generated at this point 499 * for this page (other pages can be written to). 500 */ 501 error = nfs_wb_page(inode, page); 502 if (error) 503 goto out_error; 504 505 if (file == NULL) { 506 ctx = nfs_find_open_context(inode, FMODE_READ); 507 if (ctx == NULL) 508 return -EBADF; 509 } else 510 ctx = get_nfs_open_context((struct nfs_open_context *) 511 file->private_data); 512 if (!IS_SYNC(inode)) { 513 error = nfs_readpage_async(ctx, inode, page); 514 goto out; 515 } 516 517 error = nfs_readpage_sync(ctx, inode, page); 518 if (error < 0 && IS_SWAPFILE(inode)) 519 printk("Aiee.. nfs swap-in of page failed!\n"); 520 out: 521 put_nfs_open_context(ctx); 522 return error; 523 524 out_error: 525 unlock_page(page); 526 return error; 527 } 528 529 struct nfs_readdesc { 530 struct list_head *head; 531 struct nfs_open_context *ctx; 532 }; 533 534 static int 535 readpage_async_filler(void *data, struct page *page) 536 { 537 struct nfs_readdesc *desc = (struct nfs_readdesc *)data; 538 struct inode *inode = page->mapping->host; 539 struct nfs_page *new; 540 unsigned int len; 541 542 nfs_wb_page(inode, page); 543 len = nfs_page_length(inode, page); 544 if (len == 0) 545 return nfs_return_empty_page(page); 546 new = nfs_create_request(desc->ctx, inode, page, 0, len); 547 if (IS_ERR(new)) { 548 SetPageError(page); 549 unlock_page(page); 550 return PTR_ERR(new); 551 } 552 if (len < PAGE_CACHE_SIZE) 553 memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len); 554 nfs_list_add_request(new, desc->head); 555 return 0; 556 } 557 558 int nfs_readpages(struct file *filp, struct address_space *mapping, 559 struct list_head *pages, unsigned nr_pages) 560 { 561 LIST_HEAD(head); 562 struct nfs_readdesc desc = { 563 .head = &head, 564 }; 565 struct inode *inode = mapping->host; 566 struct nfs_server *server = NFS_SERVER(inode); 567 int ret; 568 569 dprintk("NFS: nfs_readpages (%s/%Ld %d)\n", 570 inode->i_sb->s_id, 571 (long long)NFS_FILEID(inode), 572 nr_pages); 573 574 if (filp == NULL) { 575 desc.ctx = nfs_find_open_context(inode, FMODE_READ); 576 if (desc.ctx == NULL) 577 return -EBADF; 578 } else 579 desc.ctx = get_nfs_open_context((struct nfs_open_context *) 580 filp->private_data); 581 ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc); 582 if (!list_empty(&head)) { 583 int err = nfs_pagein_list(&head, server->rpages); 584 if (!ret) 585 ret = err; 586 } 587 put_nfs_open_context(desc.ctx); 588 return ret; 589 } 590 591 int nfs_init_readpagecache(void) 592 { 593 nfs_rdata_cachep = kmem_cache_create("nfs_read_data", 594 sizeof(struct nfs_read_data), 595 0, SLAB_HWCACHE_ALIGN, 596 NULL, NULL); 597 if (nfs_rdata_cachep == NULL) 598 return -ENOMEM; 599 600 nfs_rdata_mempool = mempool_create(MIN_POOL_READ, 601 mempool_alloc_slab, 602 mempool_free_slab, 603 nfs_rdata_cachep); 604 if (nfs_rdata_mempool == NULL) 605 return -ENOMEM; 606 607 return 0; 608 } 609 610 void nfs_destroy_readpagecache(void) 611 { 612 mempool_destroy(nfs_rdata_mempool); 613 if (kmem_cache_destroy(nfs_rdata_cachep)) 614 printk(KERN_INFO "nfs_read_data: not all structures were freed\n"); 615 } 616