/*
 * linux/fs/nfs/read.c
 *
 * Block I/O for NFS
 *
 * Partial copy of Linus' read cache modifications to fs/nfs/file.c
 * modified for async RPC by okir@monad.swb.de
 */

#include <linux/time.h>
#include <linux/kernel.h>
#include <linux/errno.h>
#include <linux/fcntl.h>
#include <linux/stat.h>
#include <linux/mm.h>
#include <linux/slab.h>
#include <linux/pagemap.h>
#include <linux/sunrpc/clnt.h>
#include <linux/nfs_fs.h>
#include <linux/nfs_page.h>
#include <linux/smp_lock.h>

#include <asm/system.h>

#include "internal.h"
#include "iostat.h"

#define NFSDBG_FACILITY		NFSDBG_PAGECACHE

static int nfs_pagein_one(struct list_head *, struct inode *);
static const struct rpc_call_ops nfs_read_partial_ops;
static const struct rpc_call_ops nfs_read_full_ops;

static struct kmem_cache *nfs_rdata_cachep;
static mempool_t *nfs_rdata_mempool;

#define MIN_POOL_READ	(32)

struct nfs_read_data *nfs_readdata_alloc(size_t len)
{
	unsigned int pagecount = (len + PAGE_SIZE - 1) >> PAGE_SHIFT;
	struct nfs_read_data *p = mempool_alloc(nfs_rdata_mempool, GFP_NOFS);

	if (p) {
		memset(p, 0, sizeof(*p));
		INIT_LIST_HEAD(&p->pages);
		p->npages = pagecount;
		if (pagecount <= ARRAY_SIZE(p->page_array))
			p->pagevec = p->page_array;
		else {
			p->pagevec = kcalloc(pagecount, sizeof(struct page *), GFP_NOFS);
			if (!p->pagevec) {
				mempool_free(p, nfs_rdata_mempool);
				p = NULL;
			}
		}
	}
	return p;
}
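
/*
 * Illustrative example (editorial; assumes 4 KB pages, which is
 * arch-dependent): a request of len = 20480 bytes gives
 * pagecount = (20480 + 4095) >> 12 = 5. A request that small fits in the
 * embedded page_array; only when pagecount exceeds
 * ARRAY_SIZE(p->page_array) does the allocator fall back to a separately
 * kcalloc()'d pagevec, which must then be freed in nfs_readdata_rcu_free().
 */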

static void nfs_readdata_rcu_free(struct rcu_head *head)
{
	struct nfs_read_data *p = container_of(head, struct nfs_read_data, task.u.tk_rcu);
	if (p && (p->pagevec != &p->page_array[0]))
		kfree(p->pagevec);
	mempool_free(p, nfs_rdata_mempool);
}

static void nfs_readdata_free(struct nfs_read_data *rdata)
{
	call_rcu_bh(&rdata->task.u.tk_rcu, nfs_readdata_rcu_free);
}

void nfs_readdata_release(void *data)
{
	nfs_readdata_free(data);
}

static int nfs_return_empty_page(struct page *page)
{
	memclear_highpage_flush(page, 0, PAGE_CACHE_SIZE);
	SetPageUptodate(page);
	unlock_page(page);
	return 0;
}

static void nfs_readpage_truncate_uninitialised_page(struct nfs_read_data *data)
{
	unsigned int remainder = data->args.count - data->res.count;
	unsigned int base = data->args.pgbase + data->res.count;
	unsigned int pglen;
	struct page **pages;

	if (data->res.eof == 0 || remainder == 0)
		return;
	/*
	 * Note: "remainder" can never be negative, since we check for
	 * this in the XDR code.
	 */
	pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
	base &= ~PAGE_CACHE_MASK;
	pglen = PAGE_CACHE_SIZE - base;
	for (;;) {
		if (remainder <= pglen) {
			memclear_highpage_flush(*pages, base, remainder);
			break;
		}
		memclear_highpage_flush(*pages, base, pglen);
		pages++;
		remainder -= pglen;
		pglen = PAGE_CACHE_SIZE;
		base = 0;
	}
}
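
/*
 * Worked example (editorial; assumes PAGE_CACHE_SIZE = 4096): a request
 * with args.pgbase = 0 and args.count = 8192 that completes with
 * res.count = 5000 and res.eof set leaves remainder = 3192 starting at
 * base = 5000. The loop lands on the second page (5000 >> 12 = 1), with
 * in-page offset 5000 & ~PAGE_CACHE_MASK = 904 and pglen = 3192, so
 * bytes 904..4095 of that page are zeroed and stale data past EOF is
 * never exposed to userspace.
 */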

static int nfs_readpage_async(struct nfs_open_context *ctx, struct inode *inode,
		struct page *page)
{
	LIST_HEAD(one_request);
	struct nfs_page	*new;
	unsigned int len;

	len = nfs_page_length(page);
	if (len == 0)
		return nfs_return_empty_page(page);
	new = nfs_create_request(ctx, inode, page, 0, len);
	if (IS_ERR(new)) {
		unlock_page(page);
		return PTR_ERR(new);
	}
	if (len < PAGE_CACHE_SIZE)
		memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);

	nfs_list_add_request(new, &one_request);
	nfs_pagein_one(&one_request, inode);
	return 0;
}

static void nfs_readpage_release(struct nfs_page *req)
{
	unlock_page(req->wb_page);

	dprintk("NFS: read done (%s/%Ld %d@%Ld)\n",
			req->wb_context->dentry->d_inode->i_sb->s_id,
			(long long)NFS_FILEID(req->wb_context->dentry->d_inode),
			req->wb_bytes,
			(long long)req_offset(req));
	nfs_clear_request(req);
	nfs_release_request(req);
}

/*
 * Set up the NFS read request struct
 */
static void nfs_read_rpcsetup(struct nfs_page *req, struct nfs_read_data *data,
		const struct rpc_call_ops *call_ops,
		unsigned int count, unsigned int offset)
{
	struct inode		*inode;
	int flags;

	data->req	  = req;
	data->inode	  = inode = req->wb_context->dentry->d_inode;
	data->cred	  = req->wb_context->cred;

	data->args.fh     = NFS_FH(inode);
	data->args.offset = req_offset(req) + offset;
	data->args.pgbase = req->wb_pgbase + offset;
	data->args.pages  = data->pagevec;
	data->args.count  = count;
	data->args.context = req->wb_context;

	data->res.fattr   = &data->fattr;
	data->res.count   = count;
	data->res.eof     = 0;
	nfs_fattr_init(&data->fattr);

	/* Set up the initial task struct. */
	flags = RPC_TASK_ASYNC | (IS_SWAPFILE(inode) ? NFS_RPC_SWAPFLAGS : 0);
	rpc_init_task(&data->task, NFS_CLIENT(inode), flags, call_ops, data);
	NFS_PROTO(inode)->read_setup(data);

	data->task.tk_cookie = (unsigned long)inode;

	dprintk("NFS: %5u initiated read call (req %s/%Ld, %u bytes @ offset %Lu)\n",
			data->task.tk_pid,
			inode->i_sb->s_id,
			(long long)NFS_FILEID(inode),
			count,
			(unsigned long long)data->args.offset);
}
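
/*
 * Editorial note on the arithmetic above: for the split-request path,
 * "offset" is the byte offset of this slice within the original nfs_page,
 * so args.offset advances into the file and args.pgbase advances within
 * the shared page by the same amount; successive slices therefore tile
 * the page without overlap.
 */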

static void
nfs_async_read_error(struct list_head *head)
{
	struct nfs_page	*req;

	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		SetPageError(req->wb_page);
		nfs_readpage_release(req);
	}
}

/*
 * Start an async read operation
 */
static void nfs_execute_read(struct nfs_read_data *data)
{
	struct rpc_clnt *clnt = NFS_CLIENT(data->inode);
	sigset_t oldset;

	rpc_clnt_sigmask(clnt, &oldset);
	rpc_execute(&data->task);
	rpc_clnt_sigunmask(clnt, &oldset);
}
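
/*
 * Editorial note: rpc_clnt_sigmask()/rpc_clnt_sigunmask() adjust the
 * caller's signal mask around rpc_execute() in line with this rpc_clnt's
 * interrupt semantics; the task itself was created with RPC_TASK_ASYNC,
 * so completion is reported through the rpc_call_ops installed by
 * nfs_read_rpcsetup() rather than by waiting here.
 */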

/*
 * Generate multiple requests to fill a single page.
 *
 * We optimize to reduce the number of read operations on the wire.  If we
 * detect that we're reading a page, or an area of a page, that is past the
 * end of file, we do not generate NFS read operations but just clear the
 * parts of the page that would have come back zero from the server anyway.
 *
 * We rely on the cached value of i_size to make this determination; another
 * client can fill pages on the server past our cached end-of-file, but we
 * won't see the new data until our attribute cache is updated.  This is more
 * or less conventional NFS client behavior.
 */
static int nfs_pagein_multi(struct list_head *head, struct inode *inode)
{
	struct nfs_page *req = nfs_list_entry(head->next);
	struct page *page = req->wb_page;
	struct nfs_read_data *data;
	size_t rsize = NFS_SERVER(inode)->rsize, nbytes;
	unsigned int offset;
	int requests = 0;
	LIST_HEAD(list);

	nfs_list_remove_request(req);

	nbytes = req->wb_bytes;
	do {
		size_t len = min(nbytes, rsize);

		data = nfs_readdata_alloc(len);
		if (!data)
			goto out_bad;
		INIT_LIST_HEAD(&data->pages);
		list_add(&data->pages, &list);
		requests++;
		nbytes -= len;
	} while (nbytes != 0);
	atomic_set(&req->wb_complete, requests);

	ClearPageError(page);
	offset = 0;
	nbytes = req->wb_bytes;
	do {
		data = list_entry(list.next, struct nfs_read_data, pages);
		list_del_init(&data->pages);

		data->pagevec[0] = page;

		if (nbytes > rsize) {
			nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
					rsize, offset);
			offset += rsize;
			nbytes -= rsize;
		} else {
			nfs_read_rpcsetup(req, data, &nfs_read_partial_ops,
					nbytes, offset);
			nbytes = 0;
		}
		nfs_execute_read(data);
	} while (nbytes != 0);

	return 0;

out_bad:
	while (!list_empty(&list)) {
		data = list_entry(list.next, struct nfs_read_data, pages);
		list_del(&data->pages);
		nfs_readdata_free(data);
	}
	SetPageError(page);
	nfs_readpage_release(req);
	return -ENOMEM;
}
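
/*
 * Worked example (editorial; assumes a 4096-byte page and rsize = 1024):
 * the first loop allocates four nfs_read_data structures and sets
 * req->wb_complete = 4; the second loop issues 1024-byte reads at offsets
 * 0, 1024, 2048 and 3072, each with pagevec[0] pointing at the same page.
 * The page is unlocked only when the final completion drops wb_complete
 * to zero in nfs_readpage_result_partial().
 */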

static int nfs_pagein_one(struct list_head *head, struct inode *inode)
{
	struct nfs_page		*req;
	struct page		**pages;
	struct nfs_read_data	*data;
	unsigned int		count;

	if (NFS_SERVER(inode)->rsize < PAGE_CACHE_SIZE)
		return nfs_pagein_multi(head, inode);

	data = nfs_readdata_alloc(NFS_SERVER(inode)->rsize);
	if (!data)
		goto out_bad;

	INIT_LIST_HEAD(&data->pages);
	pages = data->pagevec;
	count = 0;
	while (!list_empty(head)) {
		req = nfs_list_entry(head->next);
		nfs_list_remove_request(req);
		nfs_list_add_request(req, &data->pages);
		ClearPageError(req->wb_page);
		*pages++ = req->wb_page;
		count += req->wb_bytes;
	}
	req = nfs_list_entry(data->pages.next);

	nfs_read_rpcsetup(req, data, &nfs_read_full_ops, count, 0);

	nfs_execute_read(data);
	return 0;
out_bad:
	nfs_async_read_error(head);
	return -ENOMEM;
}
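
/*
 * Editorial summary: unlike nfs_pagein_multi() above, this path batches
 * the whole coalesced run into one RPC; pagevec[] collects one struct
 * page per nfs_page and "count" accumulates the total byte count, so a
 * single READ of up to rsize bytes covers every page on the list.
 */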

static int
nfs_pagein_list(struct list_head *head, int rpages)
{
	LIST_HEAD(one_request);
	struct nfs_page		*req;
	int			error = 0;
	unsigned int		pages = 0;

	while (!list_empty(head)) {
		pages += nfs_coalesce_requests(head, &one_request, rpages);
		req = nfs_list_entry(one_request.next);
		error = nfs_pagein_one(&one_request, req->wb_context->dentry->d_inode);
		if (error < 0)
			break;
	}
	if (error >= 0)
		return pages;

	nfs_async_read_error(head);
	return error;
}

/*
 * This is the callback from RPC telling us whether a reply was
 * received or some error occurred (timeout or socket shutdown).
 */
int nfs_readpage_result(struct rpc_task *task, struct nfs_read_data *data)
{
	int status;

	dprintk("NFS: %s: %5u, (status %d)\n", __FUNCTION__, task->tk_pid,
			task->tk_status);

	status = NFS_PROTO(data->inode)->read_done(task, data);
	if (status != 0)
		return status;

	nfs_add_stats(data->inode, NFSIOS_SERVERREADBYTES, data->res.count);

	if (task->tk_status == -ESTALE) {
		set_bit(NFS_INO_STALE, &NFS_FLAGS(data->inode));
		nfs_mark_for_revalidate(data->inode);
	}
	spin_lock(&data->inode->i_lock);
	NFS_I(data->inode)->cache_validity |= NFS_INO_INVALID_ATIME;
	spin_unlock(&data->inode->i_lock);
	return 0;
}

static int nfs_readpage_retry(struct rpc_task *task, struct nfs_read_data *data)
{
	struct nfs_readargs *argp = &data->args;
	struct nfs_readres *resp = &data->res;

	if (resp->eof || resp->count == argp->count)
		return 0;

	/* This is a short read! */
	nfs_inc_stats(data->inode, NFSIOS_SHORTREAD);
	/* Has the server at least made some progress? If not, give up. */
	if (resp->count == 0)
		return 0;

	/* Yes, so retry the read at the end of the data */
	argp->offset += resp->count;
	argp->pgbase += resp->count;
	argp->count -= resp->count;
	rpc_restart_call(task);
	return -EAGAIN;
}
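
/*
 * Short-read example (editorial): a request of args.count = 8192 at
 * args.offset = 0 that returns res.count = 4096 without eof is restarted
 * with offset = 4096, pgbase advanced by 4096 and count = 4096, so the
 * retry fetches only the missing tail instead of re-reading the whole
 * range.
 */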

/*
 * Handle a read reply that fills part of a page.
 */
static void nfs_readpage_result_partial(struct rpc_task *task, void *calldata)
{
	struct nfs_read_data *data = calldata;
	struct nfs_page *req = data->req;
	struct page *page = req->wb_page;

	if (nfs_readpage_result(task, data) != 0)
		return;

	if (likely(task->tk_status >= 0)) {
		nfs_readpage_truncate_uninitialised_page(data);
		if (nfs_readpage_retry(task, data) != 0)
			return;
	}
	if (unlikely(task->tk_status < 0))
		SetPageError(page);
	if (atomic_dec_and_test(&req->wb_complete)) {
		if (!PageError(page))
			SetPageUptodate(page);
		nfs_readpage_release(req);
	}
}

static const struct rpc_call_ops nfs_read_partial_ops = {
	.rpc_call_done = nfs_readpage_result_partial,
	.rpc_release = nfs_readdata_release,
};

static void nfs_readpage_set_pages_uptodate(struct nfs_read_data *data)
{
	unsigned int count = data->res.count;
	unsigned int base = data->args.pgbase;
	struct page **pages;

	if (data->res.eof)
		count = data->args.count;
	if (unlikely(count == 0))
		return;
	pages = &data->args.pages[base >> PAGE_CACHE_SHIFT];
	base &= ~PAGE_CACHE_MASK;
	count += base;
	for (; count >= PAGE_CACHE_SIZE; count -= PAGE_CACHE_SIZE, pages++)
		SetPageUptodate(*pages);
	if (count == 0)
		return;
	/* Was this a short read? */
	if (data->res.eof || data->res.count == data->args.count)
		SetPageUptodate(*pages);
}
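
/*
 * Worked examples (editorial; assumes PAGE_CACHE_SIZE = 4096): with
 * pgbase = 0, args.count = 8192 and res.count = 6000 (no eof), only the
 * first page is marked uptodate and the half-filled second page is left
 * for the short-read retry. With args.count = res.count = 6000, the
 * trailing partial page is marked uptodate as well, since everything
 * that was requested has arrived.
 */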

/*
 * This is the callback from RPC telling us whether a reply was
 * received or some error occurred (timeout or socket shutdown).
 */
static void nfs_readpage_result_full(struct rpc_task *task, void *calldata)
{
	struct nfs_read_data *data = calldata;

	if (nfs_readpage_result(task, data) != 0)
		return;
	/*
	 * Note: nfs_readpage_retry may change the values of
	 * data->args. In the multi-page case, we therefore need
	 * to ensure that we call nfs_readpage_set_pages_uptodate()
	 * first.
	 */
	if (likely(task->tk_status >= 0)) {
		nfs_readpage_truncate_uninitialised_page(data);
		nfs_readpage_set_pages_uptodate(data);
		if (nfs_readpage_retry(task, data) != 0)
			return;
	}
	while (!list_empty(&data->pages)) {
		struct nfs_page *req = nfs_list_entry(data->pages.next);

		nfs_list_remove_request(req);
		nfs_readpage_release(req);
	}
}

static const struct rpc_call_ops nfs_read_full_ops = {
	.rpc_call_done = nfs_readpage_result_full,
	.rpc_release = nfs_readdata_release,
};

/*
 * Read a page over NFS.
 * We read the page synchronously in the following case:
 *  -	The error flag is set for this page. This happens only when a
 *	previous async read operation failed.
 */
int nfs_readpage(struct file *file, struct page *page)
{
	struct nfs_open_context *ctx;
	struct inode *inode = page->mapping->host;
	int		error;

	dprintk("NFS: nfs_readpage (%p %ld@%lu)\n",
		page, PAGE_CACHE_SIZE, page->index);
	nfs_inc_stats(inode, NFSIOS_VFSREADPAGE);
	nfs_add_stats(inode, NFSIOS_READPAGES, 1);

	/*
	 * Try to flush any pending writes to the file.
	 *
	 * NOTE! Because we own the page lock, there cannot
	 * be any new pending writes generated at this point
	 * for this page (other pages can be written to).
	 */
	error = nfs_wb_page(inode, page);
	if (error)
		goto out_error;

	error = -ESTALE;
	if (NFS_STALE(inode))
		goto out_error;

	if (file == NULL) {
		error = -EBADF;
		ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
		if (ctx == NULL)
			goto out_error;
	} else
		ctx = get_nfs_open_context((struct nfs_open_context *)
				file->private_data);

	error = nfs_readpage_async(ctx, inode, page);

	put_nfs_open_context(ctx);
	return error;

out_error:
	unlock_page(page);
	return error;
}

struct nfs_readdesc {
	struct list_head *head;
	struct nfs_open_context *ctx;
};

static int
readpage_async_filler(void *data, struct page *page)
{
	struct nfs_readdesc *desc = (struct nfs_readdesc *)data;
	struct inode *inode = page->mapping->host;
	struct nfs_page *new;
	unsigned int len;

	nfs_wb_page(inode, page);
	len = nfs_page_length(page);
	if (len == 0)
		return nfs_return_empty_page(page);
	new = nfs_create_request(desc->ctx, inode, page, 0, len);
	if (IS_ERR(new)) {
		SetPageError(page);
		unlock_page(page);
		return PTR_ERR(new);
	}
	if (len < PAGE_CACHE_SIZE)
		memclear_highpage_flush(page, len, PAGE_CACHE_SIZE - len);
	nfs_list_add_request(new, desc->head);
	return 0;
}

int nfs_readpages(struct file *filp, struct address_space *mapping,
		struct list_head *pages, unsigned nr_pages)
{
	LIST_HEAD(head);
	struct nfs_readdesc desc = {
		.head		= &head,
	};
	struct inode *inode = mapping->host;
	struct nfs_server *server = NFS_SERVER(inode);
	int ret = -ESTALE;

	dprintk("NFS: nfs_readpages (%s/%Ld %d)\n",
			inode->i_sb->s_id,
			(long long)NFS_FILEID(inode),
			nr_pages);
	nfs_inc_stats(inode, NFSIOS_VFSREADPAGES);

	if (NFS_STALE(inode))
		goto out;

	if (filp == NULL) {
		desc.ctx = nfs_find_open_context(inode, NULL, FMODE_READ);
		if (desc.ctx == NULL)
			return -EBADF;
	} else
		desc.ctx = get_nfs_open_context((struct nfs_open_context *)
				filp->private_data);
	ret = read_cache_pages(mapping, pages, readpage_async_filler, &desc);
	if (!list_empty(&head)) {
		int err = nfs_pagein_list(&head, server->rpages);
		if (!ret)
			nfs_add_stats(inode, NFSIOS_READPAGES, err);
		ret = err;
	}
	put_nfs_open_context(desc.ctx);
out:
	return ret;
}
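
/*
 * Editorial note: "rpages" is the server's rsize expressed in pages, so
 * nfs_pagein_list() coalesces at most that many contiguous requests into
 * each batch before handing it to nfs_pagein_one(); one readahead pass
 * can therefore fan out into several wire READs of up to rsize bytes
 * each. Note also that "ret = err" above runs unconditionally; only the
 * statistics update is guarded by the preceding if.
 */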

int __init nfs_init_readpagecache(void)
{
	nfs_rdata_cachep = kmem_cache_create("nfs_read_data",
					     sizeof(struct nfs_read_data),
					     0, SLAB_HWCACHE_ALIGN,
					     NULL, NULL);
	if (nfs_rdata_cachep == NULL)
		return -ENOMEM;

	nfs_rdata_mempool = mempool_create_slab_pool(MIN_POOL_READ,
						     nfs_rdata_cachep);
	if (nfs_rdata_mempool == NULL) {
		/* Don't leak the slab cache if mempool creation fails. */
		kmem_cache_destroy(nfs_rdata_cachep);
		return -ENOMEM;
	}

	return 0;
}
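
/*
 * Editorial note: the mempool keeps MIN_POOL_READ (32) nfs_read_data
 * entries in reserve, so nfs_readdata_alloc(), which allocates with
 * GFP_NOFS, can still make forward progress under memory pressure
 * rather than failing reads outright.
 */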

void nfs_destroy_readpagecache(void)
{
	mempool_destroy(nfs_rdata_mempool);
	kmem_cache_destroy(nfs_rdata_cachep);
}