xref: /linux/fs/cachefiles/io.c (revision f9aec1648df09d55436a0e3a94acff1df507751f)
1 // SPDX-License-Identifier: GPL-2.0-or-later
2 /* kiocb-using read/write
3  *
4  * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved.
5  * Written by David Howells (dhowells@redhat.com)
6  */
7 
8 #include <linux/mount.h>
9 #include <linux/slab.h>
10 #include <linux/file.h>
11 #include <linux/uio.h>
12 #include <linux/sched/mm.h>
13 #include <linux/netfs.h>
14 #include "internal.h"
15 
16 struct cachefiles_kiocb {
17 	struct kiocb		iocb;
18 	refcount_t		ki_refcnt;
19 	loff_t			start;
20 	union {
21 		size_t		skipped;
22 		size_t		len;
23 	};
24 	netfs_io_terminated_t	term_func;
25 	void			*term_func_priv;
26 	bool			was_async;
27 };
28 
29 static inline void cachefiles_put_kiocb(struct cachefiles_kiocb *ki)
30 {
31 	if (refcount_dec_and_test(&ki->ki_refcnt)) {
32 		fput(ki->iocb.ki_filp);
33 		kfree(ki);
34 	}
35 }
36 
37 /*
38  * Handle completion of a read from the cache.
39  */
40 static void cachefiles_read_complete(struct kiocb *iocb, long ret, long ret2)
41 {
42 	struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
43 
44 	_enter("%ld,%ld", ret, ret2);
45 
46 	if (ki->term_func) {
47 		if (ret >= 0)
48 			ret += ki->skipped;
49 		ki->term_func(ki->term_func_priv, ret, ki->was_async);
50 	}
51 
52 	cachefiles_put_kiocb(ki);
53 }
54 
55 /*
56  * Initiate a read from the cache.
57  */
58 static int cachefiles_read(struct netfs_cache_resources *cres,
59 			   loff_t start_pos,
60 			   struct iov_iter *iter,
61 			   bool seek_data,
62 			   netfs_io_terminated_t term_func,
63 			   void *term_func_priv)
64 {
65 	struct cachefiles_kiocb *ki;
66 	struct file *file = cres->cache_priv2;
67 	unsigned int old_nofs;
68 	ssize_t ret = -ENOBUFS;
69 	size_t len = iov_iter_count(iter), skipped = 0;
70 
71 	_enter("%pD,%li,%llx,%zx/%llx",
72 	       file, file_inode(file)->i_ino, start_pos, len,
73 	       i_size_read(file_inode(file)));
74 
75 	/* If the caller asked us to seek for data before doing the read, then
76 	 * we should do that now.  If we find a gap, we fill it with zeros.
77 	 */
78 	if (seek_data) {
79 		loff_t off = start_pos, off2;
80 
81 		off2 = vfs_llseek(file, off, SEEK_DATA);
82 		if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO && off2 != -ENXIO) {
83 			skipped = 0;
84 			ret = off2;
85 			goto presubmission_error;
86 		}
87 
88 		if (off2 == -ENXIO || off2 >= start_pos + len) {
89 			/* The region is beyond the EOF or there's no more data
90 			 * in the region, so clear the rest of the buffer and
91 			 * return success.
92 			 */
93 			iov_iter_zero(len, iter);
94 			skipped = len;
95 			ret = 0;
96 			goto presubmission_error;
97 		}
98 
99 		skipped = off2 - off;
100 		iov_iter_zero(skipped, iter);
101 	}
102 
103 	ret = -ENOBUFS;
104 	ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
105 	if (!ki)
106 		goto presubmission_error;
107 
108 	refcount_set(&ki->ki_refcnt, 2);
109 	ki->iocb.ki_filp	= file;
110 	ki->iocb.ki_pos		= start_pos + skipped;
111 	ki->iocb.ki_flags	= IOCB_DIRECT;
112 	ki->iocb.ki_hint	= ki_hint_validate(file_write_hint(file));
113 	ki->iocb.ki_ioprio	= get_current_ioprio();
114 	ki->skipped		= skipped;
115 	ki->term_func		= term_func;
116 	ki->term_func_priv	= term_func_priv;
117 	ki->was_async		= true;
118 
119 	if (ki->term_func)
120 		ki->iocb.ki_complete = cachefiles_read_complete;
121 
122 	get_file(ki->iocb.ki_filp);
123 
124 	old_nofs = memalloc_nofs_save();
125 	ret = vfs_iocb_iter_read(file, &ki->iocb, iter);
126 	memalloc_nofs_restore(old_nofs);
127 	switch (ret) {
128 	case -EIOCBQUEUED:
129 		goto in_progress;
130 
131 	case -ERESTARTSYS:
132 	case -ERESTARTNOINTR:
133 	case -ERESTARTNOHAND:
134 	case -ERESTART_RESTARTBLOCK:
135 		/* There's no easy way to restart the syscall since other AIO's
136 		 * may be already running. Just fail this IO with EINTR.
137 		 */
138 		ret = -EINTR;
139 		fallthrough;
140 	default:
141 		ki->was_async = false;
142 		cachefiles_read_complete(&ki->iocb, ret, 0);
143 		if (ret > 0)
144 			ret = 0;
145 		break;
146 	}
147 
148 in_progress:
149 	cachefiles_put_kiocb(ki);
150 	_leave(" = %zd", ret);
151 	return ret;
152 
153 presubmission_error:
154 	if (term_func)
155 		term_func(term_func_priv, ret < 0 ? ret : skipped, false);
156 	return ret;
157 }
158 
159 /*
160  * Handle completion of a write to the cache.
161  */
162 static void cachefiles_write_complete(struct kiocb *iocb, long ret, long ret2)
163 {
164 	struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb);
165 	struct inode *inode = file_inode(ki->iocb.ki_filp);
166 
167 	_enter("%ld,%ld", ret, ret2);
168 
169 	/* Tell lockdep we inherited freeze protection from submission thread */
170 	__sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE);
171 	__sb_end_write(inode->i_sb, SB_FREEZE_WRITE);
172 
173 	if (ki->term_func)
174 		ki->term_func(ki->term_func_priv, ret, ki->was_async);
175 
176 	cachefiles_put_kiocb(ki);
177 }
178 
179 /*
180  * Initiate a write to the cache.
181  */
182 static int cachefiles_write(struct netfs_cache_resources *cres,
183 			    loff_t start_pos,
184 			    struct iov_iter *iter,
185 			    netfs_io_terminated_t term_func,
186 			    void *term_func_priv)
187 {
188 	struct cachefiles_kiocb *ki;
189 	struct inode *inode;
190 	struct file *file = cres->cache_priv2;
191 	unsigned int old_nofs;
192 	ssize_t ret = -ENOBUFS;
193 	size_t len = iov_iter_count(iter);
194 
195 	_enter("%pD,%li,%llx,%zx/%llx",
196 	       file, file_inode(file)->i_ino, start_pos, len,
197 	       i_size_read(file_inode(file)));
198 
199 	ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL);
200 	if (!ki)
201 		goto presubmission_error;
202 
203 	refcount_set(&ki->ki_refcnt, 2);
204 	ki->iocb.ki_filp	= file;
205 	ki->iocb.ki_pos		= start_pos;
206 	ki->iocb.ki_flags	= IOCB_DIRECT | IOCB_WRITE;
207 	ki->iocb.ki_hint	= ki_hint_validate(file_write_hint(file));
208 	ki->iocb.ki_ioprio	= get_current_ioprio();
209 	ki->start		= start_pos;
210 	ki->len			= len;
211 	ki->term_func		= term_func;
212 	ki->term_func_priv	= term_func_priv;
213 	ki->was_async		= true;
214 
215 	if (ki->term_func)
216 		ki->iocb.ki_complete = cachefiles_write_complete;
217 
218 	/* Open-code file_start_write here to grab freeze protection, which
219 	 * will be released by another thread in aio_complete_rw().  Fool
220 	 * lockdep by telling it the lock got released so that it doesn't
221 	 * complain about the held lock when we return to userspace.
222 	 */
223 	inode = file_inode(file);
224 	__sb_start_write(inode->i_sb, SB_FREEZE_WRITE);
225 	__sb_writers_release(inode->i_sb, SB_FREEZE_WRITE);
226 
227 	get_file(ki->iocb.ki_filp);
228 
229 	old_nofs = memalloc_nofs_save();
230 	ret = vfs_iocb_iter_write(file, &ki->iocb, iter);
231 	memalloc_nofs_restore(old_nofs);
232 	switch (ret) {
233 	case -EIOCBQUEUED:
234 		goto in_progress;
235 
236 	case -ERESTARTSYS:
237 	case -ERESTARTNOINTR:
238 	case -ERESTARTNOHAND:
239 	case -ERESTART_RESTARTBLOCK:
240 		/* There's no easy way to restart the syscall since other AIO's
241 		 * may be already running. Just fail this IO with EINTR.
242 		 */
243 		ret = -EINTR;
244 		fallthrough;
245 	default:
246 		ki->was_async = false;
247 		cachefiles_write_complete(&ki->iocb, ret, 0);
248 		if (ret > 0)
249 			ret = 0;
250 		break;
251 	}
252 
253 in_progress:
254 	cachefiles_put_kiocb(ki);
255 	_leave(" = %zd", ret);
256 	return ret;
257 
258 presubmission_error:
259 	if (term_func)
260 		term_func(term_func_priv, -ENOMEM, false);
261 	return -ENOMEM;
262 }
263 
264 /*
265  * Prepare a read operation, shortening it to a cached/uncached
266  * boundary as appropriate.
267  */
268 static enum netfs_read_source cachefiles_prepare_read(struct netfs_read_subrequest *subreq,
269 						      loff_t i_size)
270 {
271 	struct fscache_retrieval *op = subreq->rreq->cache_resources.cache_priv;
272 	struct cachefiles_object *object;
273 	struct cachefiles_cache *cache;
274 	const struct cred *saved_cred;
275 	struct file *file = subreq->rreq->cache_resources.cache_priv2;
276 	loff_t off, to;
277 
278 	_enter("%zx @%llx/%llx", subreq->len, subreq->start, i_size);
279 
280 	object = container_of(op->op.object,
281 			      struct cachefiles_object, fscache);
282 	cache = container_of(object->fscache.cache,
283 			     struct cachefiles_cache, cache);
284 
285 	if (!file)
286 		goto cache_fail_nosec;
287 
288 	if (subreq->start >= i_size)
289 		return NETFS_FILL_WITH_ZEROES;
290 
291 	cachefiles_begin_secure(cache, &saved_cred);
292 
293 	off = vfs_llseek(file, subreq->start, SEEK_DATA);
294 	if (off < 0 && off >= (loff_t)-MAX_ERRNO) {
295 		if (off == (loff_t)-ENXIO)
296 			goto download_and_store;
297 		goto cache_fail;
298 	}
299 
300 	if (off >= subreq->start + subreq->len)
301 		goto download_and_store;
302 
303 	if (off > subreq->start) {
304 		off = round_up(off, cache->bsize);
305 		subreq->len = off - subreq->start;
306 		goto download_and_store;
307 	}
308 
309 	to = vfs_llseek(file, subreq->start, SEEK_HOLE);
310 	if (to < 0 && to >= (loff_t)-MAX_ERRNO)
311 		goto cache_fail;
312 
313 	if (to < subreq->start + subreq->len) {
314 		if (subreq->start + subreq->len >= i_size)
315 			to = round_up(to, cache->bsize);
316 		else
317 			to = round_down(to, cache->bsize);
318 		subreq->len = to - subreq->start;
319 	}
320 
321 	cachefiles_end_secure(cache, saved_cred);
322 	return NETFS_READ_FROM_CACHE;
323 
324 download_and_store:
325 	if (cachefiles_has_space(cache, 0, (subreq->len + PAGE_SIZE - 1) / PAGE_SIZE) == 0)
326 		__set_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags);
327 cache_fail:
328 	cachefiles_end_secure(cache, saved_cred);
329 cache_fail_nosec:
330 	return NETFS_DOWNLOAD_FROM_SERVER;
331 }
332 
333 /*
334  * Prepare for a write to occur.
335  */
336 static int cachefiles_prepare_write(struct netfs_cache_resources *cres,
337 				    loff_t *_start, size_t *_len, loff_t i_size)
338 {
339 	loff_t start = *_start;
340 	size_t len = *_len, down;
341 
342 	/* Round to DIO size */
343 	down = start - round_down(start, PAGE_SIZE);
344 	*_start = start - down;
345 	*_len = round_up(down + len, PAGE_SIZE);
346 	return 0;
347 }
348 
349 /*
350  * Clean up an operation.
351  */
352 static void cachefiles_end_operation(struct netfs_cache_resources *cres)
353 {
354 	struct fscache_retrieval *op = cres->cache_priv;
355 	struct file *file = cres->cache_priv2;
356 
357 	_enter("");
358 
359 	if (file)
360 		fput(file);
361 	if (op) {
362 		fscache_op_complete(&op->op, false);
363 		fscache_put_retrieval(op);
364 	}
365 
366 	_leave("");
367 }
368 
369 static const struct netfs_cache_ops cachefiles_netfs_cache_ops = {
370 	.end_operation		= cachefiles_end_operation,
371 	.read			= cachefiles_read,
372 	.write			= cachefiles_write,
373 	.prepare_read		= cachefiles_prepare_read,
374 	.prepare_write		= cachefiles_prepare_write,
375 };
376 
377 /*
378  * Open the cache file when beginning a cache operation.
379  */
380 int cachefiles_begin_read_operation(struct netfs_read_request *rreq,
381 				    struct fscache_retrieval *op)
382 {
383 	struct cachefiles_object *object;
384 	struct cachefiles_cache *cache;
385 	struct path path;
386 	struct file *file;
387 
388 	_enter("");
389 
390 	object = container_of(op->op.object,
391 			      struct cachefiles_object, fscache);
392 	cache = container_of(object->fscache.cache,
393 			     struct cachefiles_cache, cache);
394 
395 	path.mnt = cache->mnt;
396 	path.dentry = object->backer;
397 	file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT,
398 				   d_inode(object->backer), cache->cache_cred);
399 	if (IS_ERR(file))
400 		return PTR_ERR(file);
401 	if (!S_ISREG(file_inode(file)->i_mode))
402 		goto error_file;
403 	if (unlikely(!file->f_op->read_iter) ||
404 	    unlikely(!file->f_op->write_iter)) {
405 		pr_notice("Cache does not support read_iter and write_iter\n");
406 		goto error_file;
407 	}
408 
409 	fscache_get_retrieval(op);
410 	rreq->cache_resources.cache_priv = op;
411 	rreq->cache_resources.cache_priv2 = file;
412 	rreq->cache_resources.ops = &cachefiles_netfs_cache_ops;
413 	rreq->cache_resources.debug_id = object->fscache.debug_id;
414 	_leave("");
415 	return 0;
416 
417 error_file:
418 	fput(file);
419 	return -EIO;
420 }
421