1 // SPDX-License-Identifier: GPL-2.0-or-later 2 /* kiocb-using read/write 3 * 4 * Copyright (C) 2021 Red Hat, Inc. All Rights Reserved. 5 * Written by David Howells (dhowells@redhat.com) 6 */ 7 8 #include <linux/mount.h> 9 #include <linux/slab.h> 10 #include <linux/file.h> 11 #include <linux/uio.h> 12 #include <linux/sched/mm.h> 13 #include <linux/netfs.h> 14 #include "internal.h" 15 16 struct cachefiles_kiocb { 17 struct kiocb iocb; 18 refcount_t ki_refcnt; 19 loff_t start; 20 union { 21 size_t skipped; 22 size_t len; 23 }; 24 netfs_io_terminated_t term_func; 25 void *term_func_priv; 26 bool was_async; 27 }; 28 29 static inline void cachefiles_put_kiocb(struct cachefiles_kiocb *ki) 30 { 31 if (refcount_dec_and_test(&ki->ki_refcnt)) { 32 fput(ki->iocb.ki_filp); 33 kfree(ki); 34 } 35 } 36 37 /* 38 * Handle completion of a read from the cache. 39 */ 40 static void cachefiles_read_complete(struct kiocb *iocb, long ret, long ret2) 41 { 42 struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb); 43 44 _enter("%ld,%ld", ret, ret2); 45 46 if (ki->term_func) { 47 if (ret >= 0) 48 ret += ki->skipped; 49 ki->term_func(ki->term_func_priv, ret, ki->was_async); 50 } 51 52 cachefiles_put_kiocb(ki); 53 } 54 55 /* 56 * Initiate a read from the cache. 57 */ 58 static int cachefiles_read(struct netfs_cache_resources *cres, 59 loff_t start_pos, 60 struct iov_iter *iter, 61 bool seek_data, 62 netfs_io_terminated_t term_func, 63 void *term_func_priv) 64 { 65 struct cachefiles_kiocb *ki; 66 struct file *file = cres->cache_priv2; 67 unsigned int old_nofs; 68 ssize_t ret = -ENOBUFS; 69 size_t len = iov_iter_count(iter), skipped = 0; 70 71 _enter("%pD,%li,%llx,%zx/%llx", 72 file, file_inode(file)->i_ino, start_pos, len, 73 i_size_read(file_inode(file))); 74 75 /* If the caller asked us to seek for data before doing the read, then 76 * we should do that now. If we find a gap, we fill it with zeros. 77 */ 78 if (seek_data) { 79 loff_t off = start_pos, off2; 80 81 off2 = vfs_llseek(file, off, SEEK_DATA); 82 if (off2 < 0 && off2 >= (loff_t)-MAX_ERRNO && off2 != -ENXIO) { 83 skipped = 0; 84 ret = off2; 85 goto presubmission_error; 86 } 87 88 if (off2 == -ENXIO || off2 >= start_pos + len) { 89 /* The region is beyond the EOF or there's no more data 90 * in the region, so clear the rest of the buffer and 91 * return success. 92 */ 93 iov_iter_zero(len, iter); 94 skipped = len; 95 ret = 0; 96 goto presubmission_error; 97 } 98 99 skipped = off2 - off; 100 iov_iter_zero(skipped, iter); 101 } 102 103 ret = -ENOBUFS; 104 ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL); 105 if (!ki) 106 goto presubmission_error; 107 108 refcount_set(&ki->ki_refcnt, 2); 109 ki->iocb.ki_filp = file; 110 ki->iocb.ki_pos = start_pos + skipped; 111 ki->iocb.ki_flags = IOCB_DIRECT; 112 ki->iocb.ki_hint = ki_hint_validate(file_write_hint(file)); 113 ki->iocb.ki_ioprio = get_current_ioprio(); 114 ki->skipped = skipped; 115 ki->term_func = term_func; 116 ki->term_func_priv = term_func_priv; 117 ki->was_async = true; 118 119 if (ki->term_func) 120 ki->iocb.ki_complete = cachefiles_read_complete; 121 122 get_file(ki->iocb.ki_filp); 123 124 old_nofs = memalloc_nofs_save(); 125 ret = vfs_iocb_iter_read(file, &ki->iocb, iter); 126 memalloc_nofs_restore(old_nofs); 127 switch (ret) { 128 case -EIOCBQUEUED: 129 goto in_progress; 130 131 case -ERESTARTSYS: 132 case -ERESTARTNOINTR: 133 case -ERESTARTNOHAND: 134 case -ERESTART_RESTARTBLOCK: 135 /* There's no easy way to restart the syscall since other AIO's 136 * may be already running. Just fail this IO with EINTR. 137 */ 138 ret = -EINTR; 139 fallthrough; 140 default: 141 ki->was_async = false; 142 cachefiles_read_complete(&ki->iocb, ret, 0); 143 if (ret > 0) 144 ret = 0; 145 break; 146 } 147 148 in_progress: 149 cachefiles_put_kiocb(ki); 150 _leave(" = %zd", ret); 151 return ret; 152 153 presubmission_error: 154 if (term_func) 155 term_func(term_func_priv, ret < 0 ? ret : skipped, false); 156 return ret; 157 } 158 159 /* 160 * Handle completion of a write to the cache. 161 */ 162 static void cachefiles_write_complete(struct kiocb *iocb, long ret, long ret2) 163 { 164 struct cachefiles_kiocb *ki = container_of(iocb, struct cachefiles_kiocb, iocb); 165 struct inode *inode = file_inode(ki->iocb.ki_filp); 166 167 _enter("%ld,%ld", ret, ret2); 168 169 /* Tell lockdep we inherited freeze protection from submission thread */ 170 __sb_writers_acquired(inode->i_sb, SB_FREEZE_WRITE); 171 __sb_end_write(inode->i_sb, SB_FREEZE_WRITE); 172 173 if (ki->term_func) 174 ki->term_func(ki->term_func_priv, ret, ki->was_async); 175 176 cachefiles_put_kiocb(ki); 177 } 178 179 /* 180 * Initiate a write to the cache. 181 */ 182 static int cachefiles_write(struct netfs_cache_resources *cres, 183 loff_t start_pos, 184 struct iov_iter *iter, 185 netfs_io_terminated_t term_func, 186 void *term_func_priv) 187 { 188 struct cachefiles_kiocb *ki; 189 struct inode *inode; 190 struct file *file = cres->cache_priv2; 191 unsigned int old_nofs; 192 ssize_t ret = -ENOBUFS; 193 size_t len = iov_iter_count(iter); 194 195 _enter("%pD,%li,%llx,%zx/%llx", 196 file, file_inode(file)->i_ino, start_pos, len, 197 i_size_read(file_inode(file))); 198 199 ki = kzalloc(sizeof(struct cachefiles_kiocb), GFP_KERNEL); 200 if (!ki) 201 goto presubmission_error; 202 203 refcount_set(&ki->ki_refcnt, 2); 204 ki->iocb.ki_filp = file; 205 ki->iocb.ki_pos = start_pos; 206 ki->iocb.ki_flags = IOCB_DIRECT | IOCB_WRITE; 207 ki->iocb.ki_hint = ki_hint_validate(file_write_hint(file)); 208 ki->iocb.ki_ioprio = get_current_ioprio(); 209 ki->start = start_pos; 210 ki->len = len; 211 ki->term_func = term_func; 212 ki->term_func_priv = term_func_priv; 213 ki->was_async = true; 214 215 if (ki->term_func) 216 ki->iocb.ki_complete = cachefiles_write_complete; 217 218 /* Open-code file_start_write here to grab freeze protection, which 219 * will be released by another thread in aio_complete_rw(). Fool 220 * lockdep by telling it the lock got released so that it doesn't 221 * complain about the held lock when we return to userspace. 222 */ 223 inode = file_inode(file); 224 __sb_start_write(inode->i_sb, SB_FREEZE_WRITE); 225 __sb_writers_release(inode->i_sb, SB_FREEZE_WRITE); 226 227 get_file(ki->iocb.ki_filp); 228 229 old_nofs = memalloc_nofs_save(); 230 ret = vfs_iocb_iter_write(file, &ki->iocb, iter); 231 memalloc_nofs_restore(old_nofs); 232 switch (ret) { 233 case -EIOCBQUEUED: 234 goto in_progress; 235 236 case -ERESTARTSYS: 237 case -ERESTARTNOINTR: 238 case -ERESTARTNOHAND: 239 case -ERESTART_RESTARTBLOCK: 240 /* There's no easy way to restart the syscall since other AIO's 241 * may be already running. Just fail this IO with EINTR. 242 */ 243 ret = -EINTR; 244 fallthrough; 245 default: 246 ki->was_async = false; 247 cachefiles_write_complete(&ki->iocb, ret, 0); 248 if (ret > 0) 249 ret = 0; 250 break; 251 } 252 253 in_progress: 254 cachefiles_put_kiocb(ki); 255 _leave(" = %zd", ret); 256 return ret; 257 258 presubmission_error: 259 if (term_func) 260 term_func(term_func_priv, -ENOMEM, false); 261 return -ENOMEM; 262 } 263 264 /* 265 * Prepare a read operation, shortening it to a cached/uncached 266 * boundary as appropriate. 267 */ 268 static enum netfs_read_source cachefiles_prepare_read(struct netfs_read_subrequest *subreq, 269 loff_t i_size) 270 { 271 struct fscache_retrieval *op = subreq->rreq->cache_resources.cache_priv; 272 struct cachefiles_object *object; 273 struct cachefiles_cache *cache; 274 const struct cred *saved_cred; 275 struct file *file = subreq->rreq->cache_resources.cache_priv2; 276 loff_t off, to; 277 278 _enter("%zx @%llx/%llx", subreq->len, subreq->start, i_size); 279 280 object = container_of(op->op.object, 281 struct cachefiles_object, fscache); 282 cache = container_of(object->fscache.cache, 283 struct cachefiles_cache, cache); 284 285 if (!file) 286 goto cache_fail_nosec; 287 288 if (subreq->start >= i_size) 289 return NETFS_FILL_WITH_ZEROES; 290 291 cachefiles_begin_secure(cache, &saved_cred); 292 293 off = vfs_llseek(file, subreq->start, SEEK_DATA); 294 if (off < 0 && off >= (loff_t)-MAX_ERRNO) { 295 if (off == (loff_t)-ENXIO) 296 goto download_and_store; 297 goto cache_fail; 298 } 299 300 if (off >= subreq->start + subreq->len) 301 goto download_and_store; 302 303 if (off > subreq->start) { 304 off = round_up(off, cache->bsize); 305 subreq->len = off - subreq->start; 306 goto download_and_store; 307 } 308 309 to = vfs_llseek(file, subreq->start, SEEK_HOLE); 310 if (to < 0 && to >= (loff_t)-MAX_ERRNO) 311 goto cache_fail; 312 313 if (to < subreq->start + subreq->len) { 314 if (subreq->start + subreq->len >= i_size) 315 to = round_up(to, cache->bsize); 316 else 317 to = round_down(to, cache->bsize); 318 subreq->len = to - subreq->start; 319 } 320 321 cachefiles_end_secure(cache, saved_cred); 322 return NETFS_READ_FROM_CACHE; 323 324 download_and_store: 325 if (cachefiles_has_space(cache, 0, (subreq->len + PAGE_SIZE - 1) / PAGE_SIZE) == 0) 326 __set_bit(NETFS_SREQ_WRITE_TO_CACHE, &subreq->flags); 327 cache_fail: 328 cachefiles_end_secure(cache, saved_cred); 329 cache_fail_nosec: 330 return NETFS_DOWNLOAD_FROM_SERVER; 331 } 332 333 /* 334 * Prepare for a write to occur. 335 */ 336 static int cachefiles_prepare_write(struct netfs_cache_resources *cres, 337 loff_t *_start, size_t *_len, loff_t i_size) 338 { 339 loff_t start = *_start; 340 size_t len = *_len, down; 341 342 /* Round to DIO size */ 343 down = start - round_down(start, PAGE_SIZE); 344 *_start = start - down; 345 *_len = round_up(down + len, PAGE_SIZE); 346 return 0; 347 } 348 349 /* 350 * Clean up an operation. 351 */ 352 static void cachefiles_end_operation(struct netfs_cache_resources *cres) 353 { 354 struct fscache_retrieval *op = cres->cache_priv; 355 struct file *file = cres->cache_priv2; 356 357 _enter(""); 358 359 if (file) 360 fput(file); 361 if (op) { 362 fscache_op_complete(&op->op, false); 363 fscache_put_retrieval(op); 364 } 365 366 _leave(""); 367 } 368 369 static const struct netfs_cache_ops cachefiles_netfs_cache_ops = { 370 .end_operation = cachefiles_end_operation, 371 .read = cachefiles_read, 372 .write = cachefiles_write, 373 .prepare_read = cachefiles_prepare_read, 374 .prepare_write = cachefiles_prepare_write, 375 }; 376 377 /* 378 * Open the cache file when beginning a cache operation. 379 */ 380 int cachefiles_begin_read_operation(struct netfs_read_request *rreq, 381 struct fscache_retrieval *op) 382 { 383 struct cachefiles_object *object; 384 struct cachefiles_cache *cache; 385 struct path path; 386 struct file *file; 387 388 _enter(""); 389 390 object = container_of(op->op.object, 391 struct cachefiles_object, fscache); 392 cache = container_of(object->fscache.cache, 393 struct cachefiles_cache, cache); 394 395 path.mnt = cache->mnt; 396 path.dentry = object->backer; 397 file = open_with_fake_path(&path, O_RDWR | O_LARGEFILE | O_DIRECT, 398 d_inode(object->backer), cache->cache_cred); 399 if (IS_ERR(file)) 400 return PTR_ERR(file); 401 if (!S_ISREG(file_inode(file)->i_mode)) 402 goto error_file; 403 if (unlikely(!file->f_op->read_iter) || 404 unlikely(!file->f_op->write_iter)) { 405 pr_notice("Cache does not support read_iter and write_iter\n"); 406 goto error_file; 407 } 408 409 fscache_get_retrieval(op); 410 rreq->cache_resources.cache_priv = op; 411 rreq->cache_resources.cache_priv2 = file; 412 rreq->cache_resources.ops = &cachefiles_netfs_cache_ops; 413 rreq->cache_resources.debug_id = object->fscache.debug_id; 414 _leave(""); 415 return 0; 416 417 error_file: 418 fput(file); 419 return -EIO; 420 } 421