// SPDX-License-Identifier: GPL-2.0-only
/*
 * Common helpers for stackable filesystems and backing files.
 *
 * Forked from fs/overlayfs/file.c.
 *
 * Copyright (C) 2017 Red Hat, Inc.
 * Copyright (C) 2023 CTERA Networks.
 */

#include <linux/fs.h>
#include <linux/backing-file.h>
#include <linux/splice.h>
#include <linux/mm.h>

#include "internal.h"

/**
 * backing_file_open - open a backing file for kernel internal use
 * @user_path: path that the user requested to open
 * @flags: open flags
 * @real_path: path of the backing file
 * @cred: credentials for open
 *
 * Open a backing file for a stackable filesystem (e.g., overlayfs).
 * @user_path may be on the stackable filesystem and @real_path on the
 * underlying filesystem.  In this case, we want to be able to return the
 * @user_path of the stackable filesystem.  This is done by embedding the
 * returned file into a container structure that also stores the stacked
 * file's path, which can be retrieved using backing_file_user_path().
 */
struct file *backing_file_open(const struct path *user_path, int flags,
			       const struct path *real_path,
			       const struct cred *cred)
{
	struct file *f;
	int error;

	f = alloc_empty_backing_file(flags, cred);
	if (IS_ERR(f))
		return f;

	path_get(user_path);
	*backing_file_user_path(f) = *user_path;
	error = vfs_open(real_path, f);
	if (error) {
		fput(f);
		f = ERR_PTR(error);
	}

	return f;
}
EXPORT_SYMBOL_GPL(backing_file_open);

/*
 * Like backing_file_open(), but the backing file is an unnamed temporary
 * file created with vfs_tmpfile() in @real_parentpath on the underlying
 * filesystem.
 */
struct file *backing_tmpfile_open(const struct path *user_path, int flags,
				  const struct path *real_parentpath,
				  umode_t mode, const struct cred *cred)
{
	struct mnt_idmap *real_idmap = mnt_idmap(real_parentpath->mnt);
	struct file *f;
	int error;

	f = alloc_empty_backing_file(flags, cred);
	if (IS_ERR(f))
		return f;

	path_get(user_path);
	*backing_file_user_path(f) = *user_path;
	error = vfs_tmpfile(real_idmap, real_parentpath, f, mode);
	if (error) {
		fput(f);
		f = ERR_PTR(error);
	}
	return f;
}
EXPORT_SYMBOL(backing_tmpfile_open);
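/*
 * Example (illustrative sketch, not part of this file): a typical caller
 * is a stackable filesystem's ->open() method, which resolves the path of
 * the backing file on the underlying filesystem and opens it with the
 * mounter's credentials, while the user-visible path stays reachable via
 * backing_file_user_path().  The "examplefs_*" helpers and the use of
 * file->private_data below are hypothetical:
 *
 *	static int examplefs_open(struct inode *inode, struct file *file)
 *	{
 *		struct path real_path;
 *		struct file *realfile;
 *
 *		examplefs_real_path(file, &real_path);
 *		realfile = backing_file_open(&file->f_path,
 *					     file->f_flags & ~(O_CREAT | O_EXCL | O_TRUNC),
 *					     &real_path,
 *					     examplefs_creds(inode->i_sb));
 *		if (IS_ERR(realfile))
 *			return PTR_ERR(realfile);
 *
 *		file->private_data = realfile;
 *		return 0;
 *	}
 */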
struct backing_aio {
	struct kiocb iocb;
	refcount_t ref;
	struct kiocb *orig_iocb;
	/* used for aio completion */
	void (*end_write)(struct file *);
	struct work_struct work;
	long res;
};

static struct kmem_cache *backing_aio_cachep;

#define BACKING_IOCB_MASK \
	(IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND)

static rwf_t iocb_to_rw_flags(int flags)
{
	return (__force rwf_t)(flags & BACKING_IOCB_MASK);
}

static void backing_aio_put(struct backing_aio *aio)
{
	if (refcount_dec_and_test(&aio->ref)) {
		fput(aio->iocb.ki_filp);
		kmem_cache_free(backing_aio_cachep, aio);
	}
}

static void backing_aio_cleanup(struct backing_aio *aio, long res)
{
	struct kiocb *iocb = &aio->iocb;
	struct kiocb *orig_iocb = aio->orig_iocb;

	if (aio->end_write)
		aio->end_write(orig_iocb->ki_filp);

	orig_iocb->ki_pos = iocb->ki_pos;
	backing_aio_put(aio);
}

static void backing_aio_rw_complete(struct kiocb *iocb, long res)
{
	struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb);
	struct kiocb *orig_iocb = aio->orig_iocb;

	if (iocb->ki_flags & IOCB_WRITE)
		kiocb_end_write(iocb);

	backing_aio_cleanup(aio, res);
	orig_iocb->ki_complete(orig_iocb, res);
}

static void backing_aio_complete_work(struct work_struct *work)
{
	struct backing_aio *aio = container_of(work, struct backing_aio, work);

	backing_aio_rw_complete(&aio->iocb, aio->res);
}

static void backing_aio_queue_completion(struct kiocb *iocb, long res)
{
	struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb);

	/*
	 * Punt to a work queue to serialize updates of mtime/size.
	 */
	aio->res = res;
	INIT_WORK(&aio->work, backing_aio_complete_work);
	queue_work(file_inode(aio->orig_iocb->ki_filp)->i_sb->s_dio_done_wq,
		   &aio->work);
}

static int backing_aio_init_wq(struct kiocb *iocb)
{
	struct super_block *sb = file_inode(iocb->ki_filp)->i_sb;

	if (sb->s_dio_done_wq)
		return 0;

	return sb_init_dio_done_wq(sb);
}

ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter,
			       struct kiocb *iocb, int flags,
			       struct backing_file_ctx *ctx)
{
	struct backing_aio *aio = NULL;
	const struct cred *old_cred;
	ssize_t ret;

	if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
		return -EIO;

	if (!iov_iter_count(iter))
		return 0;

	if (iocb->ki_flags & IOCB_DIRECT &&
	    !(file->f_mode & FMODE_CAN_ODIRECT))
		return -EINVAL;

	old_cred = override_creds(ctx->cred);
	if (is_sync_kiocb(iocb)) {
		rwf_t rwf = iocb_to_rw_flags(flags);

		ret = vfs_iter_read(file, iter, &iocb->ki_pos, rwf);
	} else {
		ret = -ENOMEM;
		aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL);
		if (!aio)
			goto out;

		aio->orig_iocb = iocb;
		kiocb_clone(&aio->iocb, iocb, get_file(file));
		aio->iocb.ki_complete = backing_aio_rw_complete;
		refcount_set(&aio->ref, 2);
		ret = vfs_iocb_iter_read(file, &aio->iocb, iter);
		backing_aio_put(aio);
		if (ret != -EIOCBQUEUED)
			backing_aio_cleanup(aio, ret);
	}
out:
	revert_creds(old_cred);

	if (ctx->accessed)
		ctx->accessed(ctx->user_file);

	return ret;
}
EXPORT_SYMBOL_GPL(backing_file_read_iter);
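/*
 * Example (illustrative sketch, not part of this file): a stackable
 * filesystem's ->read_iter() forwards to the backing file it stashed at
 * open time, passing a struct backing_file_ctx that carries the mounter
 * credentials, the user-visible file, and an optional ->accessed()
 * callback (used e.g. to propagate atime).  The "examplefs_*" names and
 * the private_data convention are hypothetical:
 *
 *	static ssize_t examplefs_read_iter(struct kiocb *iocb,
 *					   struct iov_iter *iter)
 *	{
 *		struct file *file = iocb->ki_filp;
 *		struct file *realfile = file->private_data;
 *		struct backing_file_ctx ctx = {
 *			.cred = examplefs_creds(file_inode(file)->i_sb),
 *			.user_file = file,
 *			.accessed = examplefs_file_accessed,
 *		};
 *
 *		return backing_file_read_iter(realfile, iter, iocb,
 *					      iocb->ki_flags, &ctx);
 *	}
 */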
ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter,
				struct kiocb *iocb, int flags,
				struct backing_file_ctx *ctx)
{
	const struct cred *old_cred;
	ssize_t ret;

	if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)))
		return -EIO;

	if (!iov_iter_count(iter))
		return 0;

	ret = file_remove_privs(ctx->user_file);
	if (ret)
		return ret;

	if (iocb->ki_flags & IOCB_DIRECT &&
	    !(file->f_mode & FMODE_CAN_ODIRECT))
		return -EINVAL;

	/*
	 * Stacked filesystems don't support deferred completions, so don't
	 * copy this property in case it is set by the issuer.
	 */
	flags &= ~IOCB_DIO_CALLER_COMP;

	old_cred = override_creds(ctx->cred);
	if (is_sync_kiocb(iocb)) {
		rwf_t rwf = iocb_to_rw_flags(flags);

		ret = vfs_iter_write(file, iter, &iocb->ki_pos, rwf);
		if (ctx->end_write)
			ctx->end_write(ctx->user_file);
	} else {
		struct backing_aio *aio;

		ret = backing_aio_init_wq(iocb);
		if (ret)
			goto out;

		ret = -ENOMEM;
		aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL);
		if (!aio)
			goto out;

		aio->orig_iocb = iocb;
		aio->end_write = ctx->end_write;
		kiocb_clone(&aio->iocb, iocb, get_file(file));
		aio->iocb.ki_flags = flags;
		aio->iocb.ki_complete = backing_aio_queue_completion;
		refcount_set(&aio->ref, 2);
		ret = vfs_iocb_iter_write(file, &aio->iocb, iter);
		backing_aio_put(aio);
		if (ret != -EIOCBQUEUED)
			backing_aio_cleanup(aio, ret);
	}
out:
	revert_creds(old_cred);

	return ret;
}
EXPORT_SYMBOL_GPL(backing_file_write_iter);

ssize_t backing_file_splice_read(struct file *in, loff_t *ppos,
				 struct pipe_inode_info *pipe, size_t len,
				 unsigned int flags,
				 struct backing_file_ctx *ctx)
{
	const struct cred *old_cred;
	ssize_t ret;

	if (WARN_ON_ONCE(!(in->f_mode & FMODE_BACKING)))
		return -EIO;

	old_cred = override_creds(ctx->cred);
	ret = vfs_splice_read(in, ppos, pipe, len, flags);
	revert_creds(old_cred);

	if (ctx->accessed)
		ctx->accessed(ctx->user_file);

	return ret;
}
EXPORT_SYMBOL_GPL(backing_file_splice_read);

ssize_t backing_file_splice_write(struct pipe_inode_info *pipe,
				  struct file *out, loff_t *ppos, size_t len,
				  unsigned int flags,
				  struct backing_file_ctx *ctx)
{
	const struct cred *old_cred;
	ssize_t ret;

	if (WARN_ON_ONCE(!(out->f_mode & FMODE_BACKING)))
		return -EIO;

	if (!out->f_op->splice_write)
		return -EINVAL;

	ret = file_remove_privs(ctx->user_file);
	if (ret)
		return ret;

	old_cred = override_creds(ctx->cred);
	file_start_write(out);
	ret = out->f_op->splice_write(pipe, out, ppos, len, flags);
	file_end_write(out);
	revert_creds(old_cred);

	if (ctx->end_write)
		ctx->end_write(ctx->user_file);

	return ret;
}
EXPORT_SYMBOL_GPL(backing_file_splice_write);

int backing_file_mmap(struct file *file, struct vm_area_struct *vma,
		      struct backing_file_ctx *ctx)
{
	const struct cred *old_cred;
	int ret;

	if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)) ||
	    WARN_ON_ONCE(ctx->user_file != vma->vm_file))
		return -EIO;

	if (!file->f_op->mmap)
		return -ENODEV;

	vma_set_file(vma, file);

	old_cred = override_creds(ctx->cred);
	ret = call_mmap(vma->vm_file, vma);
	revert_creds(old_cred);

	if (ctx->accessed)
		ctx->accessed(ctx->user_file);

	return ret;
}
EXPORT_SYMBOL_GPL(backing_file_mmap);

static int __init backing_aio_init(void)
{
	backing_aio_cachep = KMEM_CACHE(backing_aio, SLAB_HWCACHE_ALIGN);
	if (!backing_aio_cachep)
		return -ENOMEM;

	return 0;
}
fs_initcall(backing_aio_init);
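/*
 * Example (illustrative sketch, not part of this file): an ->mmap()
 * wrapper must be called while vma->vm_file still points at the
 * user-visible file named in ctx->user_file; backing_file_mmap() then
 * switches vma->vm_file to the backing file before calling its ->mmap().
 * The "examplefs_*" names are hypothetical:
 *
 *	static int examplefs_mmap(struct file *file,
 *				  struct vm_area_struct *vma)
 *	{
 *		struct file *realfile = file->private_data;
 *		struct backing_file_ctx ctx = {
 *			.cred = examplefs_creds(file_inode(file)->i_sb),
 *			.user_file = file,
 *			.accessed = examplefs_file_accessed,
 *		};
 *
 *		return backing_file_mmap(realfile, vma, &ctx);
 *	}
 */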