1 // SPDX-License-Identifier: GPL-2.0-only 2 /* 3 * Common helpers for stackable filesystems and backing files. 4 * 5 * Forked from fs/overlayfs/file.c. 6 * 7 * Copyright (C) 2017 Red Hat, Inc. 8 * Copyright (C) 2023 CTERA Networks. 9 */ 10 11 #include <linux/fs.h> 12 #include <linux/backing-file.h> 13 #include <linux/splice.h> 14 #include <linux/mm.h> 15 16 #include "internal.h" 17 18 /** 19 * backing_file_open - open a backing file for kernel internal use 20 * @user_path: path that the user reuqested to open 21 * @flags: open flags 22 * @real_path: path of the backing file 23 * @cred: credentials for open 24 * 25 * Open a backing file for a stackable filesystem (e.g., overlayfs). 26 * @user_path may be on the stackable filesystem and @real_path on the 27 * underlying filesystem. In this case, we want to be able to return the 28 * @user_path of the stackable filesystem. This is done by embedding the 29 * returned file into a container structure that also stores the stacked 30 * file's path, which can be retrieved using backing_file_user_path(). 31 */ 32 struct file *backing_file_open(const struct path *user_path, int flags, 33 const struct path *real_path, 34 const struct cred *cred) 35 { 36 struct file *f; 37 int error; 38 39 f = alloc_empty_backing_file(flags, cred); 40 if (IS_ERR(f)) 41 return f; 42 43 path_get(user_path); 44 *backing_file_user_path(f) = *user_path; 45 error = vfs_open(real_path, f); 46 if (error) { 47 fput(f); 48 f = ERR_PTR(error); 49 } 50 51 return f; 52 } 53 EXPORT_SYMBOL_GPL(backing_file_open); 54 55 struct backing_aio { 56 struct kiocb iocb; 57 refcount_t ref; 58 struct kiocb *orig_iocb; 59 /* used for aio completion */ 60 void (*end_write)(struct file *); 61 struct work_struct work; 62 long res; 63 }; 64 65 static struct kmem_cache *backing_aio_cachep; 66 67 #define BACKING_IOCB_MASK \ 68 (IOCB_NOWAIT | IOCB_HIPRI | IOCB_DSYNC | IOCB_SYNC | IOCB_APPEND) 69 70 static rwf_t iocb_to_rw_flags(int flags) 71 { 72 return (__force rwf_t)(flags & BACKING_IOCB_MASK); 73 } 74 75 static void backing_aio_put(struct backing_aio *aio) 76 { 77 if (refcount_dec_and_test(&aio->ref)) { 78 fput(aio->iocb.ki_filp); 79 kmem_cache_free(backing_aio_cachep, aio); 80 } 81 } 82 83 static void backing_aio_cleanup(struct backing_aio *aio, long res) 84 { 85 struct kiocb *iocb = &aio->iocb; 86 struct kiocb *orig_iocb = aio->orig_iocb; 87 88 if (aio->end_write) 89 aio->end_write(orig_iocb->ki_filp); 90 91 orig_iocb->ki_pos = iocb->ki_pos; 92 backing_aio_put(aio); 93 } 94 95 static void backing_aio_rw_complete(struct kiocb *iocb, long res) 96 { 97 struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb); 98 struct kiocb *orig_iocb = aio->orig_iocb; 99 100 if (iocb->ki_flags & IOCB_WRITE) 101 kiocb_end_write(iocb); 102 103 backing_aio_cleanup(aio, res); 104 orig_iocb->ki_complete(orig_iocb, res); 105 } 106 107 static void backing_aio_complete_work(struct work_struct *work) 108 { 109 struct backing_aio *aio = container_of(work, struct backing_aio, work); 110 111 backing_aio_rw_complete(&aio->iocb, aio->res); 112 } 113 114 static void backing_aio_queue_completion(struct kiocb *iocb, long res) 115 { 116 struct backing_aio *aio = container_of(iocb, struct backing_aio, iocb); 117 118 /* 119 * Punt to a work queue to serialize updates of mtime/size. 120 */ 121 aio->res = res; 122 INIT_WORK(&aio->work, backing_aio_complete_work); 123 queue_work(file_inode(aio->orig_iocb->ki_filp)->i_sb->s_dio_done_wq, 124 &aio->work); 125 } 126 127 static int backing_aio_init_wq(struct kiocb *iocb) 128 { 129 struct super_block *sb = file_inode(iocb->ki_filp)->i_sb; 130 131 if (sb->s_dio_done_wq) 132 return 0; 133 134 return sb_init_dio_done_wq(sb); 135 } 136 137 138 ssize_t backing_file_read_iter(struct file *file, struct iov_iter *iter, 139 struct kiocb *iocb, int flags, 140 struct backing_file_ctx *ctx) 141 { 142 struct backing_aio *aio = NULL; 143 const struct cred *old_cred; 144 ssize_t ret; 145 146 if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING))) 147 return -EIO; 148 149 if (!iov_iter_count(iter)) 150 return 0; 151 152 if (iocb->ki_flags & IOCB_DIRECT && 153 !(file->f_mode & FMODE_CAN_ODIRECT)) 154 return -EINVAL; 155 156 old_cred = override_creds(ctx->cred); 157 if (is_sync_kiocb(iocb)) { 158 rwf_t rwf = iocb_to_rw_flags(flags); 159 160 ret = vfs_iter_read(file, iter, &iocb->ki_pos, rwf); 161 } else { 162 ret = -ENOMEM; 163 aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL); 164 if (!aio) 165 goto out; 166 167 aio->orig_iocb = iocb; 168 kiocb_clone(&aio->iocb, iocb, get_file(file)); 169 aio->iocb.ki_complete = backing_aio_rw_complete; 170 refcount_set(&aio->ref, 2); 171 ret = vfs_iocb_iter_read(file, &aio->iocb, iter); 172 backing_aio_put(aio); 173 if (ret != -EIOCBQUEUED) 174 backing_aio_cleanup(aio, ret); 175 } 176 out: 177 revert_creds(old_cred); 178 179 if (ctx->accessed) 180 ctx->accessed(ctx->user_file); 181 182 return ret; 183 } 184 EXPORT_SYMBOL_GPL(backing_file_read_iter); 185 186 ssize_t backing_file_write_iter(struct file *file, struct iov_iter *iter, 187 struct kiocb *iocb, int flags, 188 struct backing_file_ctx *ctx) 189 { 190 const struct cred *old_cred; 191 ssize_t ret; 192 193 if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING))) 194 return -EIO; 195 196 if (!iov_iter_count(iter)) 197 return 0; 198 199 ret = file_remove_privs(ctx->user_file); 200 if (ret) 201 return ret; 202 203 if (iocb->ki_flags & IOCB_DIRECT && 204 !(file->f_mode & FMODE_CAN_ODIRECT)) 205 return -EINVAL; 206 207 /* 208 * Stacked filesystems don't support deferred completions, don't copy 209 * this property in case it is set by the issuer. 210 */ 211 flags &= ~IOCB_DIO_CALLER_COMP; 212 213 old_cred = override_creds(ctx->cred); 214 if (is_sync_kiocb(iocb)) { 215 rwf_t rwf = iocb_to_rw_flags(flags); 216 217 ret = vfs_iter_write(file, iter, &iocb->ki_pos, rwf); 218 if (ctx->end_write) 219 ctx->end_write(ctx->user_file); 220 } else { 221 struct backing_aio *aio; 222 223 ret = backing_aio_init_wq(iocb); 224 if (ret) 225 goto out; 226 227 ret = -ENOMEM; 228 aio = kmem_cache_zalloc(backing_aio_cachep, GFP_KERNEL); 229 if (!aio) 230 goto out; 231 232 aio->orig_iocb = iocb; 233 aio->end_write = ctx->end_write; 234 kiocb_clone(&aio->iocb, iocb, get_file(file)); 235 aio->iocb.ki_flags = flags; 236 aio->iocb.ki_complete = backing_aio_queue_completion; 237 refcount_set(&aio->ref, 2); 238 ret = vfs_iocb_iter_write(file, &aio->iocb, iter); 239 backing_aio_put(aio); 240 if (ret != -EIOCBQUEUED) 241 backing_aio_cleanup(aio, ret); 242 } 243 out: 244 revert_creds(old_cred); 245 246 return ret; 247 } 248 EXPORT_SYMBOL_GPL(backing_file_write_iter); 249 250 ssize_t backing_file_splice_read(struct file *in, loff_t *ppos, 251 struct pipe_inode_info *pipe, size_t len, 252 unsigned int flags, 253 struct backing_file_ctx *ctx) 254 { 255 const struct cred *old_cred; 256 ssize_t ret; 257 258 if (WARN_ON_ONCE(!(in->f_mode & FMODE_BACKING))) 259 return -EIO; 260 261 old_cred = override_creds(ctx->cred); 262 ret = vfs_splice_read(in, ppos, pipe, len, flags); 263 revert_creds(old_cred); 264 265 if (ctx->accessed) 266 ctx->accessed(ctx->user_file); 267 268 return ret; 269 } 270 EXPORT_SYMBOL_GPL(backing_file_splice_read); 271 272 ssize_t backing_file_splice_write(struct pipe_inode_info *pipe, 273 struct file *out, loff_t *ppos, size_t len, 274 unsigned int flags, 275 struct backing_file_ctx *ctx) 276 { 277 const struct cred *old_cred; 278 ssize_t ret; 279 280 if (WARN_ON_ONCE(!(out->f_mode & FMODE_BACKING))) 281 return -EIO; 282 283 ret = file_remove_privs(ctx->user_file); 284 if (ret) 285 return ret; 286 287 old_cred = override_creds(ctx->cred); 288 file_start_write(out); 289 ret = iter_file_splice_write(pipe, out, ppos, len, flags); 290 file_end_write(out); 291 revert_creds(old_cred); 292 293 if (ctx->end_write) 294 ctx->end_write(ctx->user_file); 295 296 return ret; 297 } 298 EXPORT_SYMBOL_GPL(backing_file_splice_write); 299 300 int backing_file_mmap(struct file *file, struct vm_area_struct *vma, 301 struct backing_file_ctx *ctx) 302 { 303 const struct cred *old_cred; 304 int ret; 305 306 if (WARN_ON_ONCE(!(file->f_mode & FMODE_BACKING)) || 307 WARN_ON_ONCE(ctx->user_file != vma->vm_file)) 308 return -EIO; 309 310 if (!file->f_op->mmap) 311 return -ENODEV; 312 313 vma_set_file(vma, file); 314 315 old_cred = override_creds(ctx->cred); 316 ret = call_mmap(vma->vm_file, vma); 317 revert_creds(old_cred); 318 319 if (ctx->accessed) 320 ctx->accessed(ctx->user_file); 321 322 return ret; 323 } 324 EXPORT_SYMBOL_GPL(backing_file_mmap); 325 326 static int __init backing_aio_init(void) 327 { 328 backing_aio_cachep = KMEM_CACHE(backing_aio, SLAB_HWCACHE_ALIGN); 329 if (!backing_aio_cachep) 330 return -ENOMEM; 331 332 return 0; 333 } 334 fs_initcall(backing_aio_init); 335