1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or https://opensource.org/licenses/CDDL-1.0.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright (c) 2023, Klara Inc.
23 */
24
25 #ifdef CONFIG_COMPAT
26 #include <linux/compat.h>
27 #endif
28 #include <linux/fs.h>
29 #ifdef HAVE_VFS_SPLICE_COPY_FILE_RANGE
30 #include <linux/splice.h>
31 #endif
32 #include <sys/file.h>
33 #include <sys/zfs_znode.h>
34 #include <sys/zfs_vnops.h>
35 #include <sys/zfeature.h>
36
37 /*
38 * Clone part of a file via block cloning.
39 *
40 * Note that we are not required to update file offsets; the kernel will take
41 * care of that depending on how it was called.
42 */
43 static ssize_t
zpl_clone_file_range_impl(struct file * src_file,loff_t src_off,struct file * dst_file,loff_t dst_off,size_t len)44 zpl_clone_file_range_impl(struct file *src_file, loff_t src_off,
45 struct file *dst_file, loff_t dst_off, size_t len)
46 {
47 struct inode *src_i = file_inode(src_file);
48 struct inode *dst_i = file_inode(dst_file);
49 uint64_t src_off_o = (uint64_t)src_off;
50 uint64_t dst_off_o = (uint64_t)dst_off;
51 uint64_t len_o = (uint64_t)len;
52 cred_t *cr = CRED();
53 fstrans_cookie_t cookie;
54 int err;
55
56 if (!zfs_bclone_enabled)
57 return (-EOPNOTSUPP);
58
59 if (!spa_feature_is_enabled(
60 dmu_objset_spa(ITOZSB(dst_i)->z_os), SPA_FEATURE_BLOCK_CLONING))
61 return (-EOPNOTSUPP);
62
63 if (src_i != dst_i)
64 spl_inode_lock_shared(src_i);
65 spl_inode_lock(dst_i);
66
67 crhold(cr);
68 cookie = spl_fstrans_mark();
69
70 err = -zfs_clone_range(ITOZ(src_i), &src_off_o, ITOZ(dst_i),
71 &dst_off_o, &len_o, cr);
72
73 spl_fstrans_unmark(cookie);
74 crfree(cr);
75
76 spl_inode_unlock(dst_i);
77 if (src_i != dst_i)
78 spl_inode_unlock_shared(src_i);
79
80 if (err < 0)
81 return (err);
82
83 return ((ssize_t)len_o);
84 }
85
86 /*
87 * Entry point for copy_file_range(). Copy len bytes from src_off in src_file
88 * to dst_off in dst_file. We are permitted to do this however we like, so we
89 * try to just clone the blocks, and if we can't support it, fall back to the
90 * kernel's generic byte copy function.
91 */
92 ssize_t
zpl_copy_file_range(struct file * src_file,loff_t src_off,struct file * dst_file,loff_t dst_off,size_t len,unsigned int flags)93 zpl_copy_file_range(struct file *src_file, loff_t src_off,
94 struct file *dst_file, loff_t dst_off, size_t len, unsigned int flags)
95 {
96 ssize_t ret;
97
98 /* Flags is reserved for future extensions and must be zero. */
99 if (flags != 0)
100 return (-EINVAL);
101
102 /* Try to do it via zfs_clone_range() and allow shortening. */
103 ret = zpl_clone_file_range_impl(src_file, src_off,
104 dst_file, dst_off, len);
105
106 #if defined(HAVE_VFS_GENERIC_COPY_FILE_RANGE)
107 /*
108 * Since Linux 5.3 the filesystem driver is responsible for executing
109 * an appropriate fallback, and a generic fallback function is provided.
110 */
111 if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -EXDEV ||
112 ret == -EAGAIN)
113 ret = generic_copy_file_range(src_file, src_off, dst_file,
114 dst_off, len, flags);
115 #elif defined(HAVE_VFS_SPLICE_COPY_FILE_RANGE)
116 /*
117 * Since 6.8 the fallback function is called splice_copy_file_range
118 * and has a slightly different signature.
119 */
120 if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -EXDEV ||
121 ret == -EAGAIN)
122 ret = splice_copy_file_range(src_file, src_off, dst_file,
123 dst_off, len);
124 #else
125 /*
126 * Before Linux 5.3 the filesystem has to return -EOPNOTSUPP to signal
127 * to the kernel that it should fallback to a content copy.
128 */
129 if (ret == -EINVAL || ret == -EXDEV || ret == -EAGAIN)
130 ret = -EOPNOTSUPP;
131 #endif /* HAVE_VFS_GENERIC_COPY_FILE_RANGE || HAVE_VFS_SPLICE_COPY_FILE_RANGE */
132
133 return (ret);
134 }
135
136 #ifdef HAVE_VFS_REMAP_FILE_RANGE
137 /*
138 * Entry point for FICLONE/FICLONERANGE/FIDEDUPERANGE.
139 *
140 * FICLONE and FICLONERANGE are basically the same as copy_file_range(), except
141 * that they must clone - they cannot fall back to copying. FICLONE is exactly
142 * FICLONERANGE, for the entire file. We don't need to try to tell them apart;
143 * the kernel will sort that out for us.
144 *
145 * FIDEDUPERANGE is for turning a non-clone into a clone, that is, compare the
146 * range in both files and if they're the same, arrange for them to be backed
147 * by the same storage.
148 *
149 * REMAP_FILE_CAN_SHORTEN lets us know we can clone less than the given range
150 * if we want. It's designed for filesystems that may need to shorten the
151 * length for alignment, EOF, or any other requirement. ZFS may shorten the
152 * request when there is outstanding dirty data which hasn't been written.
153 */
154 loff_t
zpl_remap_file_range(struct file * src_file,loff_t src_off,struct file * dst_file,loff_t dst_off,loff_t len,unsigned int flags)155 zpl_remap_file_range(struct file *src_file, loff_t src_off,
156 struct file *dst_file, loff_t dst_off, loff_t len, unsigned int flags)
157 {
158 if (flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_CAN_SHORTEN))
159 return (-EINVAL);
160
161 /* No support for dedup yet */
162 if (flags & REMAP_FILE_DEDUP)
163 return (-EOPNOTSUPP);
164
165 /* Zero length means to clone everything to the end of the file */
166 if (len == 0)
167 len = i_size_read(file_inode(src_file)) - src_off;
168
169 ssize_t ret = zpl_clone_file_range_impl(src_file, src_off,
170 dst_file, dst_off, len);
171
172 if (!(flags & REMAP_FILE_CAN_SHORTEN) && ret >= 0 && ret != len)
173 ret = -EINVAL;
174
175 return (ret);
176 }
177 #endif /* HAVE_VFS_REMAP_FILE_RANGE */
178
179 #if defined(HAVE_VFS_CLONE_FILE_RANGE)
180 /*
181 * Entry point for FICLONE and FICLONERANGE, before Linux 4.20.
182 */
183 int
zpl_clone_file_range(struct file * src_file,loff_t src_off,struct file * dst_file,loff_t dst_off,uint64_t len)184 zpl_clone_file_range(struct file *src_file, loff_t src_off,
185 struct file *dst_file, loff_t dst_off, uint64_t len)
186 {
187 /* Zero length means to clone everything to the end of the file */
188 if (len == 0)
189 len = i_size_read(file_inode(src_file)) - src_off;
190
191 /* The entire length must be cloned or this is an error. */
192 ssize_t ret = zpl_clone_file_range_impl(src_file, src_off,
193 dst_file, dst_off, len);
194
195 if (ret >= 0 && ret != len)
196 ret = -EINVAL;
197
198 return (ret);
199 }
200 #endif /* HAVE_VFS_CLONE_FILE_RANGE */
201
202 #ifdef HAVE_VFS_DEDUPE_FILE_RANGE
203 /*
204 * Entry point for FIDEDUPERANGE, before Linux 4.20.
205 */
206 int
zpl_dedupe_file_range(struct file * src_file,loff_t src_off,struct file * dst_file,loff_t dst_off,uint64_t len)207 zpl_dedupe_file_range(struct file *src_file, loff_t src_off,
208 struct file *dst_file, loff_t dst_off, uint64_t len)
209 {
210 /* No support for dedup yet */
211 return (-EOPNOTSUPP);
212 }
213 #endif /* HAVE_VFS_DEDUPE_FILE_RANGE */
214
215 /* Entry point for FICLONE, before Linux 4.5. */
216 long
zpl_ioctl_ficlone(struct file * dst_file,void * arg)217 zpl_ioctl_ficlone(struct file *dst_file, void *arg)
218 {
219 unsigned long sfd = (unsigned long)arg;
220
221 struct file *src_file = fget(sfd);
222 if (src_file == NULL)
223 return (-EBADF);
224
225 if (dst_file->f_op != src_file->f_op) {
226 fput(src_file);
227 return (-EXDEV);
228 }
229
230 size_t len = i_size_read(file_inode(src_file));
231
232 ssize_t ret = zpl_clone_file_range_impl(src_file, 0, dst_file, 0, len);
233
234 fput(src_file);
235
236 if (ret < 0) {
237 if (ret == -EOPNOTSUPP)
238 return (-ENOTTY);
239 return (ret);
240 }
241
242 if (ret != len)
243 return (-EINVAL);
244
245 return (0);
246 }
247
248 /* Entry point for FICLONERANGE, before Linux 4.5. */
249 long
zpl_ioctl_ficlonerange(struct file * dst_file,void __user * arg)250 zpl_ioctl_ficlonerange(struct file *dst_file, void __user *arg)
251 {
252 zfs_ioc_compat_file_clone_range_t fcr;
253
254 if (copy_from_user(&fcr, arg, sizeof (fcr)))
255 return (-EFAULT);
256
257 struct file *src_file = fget(fcr.fcr_src_fd);
258 if (src_file == NULL)
259 return (-EBADF);
260
261 if (dst_file->f_op != src_file->f_op) {
262 fput(src_file);
263 return (-EXDEV);
264 }
265
266 size_t len = fcr.fcr_src_length;
267 if (len == 0)
268 len = i_size_read(file_inode(src_file)) - fcr.fcr_src_offset;
269
270 ssize_t ret = zpl_clone_file_range_impl(src_file, fcr.fcr_src_offset,
271 dst_file, fcr.fcr_dest_offset, len);
272
273 fput(src_file);
274
275 if (ret < 0) {
276 if (ret == -EOPNOTSUPP)
277 return (-ENOTTY);
278 return (ret);
279 }
280
281 if (ret != len)
282 return (-EINVAL);
283
284 return (0);
285 }
286
/*
 * Entry point for FIDEDUPERANGE, before Linux 4.5.
 *
 * Dedup is not supported yet; the request is always refused with -ENOTTY
 * and neither argument is examined.
 */
long
zpl_ioctl_fideduperange(struct file *filp, void *arg)
{
	const long rc = -ENOTTY;

	(void) filp;
	(void) arg;

	return (rc);
}
296