xref: /freebsd/sys/contrib/openzfs/module/os/linux/zfs/zpl_file_range.c (revision 61145dc2b94f12f6a47344fb9aac702321880e43)
1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or https://opensource.org/licenses/CDDL-1.0.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright (c) 2023, Klara Inc.
24  */
25 
26 #ifdef CONFIG_COMPAT
27 #include <linux/compat.h>
28 #endif
29 #include <linux/fs.h>
30 #ifdef HAVE_VFS_SPLICE_COPY_FILE_RANGE
31 #include <linux/splice.h>
32 #endif
33 #include <sys/file.h>
34 #include <sys/zfs_znode.h>
35 #include <sys/zfs_vnops.h>
36 #include <sys/zfeature.h>
37 
38 /*
39  * Clone part of a file via block cloning.
40  *
41  * Note that we are not required to update file offsets; the kernel will take
42  * care of that depending on how it was called.
43  */
44 static ssize_t
zpl_clone_file_range_impl(struct file * src_file,loff_t src_off,struct file * dst_file,loff_t dst_off,size_t len)45 zpl_clone_file_range_impl(struct file *src_file, loff_t src_off,
46     struct file *dst_file, loff_t dst_off, size_t len)
47 {
48 	struct inode *src_i = file_inode(src_file);
49 	struct inode *dst_i = file_inode(dst_file);
50 	uint64_t src_off_o = (uint64_t)src_off;
51 	uint64_t dst_off_o = (uint64_t)dst_off;
52 	uint64_t len_o = (uint64_t)len;
53 	cred_t *cr = CRED();
54 	fstrans_cookie_t cookie;
55 	int err;
56 
57 	if (!zfs_bclone_enabled)
58 		return (-EOPNOTSUPP);
59 
60 	if (!spa_feature_is_enabled(
61 	    dmu_objset_spa(ITOZSB(dst_i)->z_os), SPA_FEATURE_BLOCK_CLONING))
62 		return (-EOPNOTSUPP);
63 
64 	if (src_i != dst_i)
65 		spl_inode_lock_shared(src_i);
66 	spl_inode_lock(dst_i);
67 
68 	crhold(cr);
69 	cookie = spl_fstrans_mark();
70 
71 	err = -zfs_clone_range(ITOZ(src_i), &src_off_o, ITOZ(dst_i),
72 	    &dst_off_o, &len_o, cr);
73 
74 	spl_fstrans_unmark(cookie);
75 	crfree(cr);
76 
77 	spl_inode_unlock(dst_i);
78 	if (src_i != dst_i)
79 		spl_inode_unlock_shared(src_i);
80 
81 	if (err < 0)
82 		return (err);
83 
84 	return ((ssize_t)len_o);
85 }
86 
87 /*
88  * Entry point for copy_file_range(). Copy len bytes from src_off in src_file
89  * to dst_off in dst_file. We are permitted to do this however we like, so we
90  * try to just clone the blocks, and if we can't support it, fall back to the
91  * kernel's generic byte copy function.
92  */
93 ssize_t
zpl_copy_file_range(struct file * src_file,loff_t src_off,struct file * dst_file,loff_t dst_off,size_t len,unsigned int flags)94 zpl_copy_file_range(struct file *src_file, loff_t src_off,
95     struct file *dst_file, loff_t dst_off, size_t len, unsigned int flags)
96 {
97 	ssize_t ret;
98 
99 	/* Flags is reserved for future extensions and must be zero. */
100 	if (flags != 0)
101 		return (-EINVAL);
102 
103 	/* Try to do it via zfs_clone_range() and allow shortening. */
104 	ret = zpl_clone_file_range_impl(src_file, src_off,
105 	    dst_file, dst_off, len);
106 
107 #if defined(HAVE_VFS_GENERIC_COPY_FILE_RANGE)
108 	/*
109 	 * Since Linux 5.3 the filesystem driver is responsible for executing
110 	 * an appropriate fallback, and a generic fallback function is provided.
111 	 */
112 	if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -EXDEV ||
113 	    ret == -EAGAIN)
114 		ret = generic_copy_file_range(src_file, src_off, dst_file,
115 		    dst_off, len, flags);
116 #elif defined(HAVE_VFS_SPLICE_COPY_FILE_RANGE)
117 	/*
118 	 * Since 6.8 the fallback function is called splice_copy_file_range
119 	 * and has a slightly different signature.
120 	 */
121 	if (ret == -EOPNOTSUPP || ret == -EINVAL || ret == -EXDEV ||
122 	    ret == -EAGAIN)
123 		ret = splice_copy_file_range(src_file, src_off, dst_file,
124 		    dst_off, len);
125 #else
126 	/*
127 	 * Before Linux 5.3 the filesystem has to return -EOPNOTSUPP to signal
128 	 * to the kernel that it should fallback to a content copy.
129 	 */
130 	if (ret == -EINVAL || ret == -EXDEV || ret == -EAGAIN)
131 		ret = -EOPNOTSUPP;
132 #endif /* HAVE_VFS_GENERIC_COPY_FILE_RANGE || HAVE_VFS_SPLICE_COPY_FILE_RANGE */
133 
134 	return (ret);
135 }
136 
137 #ifdef HAVE_VFS_REMAP_FILE_RANGE
138 /*
139  * Entry point for FICLONE/FICLONERANGE/FIDEDUPERANGE.
140  *
141  * FICLONE and FICLONERANGE are basically the same as copy_file_range(), except
142  * that they must clone - they cannot fall back to copying. FICLONE is exactly
143  * FICLONERANGE, for the entire file. We don't need to try to tell them apart;
144  * the kernel will sort that out for us.
145  *
146  * FIDEDUPERANGE is for turning a non-clone into a clone, that is, compare the
147  * range in both files and if they're the same, arrange for them to be backed
148  * by the same storage.
149  *
150  * REMAP_FILE_CAN_SHORTEN lets us know we can clone less than the given range
151  * if we want. It's designed for filesystems that may need to shorten the
152  * length for alignment, EOF, or any other requirement. ZFS may shorten the
153  * request when there is outstanding dirty data which hasn't been written.
154  */
155 loff_t
zpl_remap_file_range(struct file * src_file,loff_t src_off,struct file * dst_file,loff_t dst_off,loff_t len,unsigned int flags)156 zpl_remap_file_range(struct file *src_file, loff_t src_off,
157     struct file *dst_file, loff_t dst_off, loff_t len, unsigned int flags)
158 {
159 	if (flags & ~(REMAP_FILE_DEDUP | REMAP_FILE_CAN_SHORTEN))
160 		return (-EINVAL);
161 
162 	/* No support for dedup yet */
163 	if (flags & REMAP_FILE_DEDUP)
164 		return (-EOPNOTSUPP);
165 
166 	/* Zero length means to clone everything to the end of the file */
167 	if (len == 0)
168 		len = i_size_read(file_inode(src_file)) - src_off;
169 
170 	ssize_t ret = zpl_clone_file_range_impl(src_file, src_off,
171 	    dst_file, dst_off, len);
172 
173 	if (!(flags & REMAP_FILE_CAN_SHORTEN) && ret >= 0 && ret != len)
174 		ret = -EINVAL;
175 
176 	return (ret);
177 }
178 #endif /* HAVE_VFS_REMAP_FILE_RANGE */
179 
180 #if defined(HAVE_VFS_CLONE_FILE_RANGE)
181 /*
182  * Entry point for FICLONE and FICLONERANGE, before Linux 4.20.
183  */
184 int
zpl_clone_file_range(struct file * src_file,loff_t src_off,struct file * dst_file,loff_t dst_off,uint64_t len)185 zpl_clone_file_range(struct file *src_file, loff_t src_off,
186     struct file *dst_file, loff_t dst_off, uint64_t len)
187 {
188 	/* Zero length means to clone everything to the end of the file */
189 	if (len == 0)
190 		len = i_size_read(file_inode(src_file)) - src_off;
191 
192 	/* The entire length must be cloned or this is an error. */
193 	ssize_t ret = zpl_clone_file_range_impl(src_file, src_off,
194 	    dst_file, dst_off, len);
195 
196 	if (ret >= 0 && ret != len)
197 		ret = -EINVAL;
198 
199 	return (ret);
200 }
201 #endif /* HAVE_VFS_CLONE_FILE_RANGE */
202 
203 #ifdef HAVE_VFS_DEDUPE_FILE_RANGE
204 /*
205  * Entry point for FIDEDUPERANGE, before Linux 4.20.
206  */
207 int
zpl_dedupe_file_range(struct file * src_file,loff_t src_off,struct file * dst_file,loff_t dst_off,uint64_t len)208 zpl_dedupe_file_range(struct file *src_file, loff_t src_off,
209     struct file *dst_file, loff_t dst_off, uint64_t len)
210 {
211 	/* No support for dedup yet */
212 	return (-EOPNOTSUPP);
213 }
214 #endif /* HAVE_VFS_DEDUPE_FILE_RANGE */
215 
216 /* Entry point for FICLONE, before Linux 4.5. */
217 long
zpl_ioctl_ficlone(struct file * dst_file,void * arg)218 zpl_ioctl_ficlone(struct file *dst_file, void *arg)
219 {
220 	unsigned long sfd = (unsigned long)arg;
221 
222 	struct file *src_file = fget(sfd);
223 	if (src_file == NULL)
224 		return (-EBADF);
225 
226 	if (dst_file->f_op != src_file->f_op) {
227 		fput(src_file);
228 		return (-EXDEV);
229 	}
230 
231 	size_t len = i_size_read(file_inode(src_file));
232 
233 	ssize_t ret = zpl_clone_file_range_impl(src_file, 0, dst_file, 0, len);
234 
235 	fput(src_file);
236 
237 	if (ret < 0) {
238 		if (ret == -EOPNOTSUPP)
239 			return (-ENOTTY);
240 		return (ret);
241 	}
242 
243 	if (ret != len)
244 		return (-EINVAL);
245 
246 	return (0);
247 }
248 
249 /* Entry point for FICLONERANGE, before Linux 4.5. */
250 long
zpl_ioctl_ficlonerange(struct file * dst_file,void __user * arg)251 zpl_ioctl_ficlonerange(struct file *dst_file, void __user *arg)
252 {
253 	zfs_ioc_compat_file_clone_range_t fcr;
254 
255 	if (copy_from_user(&fcr, arg, sizeof (fcr)))
256 		return (-EFAULT);
257 
258 	struct file *src_file = fget(fcr.fcr_src_fd);
259 	if (src_file == NULL)
260 		return (-EBADF);
261 
262 	if (dst_file->f_op != src_file->f_op) {
263 		fput(src_file);
264 		return (-EXDEV);
265 	}
266 
267 	size_t len = fcr.fcr_src_length;
268 	if (len == 0)
269 		len = i_size_read(file_inode(src_file)) - fcr.fcr_src_offset;
270 
271 	ssize_t ret = zpl_clone_file_range_impl(src_file, fcr.fcr_src_offset,
272 	    dst_file, fcr.fcr_dest_offset, len);
273 
274 	fput(src_file);
275 
276 	if (ret < 0) {
277 		if (ret == -EOPNOTSUPP)
278 			return (-ENOTTY);
279 		return (ret);
280 	}
281 
282 	if (ret != len)
283 		return (-EINVAL);
284 
285 	return (0);
286 }
287 
/*
 * Entry point for FIDEDUPERANGE, before Linux 4.5.
 *
 * Dedup is not implemented, so the request is always answered with -ENOTTY
 * and neither argument is examined.
 */
long
zpl_ioctl_fideduperange(struct file *filp, void *arg)
{
	(void) filp;
	(void) arg;

	/* No support for dedup yet */
	return (-ENOTTY);
}
297