1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3 * CDDL HEADER START
4 *
5 * The contents of this file are subject to the terms of the
6 * Common Development and Distribution License (the "License").
7 * You may not use this file except in compliance with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or https://opensource.org/licenses/CDDL-1.0.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
24 * Copyright (c) 2012, 2018 by Delphix. All rights reserved.
25 * Copyright (c) 2016 Actifio, Inc. All rights reserved.
26 * Copyright (c) 2025, Klara, Inc.
27 */
28
29 #include <sys/zfs_context.h>
30 #include <sys/zfs_file.h>
31 #include <libzpool.h>
32 #include <libzutil.h>
33
/*
 * If set, all blocks read will be copied to the specified directory.
 *
 * When this is non-NULL, zfs_file_open() additionally opens (creating it
 * if needed) "<vn_dumpdir>/<basename of the opened path>", and
 * zfs_file_pread() writes every block it reads into that file at the
 * same offset it was read from.
 */
char *vn_dumpdir = NULL;
36
37 /*
38 * Open file
39 *
40 * path - fully qualified path to file
41 * flags - file attributes O_READ / O_WRITE / O_EXCL
42 * fpp - pointer to return file pointer
43 *
44 * Returns 0 on success underlying error on failure.
45 */
46 int
zfs_file_open(const char * path,int flags,int mode,zfs_file_t ** fpp)47 zfs_file_open(const char *path, int flags, int mode, zfs_file_t **fpp)
48 {
49 int fd;
50 int dump_fd;
51 int err;
52 int old_umask = 0;
53 zfs_file_t *fp;
54 struct stat64 st;
55
56 if (!(flags & O_CREAT) && stat64(path, &st) == -1)
57 return (errno);
58
59 if (!(flags & O_CREAT) && S_ISBLK(st.st_mode))
60 flags |= O_DIRECT;
61
62 if (flags & O_CREAT)
63 old_umask = umask(0);
64
65 fd = open64(path, flags, mode);
66 if (fd == -1)
67 return (errno);
68
69 if (flags & O_CREAT)
70 (void) umask(old_umask);
71
72 if (vn_dumpdir != NULL) {
73 char *dumppath = umem_zalloc(MAXPATHLEN, UMEM_NOFAIL);
74 const char *inpath = zfs_basename(path);
75
76 (void) snprintf(dumppath, MAXPATHLEN,
77 "%s/%s", vn_dumpdir, inpath);
78 dump_fd = open64(dumppath, O_CREAT | O_WRONLY, 0666);
79 umem_free(dumppath, MAXPATHLEN);
80 if (dump_fd == -1) {
81 err = errno;
82 close(fd);
83 return (err);
84 }
85 } else {
86 dump_fd = -1;
87 }
88
89 (void) fcntl(fd, F_SETFD, FD_CLOEXEC);
90
91 fp = umem_zalloc(sizeof (zfs_file_t), UMEM_NOFAIL);
92 fp->f_fd = fd;
93 fp->f_dump_fd = dump_fd;
94 *fpp = fp;
95
96 return (0);
97 }
98
99 void
zfs_file_close(zfs_file_t * fp)100 zfs_file_close(zfs_file_t *fp)
101 {
102 close(fp->f_fd);
103 if (fp->f_dump_fd != -1)
104 close(fp->f_dump_fd);
105
106 umem_free(fp, sizeof (zfs_file_t));
107 }
108
109 /*
110 * Stateful write - use os internal file pointer to determine where to
111 * write and update on successful completion.
112 *
113 * fp - pointer to file (pipe, socket, etc) to write to
114 * buf - buffer to write
115 * count - # of bytes to write
116 * resid - pointer to count of unwritten bytes (if short write)
117 *
118 * Returns 0 on success errno on failure.
119 */
120 int
zfs_file_write(zfs_file_t * fp,const void * buf,size_t count,ssize_t * resid)121 zfs_file_write(zfs_file_t *fp, const void *buf, size_t count, ssize_t *resid)
122 {
123 ssize_t rc;
124
125 rc = write(fp->f_fd, buf, count);
126 if (rc < 0)
127 return (errno);
128
129 if (resid) {
130 *resid = count - rc;
131 } else if (rc != count) {
132 return (EIO);
133 }
134
135 return (0);
136 }
137
138 /*
139 * Stateless write - os internal file pointer is not updated.
140 *
141 * fp - pointer to file (pipe, socket, etc) to write to
142 * buf - buffer to write
143 * count - # of bytes to write
144 * off - file offset to write to (only valid for seekable types)
145 * resid - pointer to count of unwritten bytes
146 *
147 * Returns 0 on success errno on failure.
148 */
149 int
zfs_file_pwrite(zfs_file_t * fp,const void * buf,size_t count,loff_t pos,uint8_t ashift,ssize_t * resid)150 zfs_file_pwrite(zfs_file_t *fp, const void *buf,
151 size_t count, loff_t pos, uint8_t ashift, ssize_t *resid)
152 {
153 ssize_t rc, split, done;
154 int sectors;
155
156 /*
157 * To simulate partial disk writes, we split writes into two
158 * system calls so that the process can be killed in between.
159 * This is used by ztest to simulate realistic failure modes.
160 */
161 sectors = count >> ashift;
162 split = (sectors > 0 ? rand() % sectors : 0) << ashift;
163 rc = pwrite64(fp->f_fd, buf, split, pos);
164 if (rc != -1) {
165 done = rc;
166 rc = pwrite64(fp->f_fd, (char *)buf + split,
167 count - split, pos + split);
168 }
169 #ifdef __linux__
170 if (rc == -1 && errno == EINVAL) {
171 /*
172 * Under Linux, this most likely means an alignment issue
173 * (memory or disk) due to O_DIRECT, so we abort() in order
174 * to catch the offender.
175 */
176 abort();
177 }
178 #endif
179
180 if (rc < 0)
181 return (errno);
182
183 done += rc;
184
185 if (resid) {
186 *resid = count - done;
187 } else if (done != count) {
188 return (EIO);
189 }
190
191 return (0);
192 }
193
194 /*
195 * Stateful read - use os internal file pointer to determine where to
196 * read and update on successful completion.
197 *
198 * fp - pointer to file (pipe, socket, etc) to read from
199 * buf - buffer to write
200 * count - # of bytes to read
201 * resid - pointer to count of unread bytes (if short read)
202 *
203 * Returns 0 on success errno on failure.
204 */
205 int
zfs_file_read(zfs_file_t * fp,void * buf,size_t count,ssize_t * resid)206 zfs_file_read(zfs_file_t *fp, void *buf, size_t count, ssize_t *resid)
207 {
208 int rc;
209
210 rc = read(fp->f_fd, buf, count);
211 if (rc < 0)
212 return (errno);
213
214 if (resid) {
215 *resid = count - rc;
216 } else if (rc != count) {
217 return (EIO);
218 }
219
220 return (0);
221 }
222
223 /*
224 * Stateless read - os internal file pointer is not updated.
225 *
226 * fp - pointer to file (pipe, socket, etc) to read from
227 * buf - buffer to write
228 * count - # of bytes to write
229 * off - file offset to read from (only valid for seekable types)
230 * resid - pointer to count of unwritten bytes (if short write)
231 *
232 * Returns 0 on success errno on failure.
233 */
234 int
zfs_file_pread(zfs_file_t * fp,void * buf,size_t count,loff_t off,ssize_t * resid)235 zfs_file_pread(zfs_file_t *fp, void *buf, size_t count, loff_t off,
236 ssize_t *resid)
237 {
238 ssize_t rc;
239
240 rc = pread64(fp->f_fd, buf, count, off);
241 if (rc < 0) {
242 #ifdef __linux__
243 /*
244 * Under Linux, this most likely means an alignment issue
245 * (memory or disk) due to O_DIRECT, so we abort() in order to
246 * catch the offender.
247 */
248 if (errno == EINVAL)
249 abort();
250 #endif
251 return (errno);
252 }
253
254 if (fp->f_dump_fd != -1) {
255 int status;
256
257 status = pwrite64(fp->f_dump_fd, buf, rc, off);
258 ASSERT(status != -1);
259 }
260
261 if (resid) {
262 *resid = count - rc;
263 } else if (rc != count) {
264 return (EIO);
265 }
266
267 return (0);
268 }
269
270 /*
271 * lseek - set / get file pointer
272 *
273 * fp - pointer to file (pipe, socket, etc) to read from
274 * offp - value to seek to, returns current value plus passed offset
275 * whence - see man pages for standard lseek whence values
276 *
277 * Returns 0 on success errno on failure (ESPIPE for non seekable types)
278 */
279 int
zfs_file_seek(zfs_file_t * fp,loff_t * offp,int whence)280 zfs_file_seek(zfs_file_t *fp, loff_t *offp, int whence)
281 {
282 loff_t rc;
283
284 rc = lseek(fp->f_fd, *offp, whence);
285 if (rc < 0)
286 return (errno);
287
288 *offp = rc;
289
290 return (0);
291 }
292
293 /*
294 * Get file attributes
295 *
296 * filp - file pointer
297 * zfattr - pointer to file attr structure
298 *
299 * Currently only used for fetching size and file mode
300 *
301 * Returns 0 on success or error code of underlying getattr call on failure.
302 */
303 int
zfs_file_getattr(zfs_file_t * fp,zfs_file_attr_t * zfattr)304 zfs_file_getattr(zfs_file_t *fp, zfs_file_attr_t *zfattr)
305 {
306 struct stat64 st;
307
308 if (fstat64_blk(fp->f_fd, &st) == -1)
309 return (errno);
310
311 zfattr->zfa_size = st.st_size;
312 zfattr->zfa_mode = st.st_mode;
313
314 return (0);
315 }
316
317 /*
318 * Sync file to disk
319 *
320 * filp - file pointer
321 * flags - O_SYNC and or O_DSYNC
322 *
323 * Returns 0 on success or error code of underlying sync call on failure.
324 */
325 int
zfs_file_fsync(zfs_file_t * fp,int flags)326 zfs_file_fsync(zfs_file_t *fp, int flags)
327 {
328 (void) flags;
329
330 if (fsync(fp->f_fd) < 0)
331 return (errno);
332
333 return (0);
334 }
335
336 /*
337 * deallocate - zero and/or deallocate file storage
338 *
339 * fp - file pointer
340 * offset - offset to start zeroing or deallocating
341 * len - length to zero or deallocate
342 */
343 int
zfs_file_deallocate(zfs_file_t * fp,loff_t offset,loff_t len)344 zfs_file_deallocate(zfs_file_t *fp, loff_t offset, loff_t len)
345 {
346 int rc;
347 #if defined(__linux__)
348 rc = fallocate(fp->f_fd,
349 FALLOC_FL_PUNCH_HOLE | FALLOC_FL_KEEP_SIZE, offset, len);
350 #elif defined(__FreeBSD__) && (__FreeBSD_version >= 1400029)
351 struct spacectl_range rqsr = {
352 .r_offset = offset,
353 .r_len = len,
354 };
355 rc = fspacectl(fp->f_fd, SPACECTL_DEALLOC, &rqsr, 0, &rqsr);
356 #else
357 (void) fp, (void) offset, (void) len;
358 rc = EOPNOTSUPP;
359 #endif
360 if (rc)
361 return (SET_ERROR(rc));
362 return (0);
363 }
364
365 /*
366 * Request current file pointer offset
367 *
368 * fp - pointer to file
369 *
370 * Returns current file offset.
371 */
372 loff_t
zfs_file_off(zfs_file_t * fp)373 zfs_file_off(zfs_file_t *fp)
374 {
375 return (lseek(fp->f_fd, SEEK_CUR, 0));
376 }
377
/*
 * unlink file
 *
 * path - fully qualified file path
 *
 * Returns 0 on success errno on failure.
 *
 * OPTIONAL
 */
int
zfs_file_unlink(const char *path)
{
	/*
	 * remove() only reports -1 on failure; hand back errno so callers
	 * get a real error code like the rest of the zfs_file_* functions.
	 */
	if (remove(path) != 0)
		return (errno);

	return (0);
}
392
/*
 * Get reference to file pointer
 *
 * fd - input file descriptor (unused)
 *
 * Returns pointer to file struct or NULL.
 * Unsupported in user space: this implementation always calls abort().
 */
zfs_file_t *
zfs_file_get(int fd)
{
	(void) fd;
	abort();
	/* Not reached; keeps the non-void function well formed. */
	return (NULL);
}
408 /*
409 * Drop reference to file pointer
410 *
411 * fp - pointer to file struct
412 *
413 * Unsupported in user space.
414 */
415 void
zfs_file_put(zfs_file_t * fp)416 zfs_file_put(zfs_file_t *fp)
417 {
418 abort();
419 (void) fp;
420 }
421