1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3 * CDDL HEADER START
4 *
5 * The contents of this file are subject to the terms of the
6 * Common Development and Distribution License (the "License").
7 * You may not use this file except in compliance with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or https://opensource.org/licenses/CDDL-1.0.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22
23 /*
24 * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
25 * Portions Copyright 2011 Martin Matuska
26 * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
27 * Copyright (c) 2012 Pawel Jakub Dawidek
28 * Copyright (c) 2014, 2016 Joyent, Inc. All rights reserved.
29 * Copyright 2016 Nexenta Systems, Inc. All rights reserved.
30 * Copyright (c) 2014, Joyent, Inc. All rights reserved.
31 * Copyright (c) 2011, 2024 by Delphix. All rights reserved.
32 * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
33 * Copyright (c) 2013 Steven Hartland. All rights reserved.
34 * Copyright (c) 2014 Integros [integros.com]
35 * Copyright 2016 Toomas Soome <tsoome@me.com>
36 * Copyright (c) 2016 Actifio, Inc. All rights reserved.
37 * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
38 * Copyright 2017 RackTop Systems.
39 * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
40 * Copyright (c) 2019 Datto Inc.
41 * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
42 * Copyright (c) 2019, 2021, 2023, 2024, Klara Inc.
43 * Copyright (c) 2019, Allan Jude
44 * Copyright 2026 Oxide Computer Company
45 */
46
47 /*
48 * ZFS ioctls.
49 *
50 * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage
51 * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool.
52 *
53 * There are two ways that we handle ioctls: the legacy way where almost
54 * all of the logic is in the ioctl callback, and the new way where most
55 * of the marshalling is handled in the common entry point, zfsdev_ioctl().
56 *
57 * Non-legacy ioctls should be registered by calling
58 * zfs_ioctl_register() from zfs_ioctl_init(). The ioctl is invoked
59 * from userland by lzc_ioctl().
60 *
61 * The registration arguments are as follows:
62 *
63 * const char *name
64 * The name of the ioctl. This is used for history logging. If the
65 * ioctl returns successfully (the callback returns 0), and allow_log
66 * is true, then a history log entry will be recorded with the input &
67 * output nvlists. The log entry can be printed with "zpool history -i".
68 *
69 * zfs_ioc_t ioc
70 * The ioctl request number, which userland will pass to ioctl(2).
71 * We want newer versions of libzfs and libzfs_core to run against
72 * existing zfs kernel modules (i.e. a deferred reboot after an update).
73 * Therefore the ioctl numbers cannot change from release to release.
74 *
75 * zfs_secpolicy_func_t *secpolicy
76 * This function will be called before the zfs_ioc_func_t, to
77 * determine if this operation is permitted. It should return EPERM
78 * on failure, and 0 on success. Checks include determining if the
79 * dataset is visible in this zone, and if the user has either all
80 * zfs privileges in the zone (SYS_MOUNT), or has been granted permission
81 * to do this operation on this dataset with "zfs allow".
82 *
83 * zfs_ioc_namecheck_t namecheck
84 * This specifies what to expect in the zfs_cmd_t:zc_name -- a pool
85 * name, a dataset name, or nothing. If the name is not well-formed,
86 * the ioctl will fail and the callback will not be called.
87 * Therefore, the callback can assume that the name is well-formed
88 * (e.g. is null-terminated, doesn't have more than one '@' character,
89 * doesn't have invalid characters).
90 *
91 * zfs_ioc_poolcheck_t pool_check
92 * This specifies requirements on the pool state. If the pool does
93 * not meet them (is suspended or is readonly), the ioctl will fail
94 * and the callback will not be called. If any checks are specified
95 * (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME.
96 * Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
97 * POOL_CHECK_READONLY).
98 *
99 * zfs_ioc_key_t *nvl_keys
100 * The list of expected/allowable innvl input keys. This list is used
101 * to validate the nvlist input to the ioctl.
102 *
103 * boolean_t smush_outnvlist
104 * If smush_outnvlist is true, then the output is presumed to be a
105 * list of errors, and it will be "smushed" down to fit into the
106 * caller's buffer, by removing some entries and replacing them with a
107 * single "N_MORE_ERRORS" entry indicating how many were removed. See
108 * nvlist_smush() for details. If smush_outnvlist is false, and the
109 * outnvlist does not fit into the userland-provided buffer, then the
110 * ioctl will fail with ENOMEM.
111 *
112 * zfs_ioc_func_t *func
113 * The callback function that will perform the operation.
114 *
115 * The callback should return 0 on success, or an error number on
116 * failure. If the function fails, the userland ioctl will return -1,
117 * and errno will be set to the callback's return value. The callback
118 * will be called with the following arguments:
119 *
120 * const char *name
121 * The name of the pool or dataset to operate on, from
122 * zfs_cmd_t:zc_name. The 'namecheck' argument specifies the
123 * expected type (pool, dataset, or none).
124 *
125 * nvlist_t *innvl
126 * The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src. Or
127 * NULL if no input nvlist was provided. Changes to this nvlist are
128 * ignored. If the input nvlist could not be deserialized, the
129 * ioctl will fail and the callback will not be called.
130 *
131 * nvlist_t *outnvl
132 * The output nvlist, initially empty. The callback can fill it in,
133 * and it will be returned to userland by serializing it into
134 * zfs_cmd_t:zc_nvlist_dst. If it is non-empty, and serialization
135 * fails (e.g. because the caller didn't supply a large enough
136 * buffer), then the overall ioctl will fail. See the
137 * 'smush_outnvlist' argument above for additional behaviors.
138 *
139 * There are two typical uses of the output nvlist:
140 * - To return state, e.g. property values. In this case,
141 * smush_outnvlist should be false. If the buffer was not large
142 * enough, the caller will reallocate a larger buffer and try
143 * the ioctl again.
144 *
145 * - To return multiple errors from an ioctl which makes on-disk
146 * changes. In this case, smush_outnvlist should be true.
147 * Ioctls which make on-disk modifications should generally not
148 * use the outnvl if they succeed, because the caller can not
149 * distinguish between the operation failing, and
150 * deserialization failing.
151 *
152 * IOCTL Interface Errors
153 *
154 * The following ioctl input errors can be returned:
155 * ZFS_ERR_IOC_CMD_UNAVAIL the ioctl number is not supported by kernel
156 * ZFS_ERR_IOC_ARG_UNAVAIL an input argument is not supported by kernel
157 * ZFS_ERR_IOC_ARG_REQUIRED a required input argument is missing
158 * ZFS_ERR_IOC_ARG_BADTYPE an input argument has an invalid type
159 */
160
161 #include <sys/types.h>
162 #include <sys/param.h>
163 #include <sys/errno.h>
164 #include <sys/file.h>
165 #include <sys/kmem.h>
166 #include <sys/cmn_err.h>
167 #include <sys/stat.h>
168 #include <sys/zfs_ioctl.h>
169 #include <sys/zfs_quota.h>
170 #include <sys/zfs_vfsops.h>
171 #include <sys/zfs_znode.h>
172 #include <sys/zap.h>
173 #include <sys/spa.h>
174 #include <sys/spa_impl.h>
175 #include <sys/vdev.h>
176 #include <sys/vdev_impl.h>
177 #include <sys/dmu.h>
178 #include <sys/dsl_dir.h>
179 #include <sys/dsl_dataset.h>
180 #include <sys/dsl_prop.h>
181 #include <sys/dsl_deleg.h>
182 #include <sys/dmu_objset.h>
183 #include <sys/dmu_impl.h>
184 #include <sys/dmu_redact.h>
185 #include <sys/dmu_tx.h>
186 #include <sys/sunddi.h>
187 #include <sys/policy.h>
188 #include <sys/zone.h>
189 #include <sys/nvpair.h>
190 #include <sys/pathname.h>
191 #include <sys/fs/zfs.h>
192 #include <sys/zfs_ctldir.h>
193 #include <sys/zfs_dir.h>
194 #include <sys/zfs_onexit.h>
195 #include <sys/zvol.h>
196 #include <sys/dsl_scan.h>
197 #include <sys/fm/util.h>
198 #include <sys/dsl_crypt.h>
199 #include <sys/rrwlock.h>
200 #include <sys/zfs_file.h>
201
202 #include <sys/dmu_recv.h>
203 #include <sys/dmu_send.h>
204 #include <sys/dmu_recv.h>
205 #include <sys/dsl_destroy.h>
206 #include <sys/dsl_bookmark.h>
207 #include <sys/dsl_userhold.h>
208 #include <sys/zfeature.h>
209 #include <sys/zcp.h>
210 #include <sys/zio_checksum.h>
211 #include <sys/vdev_removal.h>
212 #include <sys/vdev_impl.h>
213 #include <sys/vdev_initialize.h>
214 #include <sys/vdev_trim.h>
215 #include <sys/brt.h>
216 #include <sys/ddt.h>
217
218 #include "zfs_namecheck.h"
219 #include "zfs_prop.h"
220 #include "zfs_deleg.h"
221 #include "zfs_comutil.h"
222
223 #include <sys/lua/lua.h>
224 #include <sys/lua/lauxlib.h>
225 #include <sys/zfs_ioctl_impl.h>
226
227 kmutex_t zfsdev_state_lock;
228 static zfsdev_state_t zfsdev_state_listhead;
229
230 /*
231 * Limit maximum nvlist size. We don't want users passing in insane values
232 * for zc->zc_nvlist_src_size, since we will need to allocate that much memory.
233 * Defaults to 0=auto which is handled by platform code.
234 */
235 uint64_t zfs_max_nvlist_src_size = 0;
236
237 /*
238 * When logging the output nvlist of an ioctl in the on-disk history, limit
239 * the logged size to this many bytes. This must be less than DMU_MAX_ACCESS.
240 * This applies primarily to zfs_ioc_channel_program().
241 */
242 static uint64_t zfs_history_output_max = 1024 * 1024;
243
244 uint_t zfs_allow_log_key;
245
246 /* DATA_TYPE_ANY is used when zkey_type can vary. */
247 #define DATA_TYPE_ANY DATA_TYPE_UNKNOWN
248
249 typedef struct zfs_ioc_vec {
250 zfs_ioc_legacy_func_t *zvec_legacy_func;
251 zfs_ioc_func_t *zvec_func;
252 zfs_secpolicy_func_t *zvec_secpolicy;
253 zfs_ioc_namecheck_t zvec_namecheck;
254 boolean_t zvec_allow_log;
255 zfs_ioc_poolcheck_t zvec_pool_check;
256 boolean_t zvec_smush_outnvlist;
257 const char *zvec_name;
258 const zfs_ioc_key_t *zvec_nvl_keys;
259 size_t zvec_nvl_key_count;
260 } zfs_ioc_vec_t;
261
262 /* This array is indexed by zfs_userquota_prop_t */
263 static const char *userquota_perms[] = {
264 ZFS_DELEG_PERM_USERUSED,
265 ZFS_DELEG_PERM_USERQUOTA,
266 ZFS_DELEG_PERM_GROUPUSED,
267 ZFS_DELEG_PERM_GROUPQUOTA,
268 ZFS_DELEG_PERM_USEROBJUSED,
269 ZFS_DELEG_PERM_USEROBJQUOTA,
270 ZFS_DELEG_PERM_GROUPOBJUSED,
271 ZFS_DELEG_PERM_GROUPOBJQUOTA,
272 ZFS_DELEG_PERM_PROJECTUSED,
273 ZFS_DELEG_PERM_PROJECTQUOTA,
274 ZFS_DELEG_PERM_PROJECTOBJUSED,
275 ZFS_DELEG_PERM_PROJECTOBJQUOTA,
276 };
277
278 static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
279 static int zfs_ioc_id_quota_upgrade(zfs_cmd_t *zc);
280 static int zfs_check_settable(const char *name, nvpair_t *property,
281 cred_t *cr);
282 static int zfs_check_clearable(const char *dataset, nvlist_t *props,
283 nvlist_t **errors);
284 static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
285 boolean_t *);
286 int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
287 static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);
288
289 /*
290 * Callback for SPL to look up zoned_uid property.
291 * Walks ancestors to find the delegation root with zoned_uid set.
292 * Returns the zoned_uid value if found, or 0 if not set.
293 */
294 static uid_t
zfs_get_zoned_uid(const char * dataset,char * root_out,size_t root_size)295 zfs_get_zoned_uid(const char *dataset, char *root_out, size_t root_size)
296 {
297 char path[ZFS_MAX_DATASET_NAME_LEN];
298 char setpoint[ZFS_MAX_DATASET_NAME_LEN];
299 char *slash, *at;
300 uint64_t zoned_uid_val = 0;
301 int error;
302
303 (void) strlcpy(path, dataset, sizeof (path));
304
305 /*
306 * Strip snapshot suffix if present — snapshots inherit properties
307 * from their parent filesystem.
308 */
309 at = strchr(path, '@');
310 if (at != NULL)
311 *at = '\0';
312
313 /*
314 * Walk up the hierarchy until we find a dataset with zoned_uid set.
315 * This handles the case where the dataset doesn't exist yet (e.g.,
316 * rename destination) — dsl_prop_get fails on non-existent datasets,
317 * so we walk up to find an existing ancestor.
318 *
319 * When the property is found (possibly via inheritance), setpoint
320 * tells us the actual delegation root where zoned_uid is locally
321 * set, rather than the dataset where we happened to query it.
322 */
323 while (path[0] != '\0') {
324 error = dsl_prop_get(path, "zoned_uid", 8, 1,
325 &zoned_uid_val, setpoint);
326
327 if (error == 0 && zoned_uid_val != 0) {
328 if (root_out != NULL)
329 (void) strlcpy(root_out, setpoint, root_size);
330 return ((uid_t)zoned_uid_val);
331 }
332
333 slash = strrchr(path, '/');
334 if (slash == NULL)
335 break;
336 *slash = '\0';
337 }
338
339 return (0);
340 }
341
342 static void
history_str_free(char * buf)343 history_str_free(char *buf)
344 {
345 kmem_free(buf, HIS_MAX_RECORD_LEN);
346 }
347
348 static char *
history_str_get(zfs_cmd_t * zc)349 history_str_get(zfs_cmd_t *zc)
350 {
351 char *buf;
352
353 if (zc->zc_history == 0)
354 return (NULL);
355
356 buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
357 if (copyinstr((void *)(uintptr_t)zc->zc_history,
358 buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
359 history_str_free(buf);
360 return (NULL);
361 }
362
363 buf[HIS_MAX_RECORD_LEN -1] = '\0';
364
365 return (buf);
366 }
367
368 /*
369 * Return non-zero if the spa version is less than requested version.
370 */
371 static int
zfs_earlier_version(const char * name,int version)372 zfs_earlier_version(const char *name, int version)
373 {
374 spa_t *spa;
375
376 if (spa_open(name, &spa, FTAG) == 0) {
377 if (spa_version(spa) < version) {
378 spa_close(spa, FTAG);
379 return (1);
380 }
381 spa_close(spa, FTAG);
382 }
383 return (0);
384 }
385
386 /*
387 * Return TRUE if the ZPL version is less than requested version.
388 */
389 static boolean_t
zpl_earlier_version(const char * name,int version)390 zpl_earlier_version(const char *name, int version)
391 {
392 objset_t *os;
393 boolean_t rc = B_TRUE;
394
395 if (dmu_objset_hold(name, FTAG, &os) == 0) {
396 uint64_t zplversion;
397
398 if (dmu_objset_type(os) != DMU_OST_ZFS) {
399 dmu_objset_rele(os, FTAG);
400 return (B_TRUE);
401 }
402 /* XXX reading from non-owned objset */
403 if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
404 rc = zplversion < version;
405 dmu_objset_rele(os, FTAG);
406 }
407 return (rc);
408 }
409
410 static void
zfs_log_history(zfs_cmd_t * zc)411 zfs_log_history(zfs_cmd_t *zc)
412 {
413 spa_t *spa;
414 char *buf;
415
416 if ((buf = history_str_get(zc)) == NULL)
417 return;
418
419 if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
420 if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
421 (void) spa_history_log(spa, buf);
422 spa_close(spa, FTAG);
423 }
424 history_str_free(buf);
425 }
426
427 /*
428 * Policy for top-level read operations (list pools). Requires no privileges,
429 * and can be used in the local zone, as there is no associated dataset.
430 */
431 static int
zfs_secpolicy_none(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)432 zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
433 {
434 (void) zc, (void) innvl, (void) cr;
435 return (0);
436 }
437
438 /*
439 * Policy for dataset read operations (list children, get statistics). Requires
440 * no privileges, but must be visible in the local zone.
441 */
442 static int
zfs_secpolicy_read(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)443 zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
444 {
445 (void) innvl, (void) cr;
446 if (INGLOBALZONE(curproc) ||
447 zone_dataset_visible(zc->zc_name, NULL))
448 return (0);
449
450 return (SET_ERROR(ENOENT));
451 }
452
453 static int
zfs_dozonecheck_impl(const char * dataset,uint64_t zoned,cred_t * cr)454 zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
455 {
456 int writable = 1;
457
458 /*
459 * The dataset must be visible by this zone -- check this first
460 * so they don't see EPERM on something they shouldn't know about.
461 */
462 if (!INGLOBALZONE(curproc) &&
463 !zone_dataset_visible(dataset, &writable))
464 return (SET_ERROR(ENOENT));
465
466 if (INGLOBALZONE(curproc)) {
467 /*
468 * If the fs is zoned, only root can access it from the
469 * global zone.
470 */
471 if (secpolicy_zfs(cr) && zoned)
472 return (SET_ERROR(EPERM));
473 } else {
474 /*
475 * If we are in a local zone, the 'zoned' property must be set.
476 */
477 if (!zoned)
478 return (SET_ERROR(EPERM));
479
480 /* must be writable by this zone */
481 if (!writable)
482 return (SET_ERROR(EPERM));
483 }
484 return (0);
485 }
486
487 static int
zfs_dozonecheck(const char * dataset,cred_t * cr)488 zfs_dozonecheck(const char *dataset, cred_t *cr)
489 {
490 uint64_t zoned;
491
492 if (dsl_prop_get_integer(dataset, zfs_prop_to_name(ZFS_PROP_ZONED),
493 &zoned, NULL))
494 return (SET_ERROR(ENOENT));
495
496 return (zfs_dozonecheck_impl(dataset, zoned, cr));
497 }
498
499 static int
zfs_dozonecheck_ds(const char * dataset,dsl_dataset_t * ds,cred_t * cr)500 zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
501 {
502 uint64_t zoned;
503
504 if (dsl_prop_get_int_ds(ds, zfs_prop_to_name(ZFS_PROP_ZONED), &zoned))
505 return (SET_ERROR(ENOENT));
506
507 return (zfs_dozonecheck_impl(dataset, zoned, cr));
508 }
509
510 static int
zfs_secpolicy_write_perms_ds(const char * name,dsl_dataset_t * ds,const char * perm,cred_t * cr)511 zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
512 const char *perm, cred_t *cr)
513 {
514 int error;
515
516 error = zfs_dozonecheck_ds(name, ds, cr);
517 if (error == 0) {
518 error = secpolicy_zfs(cr);
519 if (error != 0)
520 error = dsl_deleg_access_impl(ds, perm, cr);
521 }
522 return (error);
523 }
524
525 static int
zfs_secpolicy_write_perms(const char * name,const char * perm,cred_t * cr)526 zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
527 {
528 int error;
529 dsl_dataset_t *ds;
530 dsl_pool_t *dp;
531
532 /*
533 * First do a quick check for root in the global zone, which
534 * is allowed to do all write_perms. This ensures that zfs_ioc_*
535 * will get to handle nonexistent datasets.
536 */
537 if (INGLOBALZONE(curproc) && secpolicy_zfs(cr) == 0)
538 return (0);
539
540 error = dsl_pool_hold(name, FTAG, &dp);
541 if (error != 0)
542 return (error);
543
544 error = dsl_dataset_hold(dp, name, FTAG, &ds);
545 if (error != 0) {
546 dsl_pool_rele(dp, FTAG);
547 return (error);
548 }
549
550 error = zfs_secpolicy_write_perms_ds(name, ds, perm, cr);
551
552 dsl_dataset_rele(ds, FTAG);
553 dsl_pool_rele(dp, FTAG);
554 return (error);
555 }
556
557 /*
558 * Check dsl_deleg permission for zoned_uid datasets.
559 *
560 * This bypasses zfs_dozonecheck_ds() (which requires the 'zoned' property)
561 * because zoned_uid datasets use a different authentication model. The zone
562 * check was already performed by zone_dataset_admin_check().
563 *
564 * Returns 0 if permission is granted, error otherwise.
565 * ECANCELED from dsl_deleg_access_impl() means delegation is disabled on the
566 * pool — in that case we deny access (POLP: no delegation = no access).
567 */
568 static int
zfs_secpolicy_zoned_uid_deleg(const char * name,const char * perm,cred_t * cr)569 zfs_secpolicy_zoned_uid_deleg(const char *name, const char *perm, cred_t *cr)
570 {
571 dsl_pool_t *dp;
572 dsl_dataset_t *ds;
573 int error;
574
575 error = dsl_pool_hold(name, FTAG, &dp);
576 if (error != 0)
577 return (error);
578 error = dsl_dataset_hold(dp, name, FTAG, &ds);
579 if (error != 0) {
580 dsl_pool_rele(dp, FTAG);
581 return (error);
582 }
583 error = dsl_deleg_access_impl(ds, perm, cr);
584 dsl_dataset_rele(ds, FTAG);
585 dsl_pool_rele(dp, FTAG);
586
587 /* ECANCELED = delegation disabled on pool; deny access (POLP) */
588 if (error == ECANCELED)
589 return (SET_ERROR(EPERM));
590 return (error);
591 }
592
593 /*
594 * Policy for setting the security label property.
595 *
596 * Returns 0 for success, non-zero for access and other errors.
597 */
598 static int
zfs_set_slabel_policy(const char * name,const char * strval,cred_t * cr)599 zfs_set_slabel_policy(const char *name, const char *strval, cred_t *cr)
600 {
601 #ifdef HAVE_MLSLABEL
602 char ds_hexsl[MAXNAMELEN];
603 bslabel_t ds_sl, new_sl;
604 boolean_t new_default = FALSE;
605 uint64_t zoned;
606 int needed_priv = -1;
607 int error;
608
609 /* First get the existing dataset label. */
610 error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
611 1, sizeof (ds_hexsl), &ds_hexsl, NULL);
612 if (error != 0)
613 return (SET_ERROR(EPERM));
614
615 if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
616 new_default = TRUE;
617
618 /* The label must be translatable */
619 if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
620 return (SET_ERROR(EINVAL));
621
622 /*
623 * In a non-global zone, disallow attempts to set a label that
624 * doesn't match that of the zone; otherwise no other checks
625 * are needed.
626 */
627 if (!INGLOBALZONE(curproc)) {
628 if (new_default || !blequal(&new_sl, CR_SL(CRED())))
629 return (SET_ERROR(EPERM));
630 return (0);
631 }
632
633 /*
634 * For global-zone datasets (i.e., those whose zoned property is
635 * "off", verify that the specified new label is valid for the
636 * global zone.
637 */
638 if (dsl_prop_get_integer(name,
639 zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
640 return (SET_ERROR(EPERM));
641 if (!zoned) {
642 if (zfs_check_global_label(name, strval) != 0)
643 return (SET_ERROR(EPERM));
644 }
645
646 /*
647 * If the existing dataset label is nondefault, check if the
648 * dataset is mounted (label cannot be changed while mounted).
649 * Get the zfsvfs_t; if there isn't one, then the dataset isn't
650 * mounted (or isn't a dataset, doesn't exist, ...).
651 */
652 if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
653 objset_t *os;
654 static const char *setsl_tag = "setsl_tag";
655
656 /*
657 * Try to own the dataset; abort if there is any error,
658 * (e.g., already mounted, in use, or other error).
659 */
660 error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, B_TRUE,
661 setsl_tag, &os);
662 if (error != 0)
663 return (SET_ERROR(EPERM));
664
665 dmu_objset_disown(os, B_TRUE, setsl_tag);
666
667 if (new_default) {
668 needed_priv = PRIV_FILE_DOWNGRADE_SL;
669 goto out_check;
670 }
671
672 if (hexstr_to_label(strval, &new_sl) != 0)
673 return (SET_ERROR(EPERM));
674
675 if (blstrictdom(&ds_sl, &new_sl))
676 needed_priv = PRIV_FILE_DOWNGRADE_SL;
677 else if (blstrictdom(&new_sl, &ds_sl))
678 needed_priv = PRIV_FILE_UPGRADE_SL;
679 } else {
680 /* dataset currently has a default label */
681 if (!new_default)
682 needed_priv = PRIV_FILE_UPGRADE_SL;
683 }
684
685 out_check:
686 if (needed_priv != -1)
687 return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
688 return (0);
689 #else
690 return (SET_ERROR(ENOTSUP));
691 #endif /* HAVE_MLSLABEL */
692 }
693
694 static int
zfs_secpolicy_setprop(const char * dsname,zfs_prop_t prop,nvpair_t * propval,cred_t * cr)695 zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
696 cred_t *cr)
697 {
698 const char *strval;
699 zone_admin_result_t zone_result;
700
701 /*
702 * Check zoned_uid delegation first. However, even delegated
703 * namespace users must not be allowed to modify zoned_uid itself.
704 */
705 zone_result = zone_dataset_admin_check(dsname, ZONE_OP_SETPROP, NULL);
706 if (zone_result == ZONE_ADMIN_ALLOWED) {
707 if (prop == ZFS_PROP_ZONED_UID)
708 return (SET_ERROR(EPERM));
709 if (prop == ZFS_PROP_FILESYSTEM_LIMIT ||
710 prop == ZFS_PROP_SNAPSHOT_LIMIT) {
711 char setpoint[ZFS_MAX_DATASET_NAME_LEN];
712 uint64_t zoned_uid_val = 0;
713 if (dsl_prop_get(dsname, "zoned_uid", 8, 1,
714 &zoned_uid_val, setpoint) == 0 &&
715 zoned_uid_val != 0 &&
716 strcmp(dsname, setpoint) == 0)
717 return (SET_ERROR(EPERM));
718 }
719 return (zfs_secpolicy_zoned_uid_deleg(dsname,
720 zfs_prop_to_name(prop), cr));
721 }
722 if (zone_result == ZONE_ADMIN_DENIED)
723 return (SET_ERROR(EPERM));
724
725 /*
726 * Check permissions for special properties.
727 */
728 switch (prop) {
729 default:
730 break;
731 case ZFS_PROP_ZONED:
732 /*
733 * Disallow setting of 'zoned' from within a local zone.
734 */
735 if (!INGLOBALZONE(curproc))
736 return (SET_ERROR(EPERM));
737 break;
738 case ZFS_PROP_ZONED_UID:
739 /*
740 * Disallow setting of 'zoned_uid' from within a
741 * delegated namespace -- only global zone can manage
742 * delegation assignments.
743 */
744 if (!INGLOBALZONE(curproc))
745 return (SET_ERROR(EPERM));
746 break;
747
748 case ZFS_PROP_QUOTA:
749 case ZFS_PROP_FILESYSTEM_LIMIT:
750 case ZFS_PROP_SNAPSHOT_LIMIT:
751 if (!INGLOBALZONE(curproc)) {
752 uint64_t zoned;
753 char setpoint[ZFS_MAX_DATASET_NAME_LEN];
754 /*
755 * Unprivileged users are allowed to modify the
756 * limit on things *under* (ie. contained by)
757 * the thing they own.
758 */
759 if (dsl_prop_get_integer(dsname,
760 zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, setpoint))
761 return (SET_ERROR(EPERM));
762 if (!zoned || strlen(dsname) <= strlen(setpoint))
763 return (SET_ERROR(EPERM));
764 }
765 break;
766
767 case ZFS_PROP_MLSLABEL:
768 if (!is_system_labeled())
769 return (SET_ERROR(EPERM));
770
771 if (nvpair_value_string(propval, &strval) == 0) {
772 int err;
773
774 err = zfs_set_slabel_policy(dsname, strval, CRED());
775 if (err != 0)
776 return (err);
777 }
778 break;
779 }
780
781 return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
782 }
783
784 static int
zfs_secpolicy_set_fsacl(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)785 zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
786 {
787 /*
788 * permission to set permissions will be evaluated later in
789 * dsl_deleg_can_allow()
790 */
791 (void) innvl;
792 return (zfs_dozonecheck(zc->zc_name, cr));
793 }
794
795 static int
zfs_secpolicy_rollback(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)796 zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
797 {
798 (void) innvl;
799 return (zfs_secpolicy_write_perms(zc->zc_name,
800 ZFS_DELEG_PERM_ROLLBACK, cr));
801 }
802
803 static int
zfs_secpolicy_send(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)804 zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
805 {
806 (void) innvl;
807 dsl_pool_t *dp;
808 dsl_dataset_t *ds;
809 const char *cp;
810 int error;
811 boolean_t rawok = !!(zc->zc_flags & 0x8);
812
813 /*
814 * Generate the current snapshot name from the given objsetid, then
815 * use that name for the secpolicy/zone checks.
816 */
817 cp = strchr(zc->zc_name, '@');
818 if (cp == NULL)
819 return (SET_ERROR(EINVAL));
820 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
821 if (error != 0)
822 return (error);
823
824 error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
825 if (error != 0) {
826 dsl_pool_rele(dp, FTAG);
827 return (error);
828 }
829
830 dsl_dataset_name(ds, zc->zc_name);
831
832 error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
833 ZFS_DELEG_PERM_SEND, cr);
834 if (error != 0 && rawok) {
835 error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
836 ZFS_DELEG_PERM_SEND_RAW, cr);
837 }
838 dsl_dataset_rele(ds, FTAG);
839 dsl_pool_rele(dp, FTAG);
840
841 return (error);
842 }
843
844 static int
zfs_secpolicy_send_new(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)845 zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
846 {
847 boolean_t rawok = nvlist_exists(innvl, "rawok");
848 int error;
849
850 (void) innvl;
851 error = zfs_secpolicy_write_perms(zc->zc_name,
852 ZFS_DELEG_PERM_SEND, cr);
853 if (error != 0 && rawok) {
854 error = zfs_secpolicy_write_perms(zc->zc_name,
855 ZFS_DELEG_PERM_SEND_RAW, cr);
856 }
857 return (error);
858 }
859
860 static int
zfs_secpolicy_share(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)861 zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
862 {
863 (void) zc, (void) innvl, (void) cr;
864 return (SET_ERROR(ENOTSUP));
865 }
866
867 static int
zfs_secpolicy_smb_acl(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)868 zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
869 {
870 (void) zc, (void) innvl, (void) cr;
871 return (SET_ERROR(ENOTSUP));
872 }
873
/*
 * Compute the parent of a dataset name.
 *
 * Copies 'datasetname' into 'parent' (at most 'parentsize' bytes) and cuts
 * it at the last '@' if there is one, otherwise at the last '/'.
 *
 * Returns 0 on success, or ENOENT if the name has neither separator (in
 * which case 'parent' holds the uncut copy).
 */
static int
zfs_get_parent(const char *datasetname, char *parent, int parentsize)
{
	char *sep;

	(void) strlcpy(parent, datasetname, parentsize);

	/* Remove the @bla or /bla from the end; '@' takes precedence. */
	sep = strrchr(parent, '@');
	if (sep == NULL)
		sep = strrchr(parent, '/');
	if (sep == NULL)
		return (SET_ERROR(ENOENT));

	*sep = '\0';
	return (0);
}
895
896 int
zfs_secpolicy_destroy_perms(const char * name,cred_t * cr)897 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
898 {
899 int error;
900 zone_admin_result_t result;
901
902 /* Check zoned_uid delegation first */
903 result = zone_dataset_admin_check(name, ZONE_OP_DESTROY, NULL);
904 if (result == ZONE_ADMIN_ALLOWED) {
905 if ((error = zfs_secpolicy_zoned_uid_deleg(name,
906 ZFS_DELEG_PERM_DESTROY, cr)) != 0)
907 return (error);
908 return (zfs_secpolicy_zoned_uid_deleg(name,
909 ZFS_DELEG_PERM_MOUNT, cr));
910 }
911 if (result == ZONE_ADMIN_DENIED)
912 return (SET_ERROR(EPERM));
913
914 /* NOT_APPLICABLE: continue with existing checks */
915 if ((error = zfs_secpolicy_write_perms(name,
916 ZFS_DELEG_PERM_MOUNT, cr)) != 0)
917 return (error);
918
919 return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
920 }
921
922 static int
zfs_secpolicy_destroy(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)923 zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
924 {
925 (void) innvl;
926 return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
927 }
928
929 /*
930 * Destroying snapshots with delegated permissions requires
931 * descendant mount and destroy permissions.
932 */
933 static int
zfs_secpolicy_destroy_snaps(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)934 zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
935 {
936 (void) zc;
937 nvlist_t *snaps;
938 nvpair_t *pair, *nextpair;
939 int error = 0;
940
941 snaps = fnvlist_lookup_nvlist(innvl, "snaps");
942
943 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
944 pair = nextpair) {
945 nextpair = nvlist_next_nvpair(snaps, pair);
946 error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr);
947 if (error == ENOENT) {
948 /*
949 * Ignore any snapshots that don't exist (we consider
950 * them "already destroyed"). Remove the name from the
951 * nvl here in case the snapshot is created between
952 * now and when we try to destroy it (in which case
953 * we don't want to destroy it since we haven't
954 * checked for permission).
955 */
956 fnvlist_remove_nvpair(snaps, pair);
957 error = 0;
958 }
959 if (error != 0)
960 break;
961 }
962
963 return (error);
964 }
965
966 int
zfs_secpolicy_rename_perms(const char * from,const char * to,cred_t * cr)967 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
968 {
969 char parentname[ZFS_MAX_DATASET_NAME_LEN];
970 int error;
971 zone_admin_result_t result;
972
973 /* Check zoned_uid delegation first */
974 result = zone_dataset_admin_check(from, ZONE_OP_RENAME, to);
975 if (result == ZONE_ADMIN_ALLOWED) {
976 if ((error = zfs_secpolicy_zoned_uid_deleg(from,
977 ZFS_DELEG_PERM_RENAME, cr)) != 0)
978 return (error);
979 return (zfs_secpolicy_zoned_uid_deleg(from,
980 ZFS_DELEG_PERM_MOUNT, cr));
981 }
982 if (result == ZONE_ADMIN_DENIED)
983 return (SET_ERROR(EPERM));
984
985 /* NOT_APPLICABLE: continue with existing checks */
986 if ((error = zfs_secpolicy_write_perms(from,
987 ZFS_DELEG_PERM_RENAME, cr)) != 0)
988 return (error);
989
990 if ((error = zfs_secpolicy_write_perms(from,
991 ZFS_DELEG_PERM_MOUNT, cr)) != 0)
992 return (error);
993
994 if ((error = zfs_get_parent(to, parentname,
995 sizeof (parentname))) != 0)
996 return (error);
997
998 if ((error = zfs_secpolicy_write_perms(parentname,
999 ZFS_DELEG_PERM_CREATE, cr)) != 0)
1000 return (error);
1001
1002 if ((error = zfs_secpolicy_write_perms(parentname,
1003 ZFS_DELEG_PERM_MOUNT, cr)) != 0)
1004 return (error);
1005
1006 return (error);
1007 }
1008
1009 static int
zfs_secpolicy_rename(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1010 zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1011 {
1012 (void) innvl;
1013 return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
1014 }
1015
1016 static int
zfs_secpolicy_promote(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1017 zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1018 {
1019 (void) innvl;
1020 dsl_pool_t *dp;
1021 dsl_dataset_t *clone;
1022 int error;
1023
1024 error = zfs_secpolicy_write_perms(zc->zc_name,
1025 ZFS_DELEG_PERM_PROMOTE, cr);
1026 if (error != 0)
1027 return (error);
1028
1029 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
1030 if (error != 0)
1031 return (error);
1032
1033 error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone);
1034
1035 if (error == 0) {
1036 char parentname[ZFS_MAX_DATASET_NAME_LEN];
1037 dsl_dataset_t *origin = NULL;
1038 dsl_dir_t *dd;
1039 dd = clone->ds_dir;
1040
1041 error = dsl_dataset_hold_obj(dd->dd_pool,
1042 dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin);
1043 if (error != 0) {
1044 dsl_dataset_rele(clone, FTAG);
1045 dsl_pool_rele(dp, FTAG);
1046 return (error);
1047 }
1048
1049 error = zfs_secpolicy_write_perms_ds(zc->zc_name, clone,
1050 ZFS_DELEG_PERM_MOUNT, cr);
1051
1052 dsl_dataset_name(origin, parentname);
1053 if (error == 0) {
1054 error = zfs_secpolicy_write_perms_ds(parentname, origin,
1055 ZFS_DELEG_PERM_PROMOTE, cr);
1056 }
1057 dsl_dataset_rele(clone, FTAG);
1058 dsl_dataset_rele(origin, FTAG);
1059 }
1060 dsl_pool_rele(dp, FTAG);
1061 return (error);
1062 }
1063
1064 static int
zfs_secpolicy_recv(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1065 zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1066 {
1067 (void) innvl;
1068 int error;
1069
1070 /*
1071 * zfs receive -F requires full receive permission,
1072 * otherwise receive:append permission is enough
1073 */
1074 if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1075 ZFS_DELEG_PERM_RECEIVE, cr)) != 0) {
1076 if (zc->zc_guid || nvlist_exists(innvl, "force"))
1077 return (error);
1078 if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1079 ZFS_DELEG_PERM_RECEIVE_APPEND, cr)) != 0)
1080 return (error);
1081 }
1082
1083 if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1084 ZFS_DELEG_PERM_MOUNT, cr)) != 0)
1085 return (error);
1086
1087 return (zfs_secpolicy_write_perms(zc->zc_name,
1088 ZFS_DELEG_PERM_CREATE, cr));
1089 }
1090
1091 /*
1092 * Policy for dataset set property operations. Individual properties checked by
1093 * zfs_check_settable(), additionally require zfs_secpolicy_recv() when setting
1094 * properties as part of a receive.
1095 */
1096 static int
zfs_secpolicy_setprops(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1097 zfs_secpolicy_setprops(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1098 {
1099 boolean_t received = zc->zc_cookie;
1100 int error;
1101
1102 if (received && (error = zfs_secpolicy_recv(zc, innvl, cr)))
1103 return (error);
1104
1105 return (zfs_secpolicy_read(zc, innvl, cr));
1106 }
1107
1108 int
zfs_secpolicy_snapshot_perms(const char * name,cred_t * cr)1109 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
1110 {
1111 zone_admin_result_t result;
1112
1113 /* Check zoned_uid delegation first */
1114 result = zone_dataset_admin_check(name, ZONE_OP_SNAPSHOT, NULL);
1115 if (result == ZONE_ADMIN_ALLOWED)
1116 return (zfs_secpolicy_zoned_uid_deleg(name,
1117 ZFS_DELEG_PERM_SNAPSHOT, cr));
1118 if (result == ZONE_ADMIN_DENIED)
1119 return (SET_ERROR(EPERM));
1120
1121 /* NOT_APPLICABLE: continue with existing checks */
1122 return (zfs_secpolicy_write_perms(name,
1123 ZFS_DELEG_PERM_SNAPSHOT, cr));
1124 }
1125
1126 /*
1127 * Check for permission to create each snapshot in the nvlist.
1128 */
1129 static int
zfs_secpolicy_snapshot(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1130 zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1131 {
1132 (void) zc;
1133 nvlist_t *snaps;
1134 int error = 0;
1135 nvpair_t *pair;
1136
1137 snaps = fnvlist_lookup_nvlist(innvl, "snaps");
1138
1139 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
1140 pair = nvlist_next_nvpair(snaps, pair)) {
1141 char *name = (char *)nvpair_name(pair);
1142 char *atp = strchr(name, '@');
1143
1144 if (atp == NULL) {
1145 error = SET_ERROR(EINVAL);
1146 break;
1147 }
1148 *atp = '\0';
1149 error = zfs_secpolicy_snapshot_perms(name, cr);
1150 *atp = '@';
1151 if (error != 0)
1152 break;
1153 }
1154 return (error);
1155 }
1156
1157 /*
1158 * Check for permission to create each bookmark in the nvlist.
1159 */
1160 static int
zfs_secpolicy_bookmark(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1161 zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1162 {
1163 (void) zc;
1164 int error = 0;
1165
1166 for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
1167 pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
1168 char *name = (char *)nvpair_name(pair);
1169 char *hashp = strchr(name, '#');
1170
1171 if (hashp == NULL) {
1172 error = SET_ERROR(EINVAL);
1173 break;
1174 }
1175 *hashp = '\0';
1176 error = zfs_secpolicy_write_perms(name,
1177 ZFS_DELEG_PERM_BOOKMARK, cr);
1178 *hashp = '#';
1179 if (error != 0)
1180 break;
1181 }
1182 return (error);
1183 }
1184
1185 static int
zfs_secpolicy_destroy_bookmarks(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1186 zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1187 {
1188 (void) zc;
1189 nvpair_t *pair, *nextpair;
1190 int error = 0;
1191
1192 for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1193 pair = nextpair) {
1194 char *name = (char *)nvpair_name(pair);
1195 char *hashp = strchr(name, '#');
1196 nextpair = nvlist_next_nvpair(innvl, pair);
1197
1198 if (hashp == NULL) {
1199 error = SET_ERROR(EINVAL);
1200 break;
1201 }
1202
1203 *hashp = '\0';
1204 error = zfs_secpolicy_write_perms(name,
1205 ZFS_DELEG_PERM_DESTROY, cr);
1206 *hashp = '#';
1207 if (error == ENOENT) {
1208 /*
1209 * Ignore any filesystems that don't exist (we consider
1210 * their bookmarks "already destroyed"). Remove
1211 * the name from the nvl here in case the filesystem
1212 * is created between now and when we try to destroy
1213 * the bookmark (in which case we don't want to
1214 * destroy it since we haven't checked for permission).
1215 */
1216 fnvlist_remove_nvpair(innvl, pair);
1217 error = 0;
1218 }
1219 if (error != 0)
1220 break;
1221 }
1222
1223 return (error);
1224 }
1225
1226 static int
zfs_secpolicy_log_history(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1227 zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1228 {
1229 (void) zc, (void) innvl, (void) cr;
1230 /*
1231 * Even root must have a proper TSD so that we know what pool
1232 * to log to.
1233 */
1234 if (tsd_get(zfs_allow_log_key) == NULL)
1235 return (SET_ERROR(EPERM));
1236 return (0);
1237 }
1238
1239 static int
zfs_secpolicy_create_clone(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1240 zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1241 {
1242 char parentname[ZFS_MAX_DATASET_NAME_LEN];
1243 int error;
1244 const char *origin = NULL;
1245 zone_admin_result_t result;
1246
1247 if ((error = zfs_get_parent(zc->zc_name, parentname,
1248 sizeof (parentname))) != 0)
1249 return (error);
1250
1251 (void) nvlist_lookup_string(innvl, "origin", &origin);
1252
1253 /* Check zoned_uid delegation first */
1254 result = zone_dataset_admin_check(parentname,
1255 origin != NULL ? ZONE_OP_CLONE : ZONE_OP_CREATE, origin);
1256 if (result == ZONE_ADMIN_ALLOWED) {
1257 if (origin != NULL) {
1258 if ((error = zfs_secpolicy_zoned_uid_deleg(origin,
1259 ZFS_DELEG_PERM_CLONE, cr)) != 0)
1260 return (error);
1261 }
1262 if ((error = zfs_secpolicy_zoned_uid_deleg(parentname,
1263 ZFS_DELEG_PERM_CREATE, cr)) != 0)
1264 return (error);
1265 return (zfs_secpolicy_zoned_uid_deleg(parentname,
1266 ZFS_DELEG_PERM_MOUNT, cr));
1267 }
1268 if (result == ZONE_ADMIN_DENIED)
1269 return (SET_ERROR(EPERM));
1270
1271 /* NOT_APPLICABLE: continue with existing checks */
1272 if (origin != NULL &&
1273 (error = zfs_secpolicy_write_perms(origin,
1274 ZFS_DELEG_PERM_CLONE, cr)) != 0)
1275 return (error);
1276
1277 if ((error = zfs_secpolicy_write_perms(parentname,
1278 ZFS_DELEG_PERM_CREATE, cr)) != 0)
1279 return (error);
1280
1281 return (zfs_secpolicy_write_perms(parentname,
1282 ZFS_DELEG_PERM_MOUNT, cr));
1283 }
1284
1285 /*
1286 * Policy for pool operations - create/destroy pools, add vdevs, etc. Requires
1287 * SYS_CONFIG privilege, which is not available in a local zone.
1288 */
1289 int
zfs_secpolicy_config(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1290 zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1291 {
1292 (void) zc, (void) innvl;
1293
1294 if (secpolicy_sys_config(cr, B_FALSE) != 0)
1295 return (SET_ERROR(EPERM));
1296
1297 return (0);
1298 }
1299
1300 /*
1301 * Policy for object to name lookups.
1302 */
1303 static int
zfs_secpolicy_diff(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1304 zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1305 {
1306 (void) innvl;
1307 int error;
1308
1309 if (secpolicy_sys_config(cr, B_FALSE) == 0)
1310 return (0);
1311
1312 error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr);
1313 return (error);
1314 }
1315
1316 /*
1317 * Policy for fault injection. Requires all privileges.
1318 */
1319 static int
zfs_secpolicy_inject(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1320 zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1321 {
1322 (void) zc, (void) innvl;
1323 return (secpolicy_zinject(cr));
1324 }
1325
1326 static int
zfs_secpolicy_inherit_prop(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1327 zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1328 {
1329 (void) innvl;
1330 zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
1331
1332 if (prop == ZPROP_USERPROP) {
1333 if (!zfs_prop_user(zc->zc_value))
1334 return (SET_ERROR(EINVAL));
1335 zone_admin_result_t zone_result;
1336 zone_result = zone_dataset_admin_check(zc->zc_name,
1337 ZONE_OP_SETPROP, NULL);
1338 if (zone_result == ZONE_ADMIN_ALLOWED)
1339 return (zfs_secpolicy_zoned_uid_deleg(zc->zc_name,
1340 ZFS_DELEG_PERM_USERPROP, cr));
1341 if (zone_result == ZONE_ADMIN_DENIED)
1342 return (SET_ERROR(EPERM));
1343 return (zfs_secpolicy_write_perms(zc->zc_name,
1344 ZFS_DELEG_PERM_USERPROP, cr));
1345 } else {
1346 return (zfs_secpolicy_setprop(zc->zc_name, prop,
1347 NULL, cr));
1348 }
1349 }
1350
1351 static int
zfs_secpolicy_userspace_one(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1352 zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1353 {
1354 int err = zfs_secpolicy_read(zc, innvl, cr);
1355 if (err)
1356 return (err);
1357
1358 if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1359 return (SET_ERROR(EINVAL));
1360
1361 if (zc->zc_value[0] == 0) {
1362 /*
1363 * They are asking about a posix uid/gid. If it's
1364 * themself, allow it.
1365 */
1366 if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
1367 zc->zc_objset_type == ZFS_PROP_USERQUOTA ||
1368 zc->zc_objset_type == ZFS_PROP_USEROBJUSED ||
1369 zc->zc_objset_type == ZFS_PROP_USEROBJQUOTA) {
1370 if (zc->zc_guid == crgetuid(cr))
1371 return (0);
1372 } else if (zc->zc_objset_type == ZFS_PROP_GROUPUSED ||
1373 zc->zc_objset_type == ZFS_PROP_GROUPQUOTA ||
1374 zc->zc_objset_type == ZFS_PROP_GROUPOBJUSED ||
1375 zc->zc_objset_type == ZFS_PROP_GROUPOBJQUOTA) {
1376 if (groupmember(zc->zc_guid, cr))
1377 return (0);
1378 }
1379 /* else is for project quota/used */
1380 }
1381
1382 return (zfs_secpolicy_write_perms(zc->zc_name,
1383 userquota_perms[zc->zc_objset_type], cr));
1384 }
1385
1386 static int
zfs_secpolicy_userspace_many(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1387 zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1388 {
1389 int err = zfs_secpolicy_read(zc, innvl, cr);
1390 if (err)
1391 return (err);
1392
1393 if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1394 return (SET_ERROR(EINVAL));
1395
1396 return (zfs_secpolicy_write_perms(zc->zc_name,
1397 userquota_perms[zc->zc_objset_type], cr));
1398 }
1399
1400 static int
zfs_secpolicy_userspace_upgrade(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1401 zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1402 {
1403 (void) innvl;
1404 return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
1405 NULL, cr));
1406 }
1407
1408 static int
zfs_secpolicy_hold(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1409 zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1410 {
1411 (void) zc;
1412 nvpair_t *pair;
1413 nvlist_t *holds;
1414 int error;
1415
1416 holds = fnvlist_lookup_nvlist(innvl, "holds");
1417
1418 for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
1419 pair = nvlist_next_nvpair(holds, pair)) {
1420 char fsname[ZFS_MAX_DATASET_NAME_LEN];
1421 error = dmu_fsname(nvpair_name(pair), fsname);
1422 if (error != 0)
1423 return (error);
1424 error = zfs_secpolicy_write_perms(fsname,
1425 ZFS_DELEG_PERM_HOLD, cr);
1426 if (error != 0)
1427 return (error);
1428 }
1429 return (0);
1430 }
1431
1432 static int
zfs_secpolicy_release(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1433 zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1434 {
1435 (void) zc;
1436 nvpair_t *pair;
1437 int error;
1438
1439 for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1440 pair = nvlist_next_nvpair(innvl, pair)) {
1441 char fsname[ZFS_MAX_DATASET_NAME_LEN];
1442 error = dmu_fsname(nvpair_name(pair), fsname);
1443 if (error != 0)
1444 return (error);
1445 error = zfs_secpolicy_write_perms(fsname,
1446 ZFS_DELEG_PERM_RELEASE, cr);
1447 if (error != 0)
1448 return (error);
1449 }
1450 return (0);
1451 }
1452
1453 /*
1454 * Policy for allowing temporary snapshots to be taken or released
1455 */
1456 static int
zfs_secpolicy_tmp_snapshot(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1457 zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1458 {
1459 /*
1460 * A temporary snapshot is the same as a snapshot,
1461 * hold, destroy and release all rolled into one.
1462 * Delegated diff alone is sufficient that we allow this.
1463 */
1464 int error;
1465
1466 if (zfs_secpolicy_write_perms(zc->zc_name,
1467 ZFS_DELEG_PERM_DIFF, cr) == 0)
1468 return (0);
1469
1470 error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr);
1471
1472 if (innvl != NULL) {
1473 if (error == 0)
1474 error = zfs_secpolicy_hold(zc, innvl, cr);
1475 if (error == 0)
1476 error = zfs_secpolicy_release(zc, innvl, cr);
1477 if (error == 0)
1478 error = zfs_secpolicy_destroy(zc, innvl, cr);
1479 }
1480 return (error);
1481 }
1482
1483 static int
zfs_secpolicy_load_key(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1484 zfs_secpolicy_load_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1485 {
1486 return (zfs_secpolicy_write_perms(zc->zc_name,
1487 ZFS_DELEG_PERM_LOAD_KEY, cr));
1488 }
1489
1490 static int
zfs_secpolicy_change_key(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1491 zfs_secpolicy_change_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1492 {
1493 return (zfs_secpolicy_write_perms(zc->zc_name,
1494 ZFS_DELEG_PERM_CHANGE_KEY, cr));
1495 }
1496
1497 /*
1498 * Returns the nvlist as specified by the user in the zfs_cmd_t.
1499 */
1500 static int
get_nvlist(uint64_t nvl,uint64_t size,int iflag,nvlist_t ** nvp)1501 get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
1502 {
1503 char *packed;
1504 int error;
1505 nvlist_t *list = NULL;
1506
1507 /*
1508 * Read in and unpack the user-supplied nvlist.
1509 */
1510 if (size == 0)
1511 return (SET_ERROR(EINVAL));
1512
1513 packed = vmem_alloc(size, KM_SLEEP);
1514
1515 if (ddi_copyin((void *)(uintptr_t)nvl, packed, size, iflag) != 0) {
1516 vmem_free(packed, size);
1517 return (SET_ERROR(EFAULT));
1518 }
1519
1520 if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
1521 vmem_free(packed, size);
1522 return (error);
1523 }
1524
1525 vmem_free(packed, size);
1526
1527 *nvp = list;
1528 return (0);
1529 }
1530
1531 /*
1532 * Reduce the size of this nvlist until it can be serialized in 'max' bytes.
1533 * Entries will be removed from the end of the nvlist, and one int32 entry
1534 * named "N_MORE_ERRORS" will be added indicating how many entries were
1535 * removed.
1536 */
1537 static int
nvlist_smush(nvlist_t * errors,size_t max)1538 nvlist_smush(nvlist_t *errors, size_t max)
1539 {
1540 size_t size;
1541
1542 size = fnvlist_size(errors);
1543
1544 if (size > max) {
1545 nvpair_t *more_errors;
1546 int n = 0;
1547
1548 if (max < 1024)
1549 return (SET_ERROR(ENOMEM));
1550
1551 fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0);
1552 more_errors = nvlist_prev_nvpair(errors, NULL);
1553
1554 do {
1555 nvpair_t *pair = nvlist_prev_nvpair(errors,
1556 more_errors);
1557 fnvlist_remove_nvpair(errors, pair);
1558 n++;
1559 size = fnvlist_size(errors);
1560 } while (size > max);
1561
1562 fnvlist_remove_nvpair(errors, more_errors);
1563 fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n);
1564 ASSERT3U(fnvlist_size(errors), <=, max);
1565 }
1566
1567 return (0);
1568 }
1569
1570 static int
put_nvlist(zfs_cmd_t * zc,nvlist_t * nvl)1571 put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
1572 {
1573 char *packed = NULL;
1574 int error = 0;
1575 size_t size;
1576
1577 size = fnvlist_size(nvl);
1578
1579 if (size > zc->zc_nvlist_dst_size) {
1580 error = SET_ERROR(ENOMEM);
1581 } else {
1582 packed = fnvlist_pack(nvl, &size);
1583 if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
1584 size, zc->zc_iflags) != 0)
1585 error = SET_ERROR(EFAULT);
1586 fnvlist_pack_free(packed, size);
1587 }
1588
1589 zc->zc_nvlist_dst_size = size;
1590 zc->zc_nvlist_dst_filled = B_TRUE;
1591 return (error);
1592 }
1593
1594 int
getzfsvfs_impl(objset_t * os,zfsvfs_t ** zfvp)1595 getzfsvfs_impl(objset_t *os, zfsvfs_t **zfvp)
1596 {
1597 int error = 0;
1598 if (dmu_objset_type(os) != DMU_OST_ZFS) {
1599 return (SET_ERROR(EINVAL));
1600 }
1601
1602 mutex_enter(&os->os_user_ptr_lock);
1603 *zfvp = dmu_objset_get_user(os);
1604 /* bump s_active only when non-zero to prevent umount race */
1605 error = zfs_vfs_ref(zfvp);
1606 mutex_exit(&os->os_user_ptr_lock);
1607 return (error);
1608 }
1609
1610 int
getzfsvfs(const char * dsname,zfsvfs_t ** zfvp)1611 getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
1612 {
1613 objset_t *os;
1614 int error;
1615
1616 error = dmu_objset_hold(dsname, FTAG, &os);
1617 if (error != 0)
1618 return (error);
1619
1620 error = getzfsvfs_impl(os, zfvp);
1621 dmu_objset_rele(os, FTAG);
1622 return (error);
1623 }
1624
1625 /*
1626 * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
1627 * case its z_sb will be NULL, and it will be opened as the owner.
1628 * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
1629 * which prevents all inode ops from running.
1630 */
1631 static int
zfsvfs_hold(const char * name,const void * tag,zfsvfs_t ** zfvp,boolean_t writer)1632 zfsvfs_hold(const char *name, const void *tag, zfsvfs_t **zfvp,
1633 boolean_t writer)
1634 {
1635 int error = 0;
1636
1637 if (getzfsvfs(name, zfvp) != 0)
1638 error = zfsvfs_create(name, B_FALSE, zfvp);
1639 if (error == 0) {
1640 if (writer)
1641 ZFS_TEARDOWN_ENTER_WRITE(*zfvp, tag);
1642 else
1643 ZFS_TEARDOWN_ENTER_READ(*zfvp, tag);
1644 if ((*zfvp)->z_unmounted) {
1645 /*
1646 * XXX we could probably try again, since the unmounting
1647 * thread should be just about to disassociate the
1648 * objset from the zfsvfs.
1649 */
1650 ZFS_TEARDOWN_EXIT(*zfvp, tag);
1651 zfs_vfs_rele(*zfvp);
1652 return (SET_ERROR(EBUSY));
1653 }
1654 }
1655 return (error);
1656 }
1657
1658 static void
zfsvfs_rele(zfsvfs_t * zfsvfs,const void * tag)1659 zfsvfs_rele(zfsvfs_t *zfsvfs, const void *tag)
1660 {
1661 ZFS_TEARDOWN_EXIT(zfsvfs, tag);
1662
1663 if (zfs_vfs_held(zfsvfs)) {
1664 zfs_vfs_rele(zfsvfs);
1665 } else {
1666 dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
1667 zfsvfs_free(zfsvfs);
1668 }
1669 }
1670
/*
 * Create a pool.
 *
 * inputs:
 * zc_name		name of the pool
 * zc_nvlist_conf	pool configuration nvlist (required)
 * zc_nvlist_src	pool properties nvlist (optional; read only when
 *			zc_nvlist_src_size is non-zero)
 *
 * outputs:
 * zc_nvlist_dst	when spa_create() hands back extra information, an
 *			nvlist holding it under ZPOOL_CONFIG_CREATE_INFO
 *
 * Returns 0 on success or an errno.  All nvlists allocated here and the
 * crypto parameters are freed on every path that reaches pool_props_bad.
 */
static int
zfs_ioc_pool_create(zfs_cmd_t *zc)
{
	int error;
	nvlist_t *config, *props = NULL;
	nvlist_t *rootprops = NULL;
	nvlist_t *zplprops = NULL;
	dsl_crypto_params_t *dcp = NULL;
	const char *spa_name = zc->zc_name;
	boolean_t unload_wkey = B_TRUE;
	nvlist_t *errinfo = NULL;

	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
	    zc->zc_iflags, &config)))
		return (error);

	if (zc->zc_nvlist_src_size != 0 && (error =
	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
	    zc->zc_iflags, &props))) {
		nvlist_free(config);
		return (error);
	}

	if (props) {
		nvlist_t *nvl = NULL;
		nvlist_t *hidden_args = NULL;
		uint64_t version = SPA_VERSION;
		const char *tname;

		/* 'version' keeps its SPA_VERSION default if not supplied. */
		(void) nvlist_lookup_uint64(props,
		    zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
		if (!SPA_VERSION_IS_SUPPORTED(version)) {
			error = SET_ERROR(EINVAL);
			goto pool_props_bad;
		}
		/*
		 * Root filesystem properties are split off into their own
		 * copy and removed from the pool properties.
		 */
		(void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
		if (nvl) {
			error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
			if (error != 0)
				goto pool_props_bad;
			(void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
		}

		/*
		 * The hidden arguments feed the crypto parameters and are
		 * removed from the pool properties afterwards.
		 */
		(void) nvlist_lookup_nvlist(props, ZPOOL_HIDDEN_ARGS,
		    &hidden_args);
		error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE,
		    rootprops, hidden_args, &dcp);
		if (error != 0)
			goto pool_props_bad;
		(void) nvlist_remove_all(props, ZPOOL_HIDDEN_ARGS);

		VERIFY0(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP));
		error = zfs_fill_zplprops_root(version, rootprops,
		    zplprops, NULL);
		if (error != 0)
			goto pool_props_bad;

		/*
		 * If a temporary name was given, the property-setting and
		 * destroy calls below address the pool by that name.
		 */
		if (nvlist_lookup_string(props,
		    zpool_prop_to_name(ZPOOL_PROP_TNAME), &tname) == 0)
			spa_name = tname;
	}

	error = spa_create(zc->zc_name, config, props, zplprops, dcp,
	    &errinfo);
	/*
	 * Pass any extra information back to the caller regardless of
	 * 'error'; a failure to copy it out is deliberately ignored.
	 */
	if (errinfo != NULL) {
		nvlist_t *outnv = fnvlist_alloc();
		fnvlist_add_nvlist(outnv,
		    ZPOOL_CONFIG_CREATE_INFO, errinfo);
		(void) put_nvlist(zc, outnv);
		nvlist_free(outnv);
		nvlist_free(errinfo);
	}

	/*
	 * Set the remaining root properties
	 */
	if (!error && (error = zfs_set_prop_nvlist(spa_name,
	    ZPROP_SRC_LOCAL, rootprops, NULL)) != 0) {
		(void) spa_destroy(spa_name);
		unload_wkey = B_FALSE; /* spa_destroy() unloads wrapping keys */
	}

pool_props_bad:
	nvlist_free(rootprops);
	nvlist_free(zplprops);
	nvlist_free(config);
	nvlist_free(props);
	/*
	 * The second argument is true only when an error occurred and
	 * spa_destroy() has not already run.
	 * NOTE(review): presumably it requests unloading the wrapping key --
	 * confirm against dsl_crypto_params_free().
	 */
	dsl_crypto_params_free(dcp, unload_wkey && !!error);

	return (error);
}
1762
1763 static int
zfs_ioc_pool_destroy(zfs_cmd_t * zc)1764 zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1765 {
1766 int error;
1767 zfs_log_history(zc);
1768 error = spa_destroy(zc->zc_name);
1769
1770 return (error);
1771 }
1772
1773 static int
zfs_ioc_pool_import(zfs_cmd_t * zc)1774 zfs_ioc_pool_import(zfs_cmd_t *zc)
1775 {
1776 nvlist_t *config, *props = NULL;
1777 uint64_t guid;
1778 int error;
1779
1780 if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1781 zc->zc_iflags, &config)) != 0)
1782 return (error);
1783
1784 if (zc->zc_nvlist_src_size != 0 && (error =
1785 get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1786 zc->zc_iflags, &props))) {
1787 nvlist_free(config);
1788 return (error);
1789 }
1790
1791 if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1792 guid != zc->zc_guid)
1793 error = SET_ERROR(EINVAL);
1794 else
1795 error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
1796
1797 if (zc->zc_nvlist_dst != 0) {
1798 int err;
1799
1800 if ((err = put_nvlist(zc, config)) != 0)
1801 error = err;
1802 }
1803
1804 nvlist_free(config);
1805 nvlist_free(props);
1806
1807 return (error);
1808 }
1809
1810 static int
zfs_ioc_pool_export(zfs_cmd_t * zc)1811 zfs_ioc_pool_export(zfs_cmd_t *zc)
1812 {
1813 int error;
1814 boolean_t force = (boolean_t)zc->zc_cookie;
1815 boolean_t hardforce = (boolean_t)zc->zc_guid;
1816
1817 zfs_log_history(zc);
1818 error = spa_export(zc->zc_name, NULL, force, hardforce);
1819
1820 return (error);
1821 }
1822
1823 static int
zfs_ioc_pool_configs(zfs_cmd_t * zc)1824 zfs_ioc_pool_configs(zfs_cmd_t *zc)
1825 {
1826 nvlist_t *configs;
1827 int error;
1828
1829 error = spa_all_configs(&zc->zc_cookie, &configs);
1830 if (error)
1831 return (error);
1832
1833 error = put_nvlist(zc, configs);
1834
1835 nvlist_free(configs);
1836
1837 return (error);
1838 }
1839
1840 /*
1841 * inputs:
1842 * zc_name name of the pool
1843 *
1844 * outputs:
1845 * zc_cookie real errno
1846 * zc_nvlist_dst config nvlist
1847 * zc_nvlist_dst_size size of config nvlist
1848 */
1849 static int
zfs_ioc_pool_stats(zfs_cmd_t * zc)1850 zfs_ioc_pool_stats(zfs_cmd_t *zc)
1851 {
1852 nvlist_t *config;
1853 int error;
1854 int ret = 0;
1855
1856 error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1857 sizeof (zc->zc_value));
1858
1859 if (config != NULL) {
1860 ret = put_nvlist(zc, config);
1861 nvlist_free(config);
1862
1863 /*
1864 * The config may be present even if 'error' is non-zero.
1865 * In this case we return success, and preserve the real errno
1866 * in 'zc_cookie'.
1867 */
1868 zc->zc_cookie = error;
1869 } else {
1870 ret = error;
1871 }
1872
1873 return (ret);
1874 }
1875
1876 /*
1877 * Try to import the given pool, returning pool stats as appropriate so that
1878 * user land knows which devices are available and overall pool health.
1879 */
1880 static int
zfs_ioc_pool_tryimport(zfs_cmd_t * zc)1881 zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1882 {
1883 nvlist_t *tryconfig, *config = NULL;
1884 int error;
1885
1886 if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1887 zc->zc_iflags, &tryconfig)) != 0)
1888 return (error);
1889
1890 config = spa_tryimport(tryconfig);
1891
1892 nvlist_free(tryconfig);
1893
1894 if (config == NULL)
1895 return (SET_ERROR(EINVAL));
1896
1897 error = put_nvlist(zc, config);
1898 nvlist_free(config);
1899
1900 return (error);
1901 }
1902
1903 /*
1904 * inputs:
1905 * zc_name name of the pool
1906 * zc_cookie scan func (pool_scan_func_t)
1907 * zc_flags scrub pause/resume flag (pool_scrub_cmd_t)
1908 */
1909 static int
zfs_ioc_pool_scan(zfs_cmd_t * zc)1910 zfs_ioc_pool_scan(zfs_cmd_t *zc)
1911 {
1912 spa_t *spa;
1913 int error;
1914
1915 if (zc->zc_flags >= POOL_SCRUB_FLAGS_END)
1916 return (SET_ERROR(EINVAL));
1917
1918 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1919 return (error);
1920
1921 if (zc->zc_flags == POOL_SCRUB_PAUSE)
1922 error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE);
1923 else if (zc->zc_cookie == POOL_SCAN_NONE)
1924 error = spa_scan_stop(spa);
1925 else
1926 error = spa_scan(spa, zc->zc_cookie);
1927
1928 spa_close(spa, FTAG);
1929
1930 return (error);
1931 }
1932
/*
 * inputs:
 * poolname		name of the pool
 * scan_type		scan func (pool_scan_func_t)
 * scan_command		scrub pause/resume flag (pool_scrub_cmd_t)
 * scan_date_start	(optional) when non-zero, zfs_ioc_pool_scrub() looks
 *			it up in spa_txg_log_time to obtain the starting
 *			txg of the scan range
 * scan_date_end	(optional) when non-zero, looked up the same way to
 *			obtain the ending txg of the scan range
 *
 * NOTE(review): the unit of the two date values is not visible here --
 * confirm against dbrrd_query().
 */
static const zfs_ioc_key_t zfs_keys_pool_scrub[] = {
	{"scan_type",		DATA_TYPE_UINT64,	0},
	{"scan_command",	DATA_TYPE_UINT64,	0},
	{"scan_date_start",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
	{"scan_date_end",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
};
1945
1946 static int
zfs_ioc_pool_scrub(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)1947 zfs_ioc_pool_scrub(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
1948 {
1949 spa_t *spa;
1950 int error;
1951 uint64_t scan_type, scan_cmd;
1952 uint64_t date_start, date_end;
1953
1954 if (nvlist_lookup_uint64(innvl, "scan_type", &scan_type) != 0)
1955 return (SET_ERROR(EINVAL));
1956 if (nvlist_lookup_uint64(innvl, "scan_command", &scan_cmd) != 0)
1957 return (SET_ERROR(EINVAL));
1958
1959 if (scan_cmd >= POOL_SCRUB_FLAGS_END)
1960 return (SET_ERROR(EINVAL));
1961
1962 if (nvlist_lookup_uint64(innvl, "scan_date_start", &date_start) != 0)
1963 date_start = 0;
1964 if (nvlist_lookup_uint64(innvl, "scan_date_end", &date_end) != 0)
1965 date_end = 0;
1966
1967 if ((error = spa_open(poolname, &spa, FTAG)) != 0)
1968 return (error);
1969
1970 if (scan_cmd == POOL_SCRUB_PAUSE) {
1971 error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE);
1972 } else if (scan_type == POOL_SCAN_NONE) {
1973 error = spa_scan_stop(spa);
1974 } else if (scan_cmd == POOL_SCRUB_FROM_LAST_TXG) {
1975 error = spa_scan_range(spa, scan_type,
1976 spa_get_last_scrubbed_txg(spa), 0);
1977 } else {
1978 uint64_t txg_start, txg_end;
1979
1980 txg_start = txg_end = 0;
1981 if (date_start != 0 || date_end != 0) {
1982 mutex_enter(&spa->spa_txg_log_time_lock);
1983 if (date_start != 0) {
1984 txg_start = dbrrd_query(&spa->spa_txg_log_time,
1985 date_start, DBRRD_FLOOR);
1986 }
1987
1988 if (date_end != 0) {
1989 txg_end = dbrrd_query(&spa->spa_txg_log_time,
1990 date_end, DBRRD_CEILING);
1991 }
1992 mutex_exit(&spa->spa_txg_log_time_lock);
1993 }
1994
1995 error = spa_scan_range(spa, scan_type, txg_start, txg_end);
1996 }
1997
1998 spa_close(spa, FTAG);
1999 return (error);
2000 }
2001
2002 static int
zfs_ioc_pool_freeze(zfs_cmd_t * zc)2003 zfs_ioc_pool_freeze(zfs_cmd_t *zc)
2004 {
2005 spa_t *spa;
2006 int error;
2007
2008 error = spa_open(zc->zc_name, &spa, FTAG);
2009 if (error == 0) {
2010 spa_freeze(spa);
2011 spa_close(spa, FTAG);
2012 }
2013 return (error);
2014 }
2015
2016 static int
zfs_ioc_pool_upgrade(zfs_cmd_t * zc)2017 zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
2018 {
2019 spa_t *spa;
2020 int error;
2021
2022 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2023 return (error);
2024
2025 if (zc->zc_cookie < spa_version(spa) ||
2026 !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
2027 spa_close(spa, FTAG);
2028 return (SET_ERROR(EINVAL));
2029 }
2030
2031 spa_upgrade(spa, zc->zc_cookie);
2032 spa_close(spa, FTAG);
2033
2034 return (error);
2035 }
2036
2037 static int
zfs_ioc_pool_get_history(zfs_cmd_t * zc)2038 zfs_ioc_pool_get_history(zfs_cmd_t *zc)
2039 {
2040 spa_t *spa;
2041 char *hist_buf;
2042 uint64_t size;
2043 int error;
2044
2045 if ((size = zc->zc_history_len) == 0)
2046 return (SET_ERROR(EINVAL));
2047
2048 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2049 return (error);
2050
2051 if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
2052 spa_close(spa, FTAG);
2053 return (SET_ERROR(ENOTSUP));
2054 }
2055
2056 hist_buf = vmem_alloc(size, KM_SLEEP);
2057 if ((error = spa_history_get(spa, &zc->zc_history_offset,
2058 &zc->zc_history_len, hist_buf)) == 0) {
2059 error = ddi_copyout(hist_buf,
2060 (void *)(uintptr_t)zc->zc_history,
2061 zc->zc_history_len, zc->zc_iflags);
2062 }
2063
2064 spa_close(spa, FTAG);
2065 vmem_free(hist_buf, size);
2066 return (error);
2067 }
2068
2069 /*
2070 * inputs:
2071 * zc_nvlist_src nvlist optionally containing ZPOOL_REGUID_GUID
2072 * zc_nvlist_src_size size of the nvlist
2073 */
2074 static int
zfs_ioc_pool_reguid(zfs_cmd_t * zc)2075 zfs_ioc_pool_reguid(zfs_cmd_t *zc)
2076 {
2077 uint64_t *guidp = NULL;
2078 nvlist_t *props = NULL;
2079 spa_t *spa;
2080 uint64_t guid;
2081 int error;
2082
2083 if (zc->zc_nvlist_src_size != 0) {
2084 error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2085 zc->zc_iflags, &props);
2086 if (error != 0)
2087 return (error);
2088
2089 error = nvlist_lookup_uint64(props, ZPOOL_REGUID_GUID, &guid);
2090 if (error == 0)
2091 guidp = &guid;
2092 else if (error == ENOENT)
2093 guidp = NULL;
2094 else
2095 goto out;
2096 }
2097
2098 error = spa_open(zc->zc_name, &spa, FTAG);
2099 if (error == 0) {
2100 error = spa_change_guid(spa, guidp);
2101 spa_close(spa, FTAG);
2102 }
2103
2104 out:
2105 if (props != NULL)
2106 nvlist_free(props);
2107
2108 return (error);
2109 }
2110
2111 static int
zfs_ioc_dsobj_to_dsname(zfs_cmd_t * zc)2112 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
2113 {
2114 return (dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value));
2115 }
2116
2117 /*
2118 * inputs:
2119 * zc_name name of filesystem
2120 * zc_obj object to find
2121 *
2122 * outputs:
2123 * zc_value name of object
2124 */
2125 static int
zfs_ioc_obj_to_path(zfs_cmd_t * zc)2126 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
2127 {
2128 objset_t *os;
2129 int error;
2130
2131 /* XXX reading from objset not owned */
2132 if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE,
2133 FTAG, &os)) != 0)
2134 return (error);
2135 if (dmu_objset_type(os) != DMU_OST_ZFS) {
2136 dmu_objset_rele_flags(os, B_TRUE, FTAG);
2137 return (SET_ERROR(EINVAL));
2138 }
2139 error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
2140 sizeof (zc->zc_value));
2141 dmu_objset_rele_flags(os, B_TRUE, FTAG);
2142
2143 return (error);
2144 }
2145
2146 /*
2147 * inputs:
2148 * zc_name name of filesystem
2149 * zc_obj object to find
2150 *
2151 * outputs:
2152 * zc_stat stats on object
2153 * zc_value path to object
2154 */
2155 static int
zfs_ioc_obj_to_stats(zfs_cmd_t * zc)2156 zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
2157 {
2158 objset_t *os;
2159 int error;
2160
2161 /* XXX reading from objset not owned */
2162 if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE,
2163 FTAG, &os)) != 0)
2164 return (error);
2165 if (dmu_objset_type(os) != DMU_OST_ZFS) {
2166 dmu_objset_rele_flags(os, B_TRUE, FTAG);
2167 return (SET_ERROR(EINVAL));
2168 }
2169 error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
2170 sizeof (zc->zc_value));
2171 dmu_objset_rele_flags(os, B_TRUE, FTAG);
2172
2173 return (error);
2174 }
2175
2176 static int
zfs_ioc_vdev_add(zfs_cmd_t * zc)2177 zfs_ioc_vdev_add(zfs_cmd_t *zc)
2178 {
2179 spa_t *spa;
2180 int error;
2181 nvlist_t *config;
2182
2183 error = spa_open(zc->zc_name, &spa, FTAG);
2184 if (error != 0)
2185 return (error);
2186
2187 error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2188 zc->zc_iflags, &config);
2189 if (error == 0) {
2190 error = spa_vdev_add(spa, config, zc->zc_flags);
2191 nvlist_free(config);
2192 }
2193 spa_close(spa, FTAG);
2194 return (error);
2195 }
2196
2197 /*
2198 * inputs:
2199 * zc_name name of the pool
2200 * zc_guid guid of vdev to remove
2201 * zc_cookie cancel removal
2202 */
2203 static int
zfs_ioc_vdev_remove(zfs_cmd_t * zc)2204 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
2205 {
2206 spa_t *spa;
2207 int error;
2208
2209 error = spa_open(zc->zc_name, &spa, FTAG);
2210 if (error != 0)
2211 return (error);
2212 if (zc->zc_cookie != 0) {
2213 error = spa_vdev_remove_cancel(spa);
2214 } else {
2215 error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
2216 }
2217 spa_close(spa, FTAG);
2218 return (error);
2219 }
2220
2221 static int
zfs_ioc_vdev_set_state(zfs_cmd_t * zc)2222 zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
2223 {
2224 spa_t *spa;
2225 int error;
2226 vdev_state_t newstate = VDEV_STATE_UNKNOWN;
2227
2228 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2229 return (error);
2230 switch (zc->zc_cookie) {
2231 case VDEV_STATE_ONLINE:
2232 error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
2233 break;
2234
2235 case VDEV_STATE_OFFLINE:
2236 error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
2237 break;
2238
2239 case VDEV_STATE_FAULTED:
2240 if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
2241 zc->zc_obj != VDEV_AUX_EXTERNAL &&
2242 zc->zc_obj != VDEV_AUX_EXTERNAL_PERSIST)
2243 zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
2244
2245 error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
2246 break;
2247
2248 case VDEV_STATE_DEGRADED:
2249 if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
2250 zc->zc_obj != VDEV_AUX_EXTERNAL)
2251 zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
2252
2253 error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
2254 break;
2255
2256 case VDEV_STATE_REMOVED:
2257 error = vdev_remove_wanted(spa, zc->zc_guid);
2258 break;
2259
2260 default:
2261 error = SET_ERROR(EINVAL);
2262 }
2263 zc->zc_cookie = newstate;
2264 spa_close(spa, FTAG);
2265 return (error);
2266 }
2267
2268 static int
zfs_ioc_vdev_attach(zfs_cmd_t * zc)2269 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
2270 {
2271 spa_t *spa;
2272 nvlist_t *config;
2273 int replacing = zc->zc_cookie;
2274 int rebuild = zc->zc_simple;
2275 int error;
2276
2277 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2278 return (error);
2279
2280 if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2281 zc->zc_iflags, &config)) == 0) {
2282 error = spa_vdev_attach(spa, zc->zc_guid, config, replacing,
2283 rebuild);
2284 nvlist_free(config);
2285 }
2286
2287 spa_close(spa, FTAG);
2288 return (error);
2289 }
2290
2291 static int
zfs_ioc_vdev_detach(zfs_cmd_t * zc)2292 zfs_ioc_vdev_detach(zfs_cmd_t *zc)
2293 {
2294 spa_t *spa;
2295 int error;
2296
2297 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2298 return (error);
2299
2300 error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
2301
2302 spa_close(spa, FTAG);
2303 return (error);
2304 }
2305
2306 static int
zfs_ioc_vdev_split(zfs_cmd_t * zc)2307 zfs_ioc_vdev_split(zfs_cmd_t *zc)
2308 {
2309 spa_t *spa;
2310 nvlist_t *config, *props = NULL;
2311 int error;
2312 boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
2313
2314 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2315 return (error);
2316
2317 if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2318 zc->zc_iflags, &config))) {
2319 spa_close(spa, FTAG);
2320 return (error);
2321 }
2322
2323 if (zc->zc_nvlist_src_size != 0 && (error =
2324 get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2325 zc->zc_iflags, &props))) {
2326 spa_close(spa, FTAG);
2327 nvlist_free(config);
2328 return (error);
2329 }
2330
2331 error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
2332
2333 spa_close(spa, FTAG);
2334
2335 nvlist_free(config);
2336 nvlist_free(props);
2337
2338 return (error);
2339 }
2340
2341 static int
zfs_ioc_vdev_setpath(zfs_cmd_t * zc)2342 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
2343 {
2344 spa_t *spa;
2345 const char *path = zc->zc_value;
2346 uint64_t guid = zc->zc_guid;
2347 int error;
2348
2349 error = spa_open(zc->zc_name, &spa, FTAG);
2350 if (error != 0)
2351 return (error);
2352
2353 error = spa_vdev_setpath(spa, guid, path);
2354 spa_close(spa, FTAG);
2355 return (error);
2356 }
2357
2358 static int
zfs_ioc_vdev_setfru(zfs_cmd_t * zc)2359 zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
2360 {
2361 spa_t *spa;
2362 const char *fru = zc->zc_value;
2363 uint64_t guid = zc->zc_guid;
2364 int error;
2365
2366 error = spa_open(zc->zc_name, &spa, FTAG);
2367 if (error != 0)
2368 return (error);
2369
2370 error = spa_vdev_setfru(spa, guid, fru);
2371 spa_close(spa, FTAG);
2372 return (error);
2373 }
2374
2375 static int
zfs_ioc_objset_stats_impl(zfs_cmd_t * zc,objset_t * os)2376 zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
2377 {
2378 int error = 0;
2379 nvlist_t *nv;
2380
2381 dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2382
2383 if (!zc->zc_simple && zc->zc_nvlist_dst != 0 &&
2384 (error = dsl_prop_get_all(os, &nv)) == 0) {
2385 dmu_objset_stats(os, nv);
2386 /*
2387 * NB: zvol_get_stats() will read the objset contents,
2388 * which we aren't supposed to do with a
2389 * DS_MODE_USER hold, because it could be
2390 * inconsistent. So this is a bit of a workaround...
2391 * XXX reading without owning
2392 */
2393 if (!zc->zc_objset_stats.dds_inconsistent &&
2394 dmu_objset_type(os) == DMU_OST_ZVOL) {
2395 error = zvol_get_stats(os, nv);
2396 if (error == EIO) {
2397 nvlist_free(nv);
2398 return (error);
2399 }
2400 VERIFY0(error);
2401 }
2402 if (error == 0)
2403 error = put_nvlist(zc, nv);
2404 nvlist_free(nv);
2405 }
2406
2407 return (error);
2408 }
2409
2410 /*
2411 * inputs:
2412 * zc_name name of filesystem
2413 * zc_nvlist_dst_size size of buffer for property nvlist
2414 *
2415 * outputs:
2416 * zc_objset_stats stats
2417 * zc_nvlist_dst property nvlist
2418 * zc_nvlist_dst_size size of property nvlist
2419 */
2420 static int
zfs_ioc_objset_stats(zfs_cmd_t * zc)2421 zfs_ioc_objset_stats(zfs_cmd_t *zc)
2422 {
2423 objset_t *os;
2424 int error;
2425
2426 error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2427 if (error == 0) {
2428 error = zfs_ioc_objset_stats_impl(zc, os);
2429 dmu_objset_rele(os, FTAG);
2430 }
2431
2432 return (error);
2433 }
2434
2435 /*
2436 * inputs:
2437 * zc_name name of filesystem
2438 * zc_nvlist_dst_size size of buffer for property nvlist
2439 *
2440 * outputs:
2441 * zc_nvlist_dst received property nvlist
2442 * zc_nvlist_dst_size size of received property nvlist
2443 *
2444 * Gets received properties (distinct from local properties on or after
2445 * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
2446 * local property values.
2447 */
2448 static int
zfs_ioc_objset_recvd_props(zfs_cmd_t * zc)2449 zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
2450 {
2451 int error = 0;
2452 nvlist_t *nv;
2453
2454 /*
2455 * Without this check, we would return local property values if the
2456 * caller has not already received properties on or after
2457 * SPA_VERSION_RECVD_PROPS.
2458 */
2459 if (!dsl_prop_get_hasrecvd(zc->zc_name))
2460 return (SET_ERROR(ENOTSUP));
2461
2462 if (zc->zc_nvlist_dst != 0 &&
2463 (error = dsl_prop_get_received(zc->zc_name, &nv)) == 0) {
2464 error = put_nvlist(zc, nv);
2465 nvlist_free(nv);
2466 }
2467
2468 return (error);
2469 }
2470
2471 static int
nvl_add_zplprop(objset_t * os,nvlist_t * props,zfs_prop_t prop)2472 nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
2473 {
2474 uint64_t value;
2475 int error;
2476
2477 /*
2478 * zfs_get_zplprop() will either find a value or give us
2479 * the default value (if there is one).
2480 */
2481 if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
2482 return (error);
2483 VERIFY0(nvlist_add_uint64(props, zfs_prop_to_name(prop), value));
2484 return (0);
2485 }
2486
2487 /*
2488 * inputs:
2489 * zc_name name of filesystem
2490 * zc_nvlist_dst_size size of buffer for zpl property nvlist
2491 *
2492 * outputs:
2493 * zc_nvlist_dst zpl property nvlist
2494 * zc_nvlist_dst_size size of zpl property nvlist
2495 */
2496 static int
zfs_ioc_objset_zplprops(zfs_cmd_t * zc)2497 zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
2498 {
2499 objset_t *os;
2500 int err;
2501
2502 /* XXX reading without owning */
2503 if ((err = dmu_objset_hold(zc->zc_name, FTAG, &os)))
2504 return (err);
2505
2506 dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2507
2508 /*
2509 * NB: nvl_add_zplprop() will read the objset contents,
2510 * which we aren't supposed to do with a DS_MODE_USER
2511 * hold, because it could be inconsistent.
2512 */
2513 if (zc->zc_nvlist_dst != 0 &&
2514 !zc->zc_objset_stats.dds_inconsistent &&
2515 dmu_objset_type(os) == DMU_OST_ZFS) {
2516 nvlist_t *nv;
2517
2518 VERIFY0(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP));
2519 if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
2520 (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
2521 (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
2522 (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0 &&
2523 (err = nvl_add_zplprop(os, nv,
2524 ZFS_PROP_DEFAULTUSERQUOTA)) == 0 &&
2525 (err = nvl_add_zplprop(os, nv,
2526 ZFS_PROP_DEFAULTGROUPQUOTA)) == 0 &&
2527 (err = nvl_add_zplprop(os, nv,
2528 ZFS_PROP_DEFAULTPROJECTQUOTA)) == 0 &&
2529 (err = nvl_add_zplprop(os, nv,
2530 ZFS_PROP_DEFAULTUSEROBJQUOTA)) == 0 &&
2531 (err = nvl_add_zplprop(os, nv,
2532 ZFS_PROP_DEFAULTGROUPOBJQUOTA)) == 0 &&
2533 (err = nvl_add_zplprop(os, nv,
2534 ZFS_PROP_DEFAULTPROJECTOBJQUOTA)) == 0)
2535 err = put_nvlist(zc, nv);
2536 nvlist_free(nv);
2537 } else {
2538 err = SET_ERROR(ENOENT);
2539 }
2540 dmu_objset_rele(os, FTAG);
2541 return (err);
2542 }
2543
2544 /*
2545 * inputs:
2546 * zc_name name of filesystem
2547 * zc_cookie zap cursor
2548 * zc_nvlist_dst_size size of buffer for property nvlist
2549 *
2550 * outputs:
2551 * zc_name name of next filesystem
2552 * zc_cookie zap cursor
2553 * zc_objset_stats stats
2554 * zc_nvlist_dst property nvlist
2555 * zc_nvlist_dst_size size of property nvlist
2556 */
2557 static int
zfs_ioc_dataset_list_next(zfs_cmd_t * zc)2558 zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
2559 {
2560 objset_t *os;
2561 int error;
2562 char *p;
2563 size_t orig_len = strlen(zc->zc_name);
2564
2565 top:
2566 if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os))) {
2567 if (error == ENOENT)
2568 error = SET_ERROR(ESRCH);
2569 return (error);
2570 }
2571
2572 p = strrchr(zc->zc_name, '/');
2573 if (p == NULL || p[1] != '\0')
2574 (void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
2575 p = zc->zc_name + strlen(zc->zc_name);
2576
2577 do {
2578 error = dmu_dir_list_next(os,
2579 sizeof (zc->zc_name) - (p - zc->zc_name), p,
2580 NULL, &zc->zc_cookie);
2581 if (error == ENOENT)
2582 error = SET_ERROR(ESRCH);
2583 } while (error == 0 && zfs_dataset_name_hidden(zc->zc_name));
2584 dmu_objset_rele(os, FTAG);
2585
2586 /*
2587 * If it's an internal dataset (ie. with a '$' in its name),
2588 * don't try to get stats for it, otherwise we'll return ENOENT.
2589 */
2590 if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
2591 error = zfs_ioc_objset_stats(zc); /* fill in the stats */
2592 if (error == ENOENT) {
2593 /* We lost a race with destroy, get the next one. */
2594 zc->zc_name[orig_len] = '\0';
2595 goto top;
2596 }
2597 }
2598 return (error);
2599 }
2600
2601 /*
2602 * inputs:
2603 * zc_name name of filesystem
2604 * zc_cookie zap cursor
2605 * zc_nvlist_src iteration range nvlist
2606 * zc_nvlist_src_size size of iteration range nvlist
2607 *
2608 * outputs:
2609 * zc_name name of next snapshot
2610 * zc_objset_stats stats
2611 * zc_nvlist_dst property nvlist
2612 * zc_nvlist_dst_size size of property nvlist
2613 */
2614 static int
zfs_ioc_snapshot_list_next(zfs_cmd_t * zc)2615 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
2616 {
2617 int error;
2618 objset_t *os, *ossnap;
2619 dsl_dataset_t *ds;
2620 uint64_t min_txg = 0, max_txg = 0;
2621
2622 if (zc->zc_nvlist_src_size != 0) {
2623 nvlist_t *props = NULL;
2624 error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2625 zc->zc_iflags, &props);
2626 if (error != 0)
2627 return (error);
2628 (void) nvlist_lookup_uint64(props, SNAP_ITER_MIN_TXG,
2629 &min_txg);
2630 (void) nvlist_lookup_uint64(props, SNAP_ITER_MAX_TXG,
2631 &max_txg);
2632 nvlist_free(props);
2633 }
2634
2635 error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2636 if (error != 0) {
2637 return (error == ENOENT ? SET_ERROR(ESRCH) : error);
2638 }
2639
2640 /*
2641 * A dataset name of maximum length cannot have any snapshots,
2642 * so exit immediately.
2643 */
2644 if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >=
2645 ZFS_MAX_DATASET_NAME_LEN) {
2646 dmu_objset_rele(os, FTAG);
2647 return (SET_ERROR(ESRCH));
2648 }
2649
2650 while (error == 0) {
2651 if (issig()) {
2652 error = SET_ERROR(EINTR);
2653 break;
2654 }
2655
2656 error = dmu_snapshot_list_next(os,
2657 sizeof (zc->zc_name) - strlen(zc->zc_name),
2658 zc->zc_name + strlen(zc->zc_name), &zc->zc_obj,
2659 &zc->zc_cookie, NULL);
2660 if (error == ENOENT) {
2661 error = SET_ERROR(ESRCH);
2662 break;
2663 } else if (error != 0) {
2664 break;
2665 }
2666
2667 error = dsl_dataset_hold_obj(dmu_objset_pool(os), zc->zc_obj,
2668 FTAG, &ds);
2669 if (error != 0)
2670 break;
2671
2672 if ((min_txg != 0 && dsl_get_creationtxg(ds) < min_txg) ||
2673 (max_txg != 0 && dsl_get_creationtxg(ds) > max_txg)) {
2674 dsl_dataset_rele(ds, FTAG);
2675 /* undo snapshot name append */
2676 *(strchr(zc->zc_name, '@') + 1) = '\0';
2677 /* skip snapshot */
2678 continue;
2679 }
2680
2681 if (zc->zc_simple) {
2682 dsl_dataset_fast_stat(ds, &zc->zc_objset_stats);
2683 dsl_dataset_rele(ds, FTAG);
2684 break;
2685 }
2686
2687 if ((error = dmu_objset_from_ds(ds, &ossnap)) != 0) {
2688 dsl_dataset_rele(ds, FTAG);
2689 break;
2690 }
2691 if ((error = zfs_ioc_objset_stats_impl(zc, ossnap)) != 0) {
2692 dsl_dataset_rele(ds, FTAG);
2693 break;
2694 }
2695 dsl_dataset_rele(ds, FTAG);
2696 break;
2697 }
2698
2699 dmu_objset_rele(os, FTAG);
2700 /* if we failed, undo the @ that we tacked on to zc_name */
2701 if (error != 0)
2702 *strchr(zc->zc_name, '@') = '\0';
2703 return (error);
2704 }
2705
2706 static int
zfs_prop_set_userquota(const char * dsname,nvpair_t * pair)2707 zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
2708 {
2709 const char *propname = nvpair_name(pair);
2710 uint64_t *valary;
2711 unsigned int vallen;
2712 const char *dash, *domain;
2713 zfs_userquota_prop_t type;
2714 uint64_t rid;
2715 uint64_t quota;
2716 zfsvfs_t *zfsvfs;
2717 int err;
2718
2719 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2720 nvlist_t *attrs;
2721 VERIFY0(nvpair_value_nvlist(pair, &attrs));
2722 if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2723 &pair) != 0)
2724 return (SET_ERROR(EINVAL));
2725 }
2726
2727 /*
2728 * A correctly constructed propname is encoded as
2729 * userquota@<rid>-<domain>.
2730 */
2731 if ((dash = strchr(propname, '-')) == NULL ||
2732 nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
2733 vallen != 3)
2734 return (SET_ERROR(EINVAL));
2735
2736 domain = dash + 1;
2737 type = valary[0];
2738 rid = valary[1];
2739 quota = valary[2];
2740
2741 err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
2742 if (err == 0) {
2743 err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
2744 zfsvfs_rele(zfsvfs, FTAG);
2745 }
2746
2747 return (err);
2748 }
2749
2750 /*
2751 * If the named property is one that has a special function to set its value,
2752 * return 0 on success and a positive error code on failure; otherwise if it is
2753 * not one of the special properties handled by this function, return -1.
2754 *
2755 * XXX: It would be better for callers of the property interface if we handled
2756 * these special cases in dsl_prop.c (in the dsl layer).
2757 */
2758 static int
zfs_prop_set_special(const char * dsname,zprop_source_t source,nvpair_t * pair)2759 zfs_prop_set_special(const char *dsname, zprop_source_t source,
2760 nvpair_t *pair)
2761 {
2762 const char *propname = nvpair_name(pair);
2763 zfs_prop_t prop = zfs_name_to_prop(propname);
2764 uint64_t intval = 0;
2765 const char *strval = NULL;
2766 int err = -1;
2767
2768 if (prop == ZPROP_USERPROP) {
2769 if (zfs_prop_userquota(propname))
2770 return (zfs_prop_set_userquota(dsname, pair));
2771 return (-1);
2772 }
2773
2774 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2775 nvlist_t *attrs;
2776 VERIFY0(nvpair_value_nvlist(pair, &attrs));
2777 VERIFY0(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &pair));
2778 }
2779
2780 /* all special properties are numeric except for keylocation */
2781 if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
2782 strval = fnvpair_value_string(pair);
2783 } else {
2784 intval = fnvpair_value_uint64(pair);
2785 }
2786
2787 switch (prop) {
2788 case ZFS_PROP_QUOTA:
2789 err = dsl_dir_set_quota(dsname, source, intval);
2790 break;
2791 case ZFS_PROP_REFQUOTA:
2792 err = dsl_dataset_set_refquota(dsname, source, intval);
2793 break;
2794 case ZFS_PROP_FILESYSTEM_LIMIT:
2795 case ZFS_PROP_SNAPSHOT_LIMIT:
2796 if (intval == UINT64_MAX) {
2797 /* clearing the limit, just do it */
2798 err = 0;
2799 } else {
2800 err = dsl_dir_activate_fs_ss_limit(dsname);
2801 }
2802 /*
2803 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2804 * default path to set the value in the nvlist.
2805 */
2806 if (err == 0)
2807 err = -1;
2808 break;
2809 case ZFS_PROP_KEYLOCATION:
2810 err = dsl_crypto_can_set_keylocation(dsname, strval);
2811
2812 /*
2813 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2814 * default path to set the value in the nvlist.
2815 */
2816 if (err == 0)
2817 err = -1;
2818 break;
2819 case ZFS_PROP_RESERVATION:
2820 err = dsl_dir_set_reservation(dsname, source, intval);
2821 break;
2822 case ZFS_PROP_REFRESERVATION:
2823 err = dsl_dataset_set_refreservation(dsname, source, intval);
2824 break;
2825 case ZFS_PROP_COMPRESSION:
2826 err = dsl_dataset_set_compression(dsname, source, intval);
2827 /*
2828 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2829 * default path to set the value in the nvlist.
2830 */
2831 if (err == 0)
2832 err = -1;
2833 break;
2834 case ZFS_PROP_VOLSIZE:
2835 err = zvol_set_volsize(dsname, intval);
2836 break;
2837 case ZFS_PROP_VOLTHREADING:
2838 err = zvol_set_volthreading(dsname, intval);
2839 /*
2840 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2841 * default path to set the value in the nvlist.
2842 */
2843 if (err == 0)
2844 err = -1;
2845 break;
2846 case ZFS_PROP_SNAPDEV:
2847 case ZFS_PROP_VOLMODE:
2848 err = zvol_set_common(dsname, prop, source, intval);
2849 break;
2850 case ZFS_PROP_READONLY:
2851 err = zvol_set_ro(dsname, intval);
2852 /*
2853 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2854 * default path to set the value in the nvlist.
2855 */
2856 if (err == 0)
2857 err = -1;
2858 break;
2859 case ZFS_PROP_VERSION:
2860 {
2861 zfsvfs_t *zfsvfs;
2862
2863 if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2864 break;
2865
2866 err = zfs_set_version(zfsvfs, intval);
2867 zfsvfs_rele(zfsvfs, FTAG);
2868
2869 if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
2870 zfs_cmd_t *zc;
2871
2872 zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2873 (void) strlcpy(zc->zc_name, dsname,
2874 sizeof (zc->zc_name));
2875 (void) zfs_ioc_userspace_upgrade(zc);
2876 (void) zfs_ioc_id_quota_upgrade(zc);
2877 kmem_free(zc, sizeof (zfs_cmd_t));
2878 }
2879 break;
2880 }
2881 case ZFS_PROP_LONGNAME:
2882 {
2883 zfsvfs_t *zfsvfs;
2884
2885 /*
2886 * Ignore the checks if the property is being applied as part of
2887 * 'zfs receive'. Because, we already check if the local pool
2888 * has SPA_FEATURE_LONGNAME enabled in dmu_recv_begin_check().
2889 */
2890 if (source == ZPROP_SRC_RECEIVED) {
2891 cmn_err(CE_NOTE, "Skipping ZFS_PROP_LONGNAME checks "
2892 "for dsname=%s\n", dsname);
2893 err = -1;
2894 break;
2895 }
2896
2897 if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE)) != 0) {
2898 cmn_err(CE_WARN, "%s:%d Failed to hold for dsname=%s "
2899 "err=%d\n", __FILE__, __LINE__, dsname, err);
2900 break;
2901 }
2902
2903 if (!spa_feature_is_enabled(zfsvfs->z_os->os_spa,
2904 SPA_FEATURE_LONGNAME)) {
2905 err = ENOTSUP;
2906 } else {
2907 /*
2908 * Set err to -1 to force the zfs_set_prop_nvlist code
2909 * down the default path to set the value in the nvlist.
2910 */
2911 err = -1;
2912 }
2913 zfsvfs_rele(zfsvfs, FTAG);
2914 break;
2915 }
2916 case ZFS_PROP_DEFAULTUSERQUOTA:
2917 case ZFS_PROP_DEFAULTGROUPQUOTA:
2918 case ZFS_PROP_DEFAULTPROJECTQUOTA:
2919 case ZFS_PROP_DEFAULTUSEROBJQUOTA:
2920 case ZFS_PROP_DEFAULTGROUPOBJQUOTA:
2921 case ZFS_PROP_DEFAULTPROJECTOBJQUOTA:
2922 {
2923 zfsvfs_t *zfsvfs;
2924 if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2925 break;
2926 err = zfs_set_default_quota(zfsvfs, prop, intval);
2927 zfsvfs_rele(zfsvfs, FTAG);
2928 break;
2929 }
2930 case ZFS_PROP_ZONED_UID:
2931 {
2932 uint64_t old_uid = 0;
2933 (void) dsl_prop_get(dsname, "zoned_uid", 8, 1, &old_uid, NULL);
2934 if (old_uid != 0)
2935 (void) zone_dataset_detach_uid(CRED(), dsname,
2936 (uid_t)old_uid);
2937 if (intval != 0) {
2938 err = zone_dataset_attach_uid(CRED(), dsname,
2939 (uid_t)intval);
2940 if (err == ENXIO)
2941 err = ZFS_ERR_NO_USER_NS_SUPPORT;
2942 if (err != 0)
2943 break;
2944 }
2945 /*
2946 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2947 * default path to set the value in the nvlist.
2948 */
2949 err = -1;
2950 break;
2951 }
2952 default:
2953 err = -1;
2954 }
2955
2956 return (err);
2957 }
2958
2959 static boolean_t
zfs_is_namespace_prop(zfs_prop_t prop)2960 zfs_is_namespace_prop(zfs_prop_t prop)
2961 {
2962 switch (prop) {
2963
2964 case ZFS_PROP_ATIME:
2965 case ZFS_PROP_RELATIME:
2966 case ZFS_PROP_DEVICES:
2967 case ZFS_PROP_EXEC:
2968 case ZFS_PROP_SETUID:
2969 case ZFS_PROP_READONLY:
2970 case ZFS_PROP_XATTR:
2971 case ZFS_PROP_NBMAND:
2972 return (B_TRUE);
2973
2974 default:
2975 return (B_FALSE);
2976 }
2977 }
2978
2979 /*
2980 * This function is best effort. If it fails to set any of the given properties,
2981 * it continues to set as many as it can and returns the last error
2982 * encountered. If the caller provides a non-NULL errlist, it will be filled in
2983 * with the list of names of all the properties that failed along with the
2984 * corresponding error numbers.
2985 *
2986 * If every property is set successfully, zero is returned and errlist is not
2987 * modified.
2988 */
2989 int
zfs_set_prop_nvlist(const char * dsname,zprop_source_t source,nvlist_t * nvl,nvlist_t * errlist)2990 zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
2991 nvlist_t *errlist)
2992 {
2993 nvpair_t *pair;
2994 nvpair_t *propval;
2995 int rv = 0;
2996 int err;
2997 uint64_t intval;
2998 const char *strval;
2999 boolean_t should_update_mount_cache = B_FALSE;
3000
3001 nvlist_t *genericnvl = fnvlist_alloc();
3002 nvlist_t *retrynvl = fnvlist_alloc();
3003 retry:
3004 pair = NULL;
3005 while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
3006 const char *propname = nvpair_name(pair);
3007 zfs_prop_t prop = zfs_name_to_prop(propname);
3008 err = 0;
3009
3010 /* decode the property value */
3011 propval = pair;
3012 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
3013 nvlist_t *attrs;
3014 attrs = fnvpair_value_nvlist(pair);
3015 if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3016 &propval) != 0)
3017 err = SET_ERROR(EINVAL);
3018 }
3019
3020 /* Validate value type */
3021 if (err == 0 && source == ZPROP_SRC_INHERITED) {
3022 /* inherited properties are expected to be booleans */
3023 if (nvpair_type(propval) != DATA_TYPE_BOOLEAN)
3024 err = SET_ERROR(EINVAL);
3025 } else if (err == 0 && prop == ZPROP_USERPROP) {
3026 if (zfs_prop_user(propname)) {
3027 if (nvpair_type(propval) != DATA_TYPE_STRING)
3028 err = SET_ERROR(EINVAL);
3029 } else if (zfs_prop_userquota(propname)) {
3030 if (nvpair_type(propval) !=
3031 DATA_TYPE_UINT64_ARRAY)
3032 err = SET_ERROR(EINVAL);
3033 } else {
3034 err = SET_ERROR(EINVAL);
3035 }
3036 } else if (err == 0) {
3037 if (nvpair_type(propval) == DATA_TYPE_STRING) {
3038 if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
3039 err = SET_ERROR(EINVAL);
3040 } else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
3041 const char *unused;
3042
3043 intval = fnvpair_value_uint64(propval);
3044
3045 switch (zfs_prop_get_type(prop)) {
3046 case PROP_TYPE_NUMBER:
3047 break;
3048 case PROP_TYPE_STRING:
3049 err = SET_ERROR(EINVAL);
3050 break;
3051 case PROP_TYPE_INDEX:
3052 if (zfs_prop_index_to_string(prop,
3053 intval, &unused) != 0)
3054 err =
3055 SET_ERROR(ZFS_ERR_BADPROP);
3056 break;
3057 default:
3058 cmn_err(CE_PANIC,
3059 "unknown property type");
3060 }
3061 } else {
3062 err = SET_ERROR(EINVAL);
3063 }
3064 }
3065
3066 /* Validate permissions */
3067 if (err == 0)
3068 err = zfs_check_settable(dsname, pair, CRED());
3069
3070 if (err == 0) {
3071 if (source == ZPROP_SRC_INHERITED)
3072 err = -1; /* does not need special handling */
3073 else
3074 err = zfs_prop_set_special(dsname, source,
3075 pair);
3076 if (err == -1) {
3077 /*
3078 * For better performance we build up a list of
3079 * properties to set in a single transaction.
3080 */
3081 err = nvlist_add_nvpair(genericnvl, pair);
3082 } else if (err != 0 && nvl != retrynvl) {
3083 /*
3084 * This may be a spurious error caused by
3085 * receiving quota and reservation out of order.
3086 * Try again in a second pass.
3087 */
3088 err = nvlist_add_nvpair(retrynvl, pair);
3089 }
3090 }
3091
3092 if (err != 0) {
3093 if (errlist != NULL)
3094 fnvlist_add_int32(errlist, propname, err);
3095 rv = err;
3096 }
3097
3098 if (zfs_is_namespace_prop(prop))
3099 should_update_mount_cache = B_TRUE;
3100 }
3101
3102 if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
3103 nvl = retrynvl;
3104 goto retry;
3105 }
3106
3107 if (nvlist_empty(genericnvl))
3108 goto out;
3109
3110 /*
3111 * Try to set them all in one batch.
3112 */
3113 err = dsl_props_set(dsname, source, genericnvl);
3114 if (err == 0)
3115 goto out;
3116
3117 /*
3118 * If batching fails, we still want to set as many properties as we
3119 * can, so try setting them individually.
3120 */
3121 pair = NULL;
3122 while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
3123 const char *propname = nvpair_name(pair);
3124
3125 propval = pair;
3126 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
3127 nvlist_t *attrs;
3128 attrs = fnvpair_value_nvlist(pair);
3129 propval = fnvlist_lookup_nvpair(attrs, ZPROP_VALUE);
3130 }
3131
3132 if (nvpair_type(propval) == DATA_TYPE_STRING) {
3133 strval = fnvpair_value_string(propval);
3134 err = dsl_prop_set_string(dsname, propname,
3135 source, strval);
3136 } else if (nvpair_type(propval) == DATA_TYPE_BOOLEAN) {
3137 err = dsl_prop_inherit(dsname, propname, source);
3138 } else {
3139 intval = fnvpair_value_uint64(propval);
3140 err = dsl_prop_set_int(dsname, propname, source,
3141 intval);
3142 }
3143
3144 if (err != 0) {
3145 if (errlist != NULL) {
3146 fnvlist_add_int32(errlist, propname, err);
3147 }
3148 rv = err;
3149 }
3150 }
3151
3152 out:
3153 if (should_update_mount_cache)
3154 zfs_ioctl_update_mount_cache(dsname);
3155
3156 nvlist_free(genericnvl);
3157 nvlist_free(retrynvl);
3158
3159 return (rv);
3160 }
3161
3162 /*
3163 * Check that all the properties are valid user properties.
3164 */
3165 static int
zfs_check_userprops(nvlist_t * nvl)3166 zfs_check_userprops(nvlist_t *nvl)
3167 {
3168 nvpair_t *pair = NULL;
3169
3170 while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
3171 const char *propname = nvpair_name(pair);
3172
3173 if (!zfs_prop_user(propname) ||
3174 nvpair_type(pair) != DATA_TYPE_STRING)
3175 return (SET_ERROR(EINVAL));
3176
3177 if (strlen(propname) >= ZAP_MAXNAMELEN)
3178 return (SET_ERROR(ENAMETOOLONG));
3179
3180 if (strlen(fnvpair_value_string(pair)) >= ZAP_MAXVALUELEN)
3181 return (SET_ERROR(E2BIG));
3182 }
3183 return (0);
3184 }
3185
3186 static void
props_skip(nvlist_t * props,nvlist_t * skipped,nvlist_t ** newprops)3187 props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
3188 {
3189 nvpair_t *pair;
3190
3191 VERIFY0(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP));
3192
3193 pair = NULL;
3194 while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
3195 if (nvlist_exists(skipped, nvpair_name(pair)))
3196 continue;
3197
3198 VERIFY0(nvlist_add_nvpair(*newprops, pair));
3199 }
3200 }
3201
3202 static int
clear_received_props(const char * dsname,nvlist_t * props,nvlist_t * skipped)3203 clear_received_props(const char *dsname, nvlist_t *props,
3204 nvlist_t *skipped)
3205 {
3206 int err = 0;
3207 nvlist_t *cleared_props = NULL;
3208 props_skip(props, skipped, &cleared_props);
3209 if (!nvlist_empty(cleared_props)) {
3210 /*
3211 * Acts on local properties until the dataset has received
3212 * properties at least once on or after SPA_VERSION_RECVD_PROPS.
3213 */
3214 zprop_source_t flags = (ZPROP_SRC_NONE |
3215 (dsl_prop_get_hasrecvd(dsname) ? ZPROP_SRC_RECEIVED : 0));
3216 err = zfs_set_prop_nvlist(dsname, flags, cleared_props, NULL);
3217 }
3218 nvlist_free(cleared_props);
3219 return (err);
3220 }
3221
3222 /*
3223 * inputs:
3224 * zc_name name of filesystem
3225 * zc_value name of property to set
3226 * zc_nvlist_src{_size} nvlist of properties to apply
3227 * zc_cookie received properties flag
3228 *
3229 * outputs:
3230 * zc_nvlist_dst{_size} error for each unapplied received property
3231 */
3232 static int
zfs_ioc_set_prop(zfs_cmd_t * zc)3233 zfs_ioc_set_prop(zfs_cmd_t *zc)
3234 {
3235 nvlist_t *nvl;
3236 boolean_t received = zc->zc_cookie;
3237 zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
3238 ZPROP_SRC_LOCAL);
3239 nvlist_t *errors;
3240 int error;
3241
3242 if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3243 zc->zc_iflags, &nvl)) != 0)
3244 return (error);
3245
3246 if (received) {
3247 nvlist_t *origprops;
3248
3249 if (dsl_prop_get_received(zc->zc_name, &origprops) == 0) {
3250 (void) clear_received_props(zc->zc_name,
3251 origprops, nvl);
3252 nvlist_free(origprops);
3253 }
3254
3255 error = dsl_prop_set_hasrecvd(zc->zc_name);
3256 }
3257
3258 errors = fnvlist_alloc();
3259 if (error == 0)
3260 error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors);
3261
3262 if (zc->zc_nvlist_dst != 0 && errors != NULL) {
3263 (void) put_nvlist(zc, errors);
3264 }
3265
3266 nvlist_free(errors);
3267 nvlist_free(nvl);
3268 return (error);
3269 }
3270
3271 /*
3272 * inputs:
3273 * zc_name name of filesystem
3274 * zc_value name of property to inherit
3275 * zc_cookie revert to received value if TRUE
3276 *
3277 * outputs: none
3278 */
3279 static int
zfs_ioc_inherit_prop(zfs_cmd_t * zc)3280 zfs_ioc_inherit_prop(zfs_cmd_t *zc)
3281 {
3282 const char *propname = zc->zc_value;
3283 zfs_prop_t prop = zfs_name_to_prop(propname);
3284 boolean_t received = zc->zc_cookie;
3285 zprop_source_t source = (received
3286 ? ZPROP_SRC_NONE /* revert to received value, if any */
3287 : ZPROP_SRC_INHERITED); /* explicitly inherit */
3288 nvlist_t *dummy;
3289 nvpair_t *pair;
3290 zprop_type_t type;
3291 int err;
3292
3293 if (!received) {
3294 /*
3295 * Only check this in the non-received case. We want to allow
3296 * 'inherit -S' to revert non-inheritable properties like quota
3297 * and reservation to the received or default values even though
3298 * they are not considered inheritable.
3299 */
3300 if (prop != ZPROP_USERPROP && !zfs_prop_inheritable(prop))
3301 return (SET_ERROR(EINVAL));
3302 }
3303
3304 if (prop == ZPROP_USERPROP) {
3305 if (!zfs_prop_user(propname))
3306 return (SET_ERROR(EINVAL));
3307
3308 type = PROP_TYPE_STRING;
3309 } else if (prop == ZFS_PROP_VOLSIZE || prop == ZFS_PROP_VERSION) {
3310 return (SET_ERROR(EINVAL));
3311 } else {
3312 type = zfs_prop_get_type(prop);
3313 }
3314
3315 /*
3316 * zfs_prop_set_special() expects properties in the form of an
3317 * nvpair with type info.
3318 */
3319 dummy = fnvlist_alloc();
3320
3321 switch (type) {
3322 case PROP_TYPE_STRING:
3323 VERIFY0(nvlist_add_string(dummy, propname, ""));
3324 break;
3325 case PROP_TYPE_NUMBER:
3326 case PROP_TYPE_INDEX:
3327 VERIFY0(nvlist_add_uint64(dummy, propname, 0));
3328 break;
3329 default:
3330 err = SET_ERROR(EINVAL);
3331 goto errout;
3332 }
3333
3334 pair = nvlist_next_nvpair(dummy, NULL);
3335 if (pair == NULL) {
3336 err = SET_ERROR(EINVAL);
3337 } else {
3338 err = zfs_prop_set_special(zc->zc_name, source, pair);
3339 if (err == -1) /* property is not "special", needs handling */
3340 err = dsl_prop_inherit(zc->zc_name, zc->zc_value,
3341 source);
3342 }
3343
3344 errout:
3345 nvlist_free(dummy);
3346 return (err);
3347 }
3348
3349 static int
zfs_ioc_pool_set_props(zfs_cmd_t * zc)3350 zfs_ioc_pool_set_props(zfs_cmd_t *zc)
3351 {
3352 nvlist_t *props;
3353 spa_t *spa;
3354 int error;
3355 nvpair_t *pair;
3356
3357 if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3358 zc->zc_iflags, &props)))
3359 return (error);
3360
3361 /*
3362 * If the only property is the configfile, then just do a spa_lookup()
3363 * to handle the faulted case.
3364 */
3365 pair = nvlist_next_nvpair(props, NULL);
3366 if (pair != NULL && strcmp(nvpair_name(pair),
3367 zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
3368 nvlist_next_nvpair(props, pair) == NULL) {
3369 spa_namespace_enter(FTAG);
3370 if ((spa = spa_lookup(zc->zc_name)) != NULL) {
3371 spa_configfile_set(spa, props, B_FALSE);
3372 spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE);
3373 }
3374 spa_namespace_exit(FTAG);
3375 if (spa != NULL) {
3376 nvlist_free(props);
3377 return (0);
3378 }
3379 }
3380
3381 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
3382 nvlist_free(props);
3383 return (error);
3384 }
3385
3386 error = spa_prop_set(spa, props);
3387
3388 nvlist_free(props);
3389 spa_close(spa, FTAG);
3390
3391 return (error);
3392 }
3393
3394 /*
3395 * innvl: {
3396 * "get_props_names": [ "prop1", "prop2", ..., "propN" ]
3397 * }
3398 */
3399
3400 static const zfs_ioc_key_t zfs_keys_get_props[] = {
3401 { ZPOOL_GET_PROPS_NAMES, DATA_TYPE_STRING_ARRAY, ZK_OPTIONAL },
3402 };
3403
3404 static int
zfs_ioc_pool_get_props(const char * pool,nvlist_t * innvl,nvlist_t * outnvl)3405 zfs_ioc_pool_get_props(const char *pool, nvlist_t *innvl, nvlist_t *outnvl)
3406 {
3407 spa_t *spa;
3408 char **props = NULL;
3409 unsigned int n_props = 0;
3410 int error;
3411
3412 if (nvlist_lookup_string_array(innvl, ZPOOL_GET_PROPS_NAMES,
3413 &props, &n_props) != 0) {
3414 props = NULL;
3415 }
3416
3417 if ((error = spa_open(pool, &spa, FTAG)) != 0) {
3418 /*
3419 * If the pool is faulted, there may be properties we can still
3420 * get (such as altroot and cachefile), so attempt to get them
3421 * anyway.
3422 */
3423 spa_namespace_enter(FTAG);
3424 if ((spa = spa_lookup(pool)) != NULL) {
3425 error = spa_prop_get(spa, outnvl);
3426 if (error == 0 && props != NULL)
3427 error = spa_prop_get_nvlist(spa, props, n_props,
3428 outnvl);
3429 }
3430 spa_namespace_exit(FTAG);
3431 } else {
3432 error = spa_prop_get(spa, outnvl);
3433 if (error == 0 && props != NULL)
3434 error = spa_prop_get_nvlist(spa, props, n_props,
3435 outnvl);
3436 spa_close(spa, FTAG);
3437 }
3438
3439 return (error);
3440 }
3441
3442 /*
3443 * innvl: {
3444 * "vdevprops_set_vdev" -> guid
3445 * "vdevprops_set_props" -> { prop -> value }
3446 * }
3447 *
3448 * outnvl: propname -> error code (int32)
3449 */
3450 static const zfs_ioc_key_t zfs_keys_vdev_set_props[] = {
3451 {ZPOOL_VDEV_PROPS_SET_VDEV, DATA_TYPE_UINT64, 0},
3452 {ZPOOL_VDEV_PROPS_SET_PROPS, DATA_TYPE_NVLIST, 0}
3453 };
3454
3455 static int
zfs_ioc_vdev_set_props(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)3456 zfs_ioc_vdev_set_props(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3457 {
3458 spa_t *spa;
3459 int error;
3460 vdev_t *vd;
3461 uint64_t vdev_guid;
3462
3463 /* Early validation */
3464 if (nvlist_lookup_uint64(innvl, ZPOOL_VDEV_PROPS_SET_VDEV,
3465 &vdev_guid) != 0)
3466 return (SET_ERROR(EINVAL));
3467
3468 if (outnvl == NULL)
3469 return (SET_ERROR(EINVAL));
3470
3471 if ((error = spa_open(poolname, &spa, FTAG)) != 0)
3472 return (error);
3473
3474 ASSERT(spa_writeable(spa));
3475
3476 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
3477 if ((vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE)) == NULL) {
3478 spa_config_exit(spa, SCL_CONFIG, FTAG);
3479 spa_close(spa, FTAG);
3480 return (SET_ERROR(ENOENT));
3481 }
3482
3483 error = vdev_prop_set(vd, innvl, outnvl);
3484 spa_config_exit(spa, SCL_CONFIG, FTAG);
3485
3486 spa_close(spa, FTAG);
3487
3488 return (error);
3489 }
3490
3491 /*
3492 * innvl: {
3493 * "vdevprops_get_vdev" -> guid
3494 * (optional) "vdevprops_get_props" -> { propname -> propid }
3495 * }
3496 *
3497 * outnvl: propname -> value
3498 */
3499 static const zfs_ioc_key_t zfs_keys_vdev_get_props[] = {
3500 {ZPOOL_VDEV_PROPS_GET_VDEV, DATA_TYPE_UINT64, 0},
3501 {ZPOOL_VDEV_PROPS_GET_PROPS, DATA_TYPE_NVLIST, ZK_OPTIONAL}
3502 };
3503
3504 static int
zfs_ioc_vdev_get_props(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)3505 zfs_ioc_vdev_get_props(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3506 {
3507 spa_t *spa;
3508 int error;
3509 vdev_t *vd;
3510 uint64_t vdev_guid;
3511
3512 /* Early validation */
3513 if (nvlist_lookup_uint64(innvl, ZPOOL_VDEV_PROPS_GET_VDEV,
3514 &vdev_guid) != 0)
3515 return (SET_ERROR(EINVAL));
3516
3517 if (outnvl == NULL)
3518 return (SET_ERROR(EINVAL));
3519
3520 if ((error = spa_open(poolname, &spa, FTAG)) != 0)
3521 return (error);
3522
3523 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
3524 if ((vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE)) == NULL) {
3525 spa_config_exit(spa, SCL_CONFIG, FTAG);
3526 spa_close(spa, FTAG);
3527 return (SET_ERROR(ENOENT));
3528 }
3529
3530 error = vdev_prop_get(vd, innvl, outnvl);
3531 spa_config_exit(spa, SCL_CONFIG, FTAG);
3532
3533 spa_close(spa, FTAG);
3534
3535 return (error);
3536 }
3537
3538 /*
3539 * inputs:
3540 * zc_name name of filesystem
3541 * zc_nvlist_src{_size} nvlist of delegated permissions
3542 * zc_perm_action allow/unallow flag
3543 *
3544 * outputs: none
3545 */
3546 static int
zfs_ioc_set_fsacl(zfs_cmd_t * zc)3547 zfs_ioc_set_fsacl(zfs_cmd_t *zc)
3548 {
3549 int error;
3550 nvlist_t *fsaclnv = NULL;
3551
3552 if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3553 zc->zc_iflags, &fsaclnv)) != 0)
3554 return (error);
3555
3556 /*
3557 * Verify nvlist is constructed correctly
3558 */
3559 if (zfs_deleg_verify_nvlist(fsaclnv) != 0) {
3560 nvlist_free(fsaclnv);
3561 return (SET_ERROR(EINVAL));
3562 }
3563
3564 /*
3565 * If we don't have PRIV_SYS_MOUNT, then validate
3566 * that user is allowed to hand out each permission in
3567 * the nvlist(s)
3568 */
3569
3570 error = secpolicy_zfs(CRED());
3571 if (error != 0) {
3572 if (zc->zc_perm_action == B_FALSE) {
3573 error = dsl_deleg_can_allow(zc->zc_name,
3574 fsaclnv, CRED());
3575 } else {
3576 error = dsl_deleg_can_unallow(zc->zc_name,
3577 fsaclnv, CRED());
3578 }
3579 }
3580
3581 if (error == 0)
3582 error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
3583
3584 nvlist_free(fsaclnv);
3585 return (error);
3586 }
3587
3588 /*
3589 * inputs:
3590 * zc_name name of filesystem
3591 *
3592 * outputs:
3593 * zc_nvlist_src{_size} nvlist of delegated permissions
3594 */
3595 static int
zfs_ioc_get_fsacl(zfs_cmd_t * zc)3596 zfs_ioc_get_fsacl(zfs_cmd_t *zc)
3597 {
3598 nvlist_t *nvp;
3599 int error;
3600
3601 if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
3602 error = put_nvlist(zc, nvp);
3603 nvlist_free(nvp);
3604 }
3605
3606 return (error);
3607 }
3608
3609 static void
zfs_create_cb(objset_t * os,void * arg,cred_t * cr,dmu_tx_t * tx)3610 zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
3611 {
3612 zfs_creat_t *zct = arg;
3613
3614 zfs_create_fs(os, cr, zct->zct_zplprops, tx);
3615 }
3616
3617 #define ZFS_PROP_UNDEFINED ((uint64_t)-1)
3618
3619 /*
3620 * inputs:
3621 * os parent objset pointer (NULL if root fs)
3622 * fuids_ok fuids allowed in this version of the spa?
3623 * sa_ok SAs allowed in this version of the spa?
3624 * createprops list of properties requested by creator
3625 *
3626 * outputs:
3627 * zplprops values for the zplprops we attach to the master node object
3628 * is_ci true if requested file system will be purely case-insensitive
3629 *
3630 * Determine the settings for utf8only, normalization and
3631 * casesensitivity. Specific values may have been requested by the
3632 * creator and/or we can inherit values from the parent dataset. If
3633 * the file system is of too early a vintage, a creator can not
3634 * request settings for these properties, even if the requested
3635 * setting is the default value. We don't actually want to create dsl
3636 * properties for these, so remove them from the source nvlist after
3637 * processing.
3638 */
3639 static int
zfs_fill_zplprops_impl(objset_t * os,uint64_t zplver,boolean_t fuids_ok,boolean_t sa_ok,nvlist_t * createprops,nvlist_t * zplprops,boolean_t * is_ci)3640 zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
3641 boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
3642 nvlist_t *zplprops, boolean_t *is_ci)
3643 {
3644 uint64_t sense = ZFS_PROP_UNDEFINED;
3645 uint64_t norm = ZFS_PROP_UNDEFINED;
3646 uint64_t u8 = ZFS_PROP_UNDEFINED;
3647 uint64_t duq = ZFS_PROP_UNDEFINED, duoq = ZFS_PROP_UNDEFINED;
3648 uint64_t dgq = ZFS_PROP_UNDEFINED, dgoq = ZFS_PROP_UNDEFINED;
3649 uint64_t dpq = ZFS_PROP_UNDEFINED, dpoq = ZFS_PROP_UNDEFINED;
3650 int error;
3651
3652 ASSERT(zplprops != NULL);
3653
3654 /* parent dataset must be a filesystem */
3655 if (os != NULL && os->os_phys->os_type != DMU_OST_ZFS)
3656 return (SET_ERROR(ZFS_ERR_WRONG_PARENT));
3657
3658 /*
3659 * Pull out creator prop choices, if any.
3660 */
3661 if (createprops) {
3662 (void) nvlist_lookup_uint64(createprops,
3663 zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
3664 (void) nvlist_lookup_uint64(createprops,
3665 zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
3666 (void) nvlist_remove_all(createprops,
3667 zfs_prop_to_name(ZFS_PROP_NORMALIZE));
3668 (void) nvlist_lookup_uint64(createprops,
3669 zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
3670 (void) nvlist_remove_all(createprops,
3671 zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
3672 (void) nvlist_lookup_uint64(createprops,
3673 zfs_prop_to_name(ZFS_PROP_CASE), &sense);
3674 (void) nvlist_remove_all(createprops,
3675 zfs_prop_to_name(ZFS_PROP_CASE));
3676 (void) nvlist_lookup_uint64(createprops,
3677 zfs_prop_to_name(ZFS_PROP_DEFAULTUSERQUOTA), &duq);
3678 (void) nvlist_remove_all(createprops,
3679 zfs_prop_to_name(ZFS_PROP_DEFAULTUSERQUOTA));
3680 (void) nvlist_lookup_uint64(createprops,
3681 zfs_prop_to_name(ZFS_PROP_DEFAULTGROUPQUOTA), &dgq);
3682 (void) nvlist_remove_all(createprops,
3683 zfs_prop_to_name(ZFS_PROP_DEFAULTGROUPQUOTA));
3684 (void) nvlist_lookup_uint64(createprops,
3685 zfs_prop_to_name(ZFS_PROP_DEFAULTPROJECTQUOTA), &dpq);
3686 (void) nvlist_remove_all(createprops,
3687 zfs_prop_to_name(ZFS_PROP_DEFAULTPROJECTQUOTA));
3688 (void) nvlist_lookup_uint64(createprops,
3689 zfs_prop_to_name(ZFS_PROP_DEFAULTUSEROBJQUOTA), &duoq);
3690 (void) nvlist_remove_all(createprops,
3691 zfs_prop_to_name(ZFS_PROP_DEFAULTUSEROBJQUOTA));
3692 (void) nvlist_lookup_uint64(createprops,
3693 zfs_prop_to_name(ZFS_PROP_DEFAULTGROUPOBJQUOTA), &dgoq);
3694 (void) nvlist_remove_all(createprops,
3695 zfs_prop_to_name(ZFS_PROP_DEFAULTGROUPOBJQUOTA));
3696 (void) nvlist_lookup_uint64(createprops,
3697 zfs_prop_to_name(ZFS_PROP_DEFAULTPROJECTOBJQUOTA), &dpoq);
3698 (void) nvlist_remove_all(createprops,
3699 zfs_prop_to_name(ZFS_PROP_DEFAULTPROJECTOBJQUOTA));
3700 }
3701
3702 /*
3703 * If the zpl version requested is whacky or the file system
3704 * or pool is version is too "young" to support normalization
3705 * and the creator tried to set a value for one of the props,
3706 * error out.
3707 */
3708 if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
3709 (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
3710 (zplver >= ZPL_VERSION_SA && !sa_ok) ||
3711 (zplver < ZPL_VERSION_NORMALIZATION &&
3712 (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
3713 sense != ZFS_PROP_UNDEFINED)))
3714 return (SET_ERROR(ENOTSUP));
3715
3716 /*
3717 * Put the version in the zplprops
3718 */
3719 VERIFY0(nvlist_add_uint64(zplprops,
3720 zfs_prop_to_name(ZFS_PROP_VERSION), zplver));
3721
3722 if (norm == ZFS_PROP_UNDEFINED &&
3723 (error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm)) != 0)
3724 return (error);
3725 VERIFY0(nvlist_add_uint64(zplprops,
3726 zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm));
3727
3728 /*
3729 * If we're normalizing, names must always be valid UTF-8 strings.
3730 */
3731 if (norm)
3732 u8 = 1;
3733 if (u8 == ZFS_PROP_UNDEFINED &&
3734 (error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8)) != 0)
3735 return (error);
3736 VERIFY0(nvlist_add_uint64(zplprops,
3737 zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8));
3738
3739 if (sense == ZFS_PROP_UNDEFINED &&
3740 (error = zfs_get_zplprop(os, ZFS_PROP_CASE, &sense)) != 0)
3741 return (error);
3742 VERIFY0(nvlist_add_uint64(zplprops,
3743 zfs_prop_to_name(ZFS_PROP_CASE), sense));
3744
3745 if (duq == ZFS_PROP_UNDEFINED &&
3746 (error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTUSERQUOTA, &duq)) != 0)
3747 return (error);
3748 VERIFY0(nvlist_add_uint64(zplprops,
3749 zfs_prop_to_name(ZFS_PROP_DEFAULTUSERQUOTA), duq));
3750
3751 if (dgq == ZFS_PROP_UNDEFINED &&
3752 (error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTGROUPQUOTA,
3753 &dgq)) != 0)
3754 return (error);
3755 VERIFY0(nvlist_add_uint64(zplprops,
3756 zfs_prop_to_name(ZFS_PROP_DEFAULTGROUPQUOTA), dgq));
3757
3758 if (dpq == ZFS_PROP_UNDEFINED &&
3759 (error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTPROJECTQUOTA,
3760 &dpq)) != 0)
3761 return (error);
3762 VERIFY0(nvlist_add_uint64(zplprops,
3763 zfs_prop_to_name(ZFS_PROP_DEFAULTPROJECTQUOTA), dpq));
3764
3765 if (duoq == ZFS_PROP_UNDEFINED &&
3766 (error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTUSEROBJQUOTA,
3767 &duoq)) != 0)
3768 return (error);
3769 VERIFY0(nvlist_add_uint64(zplprops,
3770 zfs_prop_to_name(ZFS_PROP_DEFAULTUSEROBJQUOTA), duoq));
3771
3772 if (dgoq == ZFS_PROP_UNDEFINED &&
3773 (error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTGROUPOBJQUOTA,
3774 &dgoq)) != 0)
3775 return (error);
3776 VERIFY0(nvlist_add_uint64(zplprops,
3777 zfs_prop_to_name(ZFS_PROP_DEFAULTGROUPOBJQUOTA), dgoq));
3778
3779 if (dpoq == ZFS_PROP_UNDEFINED &&
3780 (error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTPROJECTOBJQUOTA,
3781 &dpoq)) != 0)
3782 return (error);
3783 VERIFY0(nvlist_add_uint64(zplprops,
3784 zfs_prop_to_name(ZFS_PROP_DEFAULTPROJECTOBJQUOTA), dpoq));
3785
3786 if (is_ci)
3787 *is_ci = (sense == ZFS_CASE_INSENSITIVE);
3788
3789 return (0);
3790 }
3791
3792 static int
zfs_fill_zplprops(const char * dataset,nvlist_t * createprops,nvlist_t * zplprops,boolean_t * is_ci)3793 zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
3794 nvlist_t *zplprops, boolean_t *is_ci)
3795 {
3796 boolean_t fuids_ok, sa_ok;
3797 uint64_t zplver = ZPL_VERSION;
3798 objset_t *os = NULL;
3799 char parentname[ZFS_MAX_DATASET_NAME_LEN];
3800 spa_t *spa;
3801 uint64_t spa_vers;
3802 int error;
3803
3804 zfs_get_parent(dataset, parentname, sizeof (parentname));
3805
3806 if ((error = spa_open(dataset, &spa, FTAG)) != 0)
3807 return (error);
3808
3809 spa_vers = spa_version(spa);
3810 spa_close(spa, FTAG);
3811
3812 zplver = zfs_zpl_version_map(spa_vers);
3813 fuids_ok = (zplver >= ZPL_VERSION_FUID);
3814 sa_ok = (zplver >= ZPL_VERSION_SA);
3815
3816 /*
3817 * Open parent object set so we can inherit zplprop values.
3818 */
3819 if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
3820 return (error);
3821
3822 error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
3823 zplprops, is_ci);
3824 dmu_objset_rele(os, FTAG);
3825 return (error);
3826 }
3827
3828 static int
zfs_fill_zplprops_root(uint64_t spa_vers,nvlist_t * createprops,nvlist_t * zplprops,boolean_t * is_ci)3829 zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
3830 nvlist_t *zplprops, boolean_t *is_ci)
3831 {
3832 boolean_t fuids_ok;
3833 boolean_t sa_ok;
3834 uint64_t zplver = ZPL_VERSION;
3835 int error;
3836
3837 zplver = zfs_zpl_version_map(spa_vers);
3838 fuids_ok = (zplver >= ZPL_VERSION_FUID);
3839 sa_ok = (zplver >= ZPL_VERSION_SA);
3840
3841 error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
3842 createprops, zplprops, is_ci);
3843 return (error);
3844 }
3845
3846 /*
3847 * innvl: {
3848 * "type" -> dmu_objset_type_t (int32)
3849 * (optional) "props" -> { prop -> value }
3850 * (optional) "hidden_args" -> { "wkeydata" -> value }
3851 * raw uint8_t array of encryption wrapping key data (32 bytes)
3852 * }
3853 *
3854 * outnvl: propname -> error code (int32)
3855 */
3856
3857 static const zfs_ioc_key_t zfs_keys_create[] = {
3858 {"type", DATA_TYPE_INT32, 0},
3859 {"props", DATA_TYPE_NVLIST, ZK_OPTIONAL},
3860 {"hidden_args", DATA_TYPE_NVLIST, ZK_OPTIONAL},
3861 };
3862
3863 static int
zfs_ioc_create(const char * fsname,nvlist_t * innvl,nvlist_t * outnvl)3864 zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3865 {
3866 int error = 0;
3867 zfs_creat_t zct = { 0 };
3868 nvlist_t *nvprops = NULL;
3869 nvlist_t *hidden_args = NULL;
3870 void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
3871 dmu_objset_type_t type;
3872 boolean_t is_insensitive = B_FALSE;
3873 dsl_crypto_params_t *dcp = NULL;
3874
3875 type = (dmu_objset_type_t)fnvlist_lookup_int32(innvl, "type");
3876 (void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3877 (void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
3878
3879 switch (type) {
3880 case DMU_OST_ZFS:
3881 cbfunc = zfs_create_cb;
3882 break;
3883
3884 case DMU_OST_ZVOL:
3885 cbfunc = zvol_create_cb;
3886 break;
3887
3888 default:
3889 cbfunc = NULL;
3890 break;
3891 }
3892 if (strchr(fsname, '@') ||
3893 strchr(fsname, '%'))
3894 return (SET_ERROR(EINVAL));
3895
3896 zct.zct_props = nvprops;
3897
3898 if (cbfunc == NULL)
3899 return (SET_ERROR(EINVAL));
3900
3901 if (type == DMU_OST_ZVOL) {
3902 uint64_t volsize, volblocksize;
3903
3904 if (nvprops == NULL)
3905 return (SET_ERROR(EINVAL));
3906 if (nvlist_lookup_uint64(nvprops,
3907 zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0)
3908 return (SET_ERROR(EINVAL));
3909
3910 if ((error = nvlist_lookup_uint64(nvprops,
3911 zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
3912 &volblocksize)) != 0 && error != ENOENT)
3913 return (SET_ERROR(EINVAL));
3914
3915 if (error != 0)
3916 volblocksize = zfs_prop_default_numeric(
3917 ZFS_PROP_VOLBLOCKSIZE);
3918
3919 if ((error = zvol_check_volblocksize(fsname,
3920 volblocksize)) != 0 ||
3921 (error = zvol_check_volsize(volsize,
3922 volblocksize)) != 0)
3923 return (error);
3924 } else if (type == DMU_OST_ZFS) {
3925 int error;
3926
3927 /*
3928 * We have to have normalization and
3929 * case-folding flags correct when we do the
3930 * file system creation, so go figure them out
3931 * now.
3932 */
3933 VERIFY0(nvlist_alloc(&zct.zct_zplprops,
3934 NV_UNIQUE_NAME, KM_SLEEP));
3935 error = zfs_fill_zplprops(fsname, nvprops,
3936 zct.zct_zplprops, &is_insensitive);
3937 if (error != 0) {
3938 nvlist_free(zct.zct_zplprops);
3939 return (error);
3940 }
3941 }
3942
3943 error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, nvprops,
3944 hidden_args, &dcp);
3945 if (error != 0) {
3946 nvlist_free(zct.zct_zplprops);
3947 return (error);
3948 }
3949
3950 error = dmu_objset_create(fsname, type,
3951 is_insensitive ? DS_FLAG_CI_DATASET : 0, dcp, cbfunc, &zct);
3952
3953 nvlist_free(zct.zct_zplprops);
3954 dsl_crypto_params_free(dcp, !!error);
3955
3956 /*
3957 * It would be nice to do this atomically.
3958 */
3959 if (error == 0) {
3960 error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3961 nvprops, outnvl);
3962 if (error != 0) {
3963 spa_t *spa;
3964 int error2;
3965
3966 /*
3967 * Volumes will return EBUSY and cannot be destroyed
3968 * until all asynchronous minor handling (e.g. from
3969 * setting the volmode property) has completed. Wait for
3970 * the spa_zvol_taskq to drain then retry.
3971 */
3972 error2 = dsl_destroy_head(fsname);
3973 while ((error2 == EBUSY) && (type == DMU_OST_ZVOL)) {
3974 error2 = spa_open(fsname, &spa, FTAG);
3975 if (error2 == 0) {
3976 taskq_wait(spa->spa_zvol_taskq);
3977 spa_close(spa, FTAG);
3978 }
3979 error2 = dsl_destroy_head(fsname);
3980 }
3981 }
3982 }
3983 return (error);
3984 }
3985
3986 /*
3987 * innvl: {
3988 * "origin" -> name of origin snapshot
3989 * (optional) "props" -> { prop -> value }
3990 * (optional) "hidden_args" -> { "wkeydata" -> value }
3991 * raw uint8_t array of encryption wrapping key data (32 bytes)
3992 * }
3993 *
3994 * outputs:
3995 * outnvl: propname -> error code (int32)
3996 */
3997 static const zfs_ioc_key_t zfs_keys_clone[] = {
3998 {"origin", DATA_TYPE_STRING, 0},
3999 {"props", DATA_TYPE_NVLIST, ZK_OPTIONAL},
4000 {"hidden_args", DATA_TYPE_NVLIST, ZK_OPTIONAL},
4001 };
4002
4003 static int
zfs_ioc_clone(const char * fsname,nvlist_t * innvl,nvlist_t * outnvl)4004 zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
4005 {
4006 int error = 0;
4007 nvlist_t *nvprops = NULL;
4008 const char *origin_name;
4009
4010 origin_name = fnvlist_lookup_string(innvl, "origin");
4011 (void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
4012
4013 if (strchr(fsname, '@') ||
4014 strchr(fsname, '%'))
4015 return (SET_ERROR(EINVAL));
4016
4017 if (dataset_namecheck(origin_name, NULL, NULL) != 0)
4018 return (SET_ERROR(EINVAL));
4019
4020 error = dsl_dataset_clone(fsname, origin_name);
4021
4022 /*
4023 * It would be nice to do this atomically.
4024 */
4025 if (error == 0) {
4026 error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
4027 nvprops, outnvl);
4028 if (error != 0)
4029 (void) dsl_destroy_head(fsname);
4030 }
4031 return (error);
4032 }
4033
4034 static const zfs_ioc_key_t zfs_keys_remap[] = {
4035 /* no nvl keys */
4036 };
4037
4038 static int
zfs_ioc_remap(const char * fsname,nvlist_t * innvl,nvlist_t * outnvl)4039 zfs_ioc_remap(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
4040 {
4041 /* This IOCTL is no longer supported. */
4042 (void) fsname, (void) innvl, (void) outnvl;
4043 return (0);
4044 }
4045
4046 /*
4047 * innvl: {
4048 * "snaps" -> { snapshot1, snapshot2 }
4049 * (optional) "props" -> { prop -> value (string) }
4050 * }
4051 *
4052 * outnvl: snapshot -> error code (int32)
4053 */
4054 static const zfs_ioc_key_t zfs_keys_snapshot[] = {
4055 {"snaps", DATA_TYPE_NVLIST, 0},
4056 {"props", DATA_TYPE_NVLIST, ZK_OPTIONAL},
4057 };
4058
4059 static int
zfs_ioc_snapshot(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)4060 zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4061 {
4062 nvlist_t *snaps;
4063 nvlist_t *props = NULL;
4064 int error, poollen;
4065 nvpair_t *pair;
4066
4067 (void) nvlist_lookup_nvlist(innvl, "props", &props);
4068 if (!nvlist_empty(props) &&
4069 zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS))
4070 return (SET_ERROR(ENOTSUP));
4071 if ((error = zfs_check_userprops(props)) != 0)
4072 return (error);
4073
4074 snaps = fnvlist_lookup_nvlist(innvl, "snaps");
4075 poollen = strlen(poolname);
4076 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
4077 pair = nvlist_next_nvpair(snaps, pair)) {
4078 const char *name = nvpair_name(pair);
4079 char *cp = strchr(name, '@');
4080
4081 /*
4082 * The snap name must contain an @, and the part after it must
4083 * contain only valid characters.
4084 */
4085 if (cp == NULL ||
4086 zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
4087 return (SET_ERROR(EINVAL));
4088
4089 /*
4090 * The snap must be in the specified pool.
4091 */
4092 if (strncmp(name, poolname, poollen) != 0 ||
4093 (name[poollen] != '/' && name[poollen] != '@'))
4094 return (SET_ERROR(EXDEV));
4095
4096 /*
4097 * Check for permission to set the properties on the fs.
4098 */
4099 if (!nvlist_empty(props)) {
4100 *cp = '\0';
4101 zone_admin_result_t zone_result;
4102 zone_result = zone_dataset_admin_check(name,
4103 ZONE_OP_SETPROP, NULL);
4104 if (zone_result == ZONE_ADMIN_DENIED) {
4105 *cp = '@';
4106 return (SET_ERROR(EPERM));
4107 }
4108 if (zone_result == ZONE_ADMIN_ALLOWED) {
4109 error = zfs_secpolicy_zoned_uid_deleg(name,
4110 ZFS_DELEG_PERM_USERPROP, CRED());
4111 } else {
4112 error = zfs_secpolicy_write_perms(name,
4113 ZFS_DELEG_PERM_USERPROP, CRED());
4114 }
4115 *cp = '@';
4116 if (error != 0)
4117 return (error);
4118 }
4119
4120 /* This must be the only snap of this fs. */
4121 for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair);
4122 pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) {
4123 if (strncmp(name, nvpair_name(pair2), cp - name + 1)
4124 == 0) {
4125 return (SET_ERROR(EXDEV));
4126 }
4127 }
4128 }
4129
4130 error = dsl_dataset_snapshot(snaps, props, outnvl);
4131
4132 return (error);
4133 }
4134
4135 /*
4136 * innvl: "message" -> string
4137 */
4138 static const zfs_ioc_key_t zfs_keys_log_history[] = {
4139 {"message", DATA_TYPE_STRING, 0},
4140 };
4141
4142 static int
zfs_ioc_log_history(const char * unused,nvlist_t * innvl,nvlist_t * outnvl)4143 zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
4144 {
4145 (void) unused, (void) outnvl;
4146 char *poolname;
4147 spa_t *spa;
4148 int error;
4149
4150 /*
4151 * The poolname in the ioctl is not set, we get it from the TSD,
4152 * which was set at the end of the last successful ioctl that allows
4153 * logging. The secpolicy func already checked that it is set.
4154 * Only one log ioctl is allowed after each successful ioctl, so
4155 * we clear the TSD here.
4156 */
4157 poolname = tsd_get(zfs_allow_log_key);
4158 if (poolname == NULL)
4159 return (SET_ERROR(EINVAL));
4160 (void) tsd_set(zfs_allow_log_key, NULL);
4161 error = spa_open(poolname, &spa, FTAG);
4162 kmem_strfree(poolname);
4163 if (error != 0)
4164 return (error);
4165
4166 const char *message = fnvlist_lookup_string(innvl, "message");
4167
4168 if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
4169 spa_close(spa, FTAG);
4170 return (SET_ERROR(ENOTSUP));
4171 }
4172
4173 error = spa_history_log(spa, message);
4174 spa_close(spa, FTAG);
4175 return (error);
4176 }
4177
4178 /*
4179 * This ioctl is used to set the bootenv configuration on the current
4180 * pool. This configuration is stored in the second padding area of the label,
4181 * and it is used by the bootloader(s) to store the bootloader and/or system
4182 * specific data.
4183 * The data is stored as nvlist data stream, and is protected by
4184 * an embedded checksum.
4185 * The version can have two possible values:
4186 * VB_RAW: nvlist should have key GRUB_ENVMAP, value DATA_TYPE_STRING.
4187 * VB_NVLIST: nvlist with arbitrary <key, value> pairs.
4188 */
4189 static const zfs_ioc_key_t zfs_keys_set_bootenv[] = {
4190 {"version", DATA_TYPE_UINT64, 0},
4191 {"<keys>", DATA_TYPE_ANY, ZK_OPTIONAL | ZK_WILDCARDLIST},
4192 };
4193
4194 static int
zfs_ioc_set_bootenv(const char * name,nvlist_t * innvl,nvlist_t * outnvl)4195 zfs_ioc_set_bootenv(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
4196 {
4197 int error;
4198 spa_t *spa;
4199
4200 if ((error = spa_open(name, &spa, FTAG)) != 0)
4201 return (error);
4202 spa_vdev_state_enter(spa, SCL_ALL);
4203 error = vdev_label_write_bootenv(spa->spa_root_vdev, innvl);
4204 (void) spa_vdev_state_exit(spa, NULL, 0);
4205 spa_close(spa, FTAG);
4206 return (error);
4207 }
4208
4209 static const zfs_ioc_key_t zfs_keys_get_bootenv[] = {
4210 /* no nvl keys */
4211 };
4212
4213 static int
zfs_ioc_get_bootenv(const char * name,nvlist_t * innvl,nvlist_t * outnvl)4214 zfs_ioc_get_bootenv(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
4215 {
4216 spa_t *spa;
4217 int error;
4218
4219 if ((error = spa_open(name, &spa, FTAG)) != 0)
4220 return (error);
4221 spa_vdev_state_enter(spa, SCL_ALL);
4222 error = vdev_label_read_bootenv(spa->spa_root_vdev, outnvl);
4223 (void) spa_vdev_state_exit(spa, NULL, 0);
4224 spa_close(spa, FTAG);
4225 return (error);
4226 }
4227
4228 /*
4229 * The dp_config_rwlock must not be held when calling this, because the
4230 * unmount may need to write out data.
4231 *
4232 * This function is best-effort. Callers must deal gracefully if it
4233 * remains mounted (or is remounted after this call).
4234 *
4235 * Returns 0 if the argument is not a snapshot, or it is not currently a
4236 * filesystem, or we were able to unmount it. Returns error code otherwise.
4237 */
4238 void
zfs_unmount_snap(const char * snapname)4239 zfs_unmount_snap(const char *snapname)
4240 {
4241 if (strchr(snapname, '@') == NULL)
4242 return;
4243
4244 (void) zfsctl_snapshot_unmount(snapname, MNT_FORCE);
4245 }
4246
/*
 * Callback wrapper around zfs_unmount_snap(); 'arg' is unused and the
 * result is always 0.
 */
static int
zfs_unmount_snap_cb(const char *snapname, void *arg)
{
	(void) arg;

	zfs_unmount_snap(snapname);

	return (0);
}
4254
4255 /*
4256 * When a clone is destroyed, its origin may also need to be destroyed,
4257 * in which case it must be unmounted. This routine will do that unmount
4258 * if necessary.
4259 */
4260 void
zfs_destroy_unmount_origin(const char * fsname)4261 zfs_destroy_unmount_origin(const char *fsname)
4262 {
4263 int error;
4264 objset_t *os;
4265 dsl_dataset_t *ds;
4266
4267 error = dmu_objset_hold(fsname, FTAG, &os);
4268 if (error != 0)
4269 return;
4270 ds = dmu_objset_ds(os);
4271 if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) {
4272 char originname[ZFS_MAX_DATASET_NAME_LEN];
4273 dsl_dataset_name(ds->ds_prev, originname);
4274 dmu_objset_rele(os, FTAG);
4275 zfs_unmount_snap(originname);
4276 } else {
4277 dmu_objset_rele(os, FTAG);
4278 }
4279 }
4280
4281 /*
4282 * innvl: {
4283 * "snaps" -> { snapshot1, snapshot2 }
4284 * (optional boolean) "defer"
4285 * }
4286 *
4287 * outnvl: snapshot -> error code (int32)
4288 */
static const zfs_ioc_key_t zfs_keys_destroy_snaps[] = {
	/* nvlist whose pair names are the snapshots to destroy */
	{"snaps", DATA_TYPE_NVLIST, 0},
	/* optional; zfs_ioc_destroy_snaps() only tests for its presence */
	{"defer", DATA_TYPE_BOOLEAN, ZK_OPTIONAL},
};
4293
4294 static int
zfs_ioc_destroy_snaps(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)4295 zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4296 {
4297 int poollen;
4298 nvlist_t *snaps;
4299 nvpair_t *pair;
4300 boolean_t defer;
4301 spa_t *spa;
4302
4303 snaps = fnvlist_lookup_nvlist(innvl, "snaps");
4304 defer = nvlist_exists(innvl, "defer");
4305
4306 poollen = strlen(poolname);
4307 for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
4308 pair = nvlist_next_nvpair(snaps, pair)) {
4309 const char *name = nvpair_name(pair);
4310
4311 /*
4312 * The snap must be in the specified pool to prevent the
4313 * invalid removal of zvol minors below.
4314 */
4315 if (strncmp(name, poolname, poollen) != 0 ||
4316 (name[poollen] != '/' && name[poollen] != '@'))
4317 return (SET_ERROR(EXDEV));
4318
4319 zfs_unmount_snap(nvpair_name(pair));
4320 if (spa_open(name, &spa, FTAG) == 0) {
4321 zvol_remove_minors(spa, name, B_TRUE);
4322 spa_close(spa, FTAG);
4323 }
4324 }
4325
4326 return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
4327 }
4328
4329 /*
4330 * Create bookmarks. The bookmark names are of the form <fs>#<bmark>.
4331 * All bookmarks and snapshots must be in the same pool.
4332 * dsl_bookmark_create_nvl_validate describes the nvlist schema in more detail.
4333 *
4334 * innvl: {
4335 * new_bookmark1 -> existing_snapshot,
4336 * new_bookmark2 -> existing_bookmark,
4337 * }
4338 *
4339 * outnvl: bookmark -> error code (int32)
4340 *
4341 */
static const zfs_ioc_key_t zfs_keys_bookmark[] = {
	/* wildcard list of string-valued pairs (new bookmark -> source) */
	{"<bookmark>...", DATA_TYPE_STRING, ZK_WILDCARDLIST},
};
4345
4346 static int
zfs_ioc_bookmark(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)4347 zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4348 {
4349 (void) poolname;
4350 return (dsl_bookmark_create(innvl, outnvl));
4351 }
4352
4353 /*
4354 * innvl: {
4355 * property 1, property 2, ...
4356 * }
4357 *
4358 * outnvl: {
4359 * bookmark name 1 -> { property 1, property 2, ... },
4360 * bookmark name 2 -> { property 1, property 2, ... }
4361 * }
4362 *
4363 */
static const zfs_ioc_key_t zfs_keys_get_bookmarks[] = {
	/* optional wildcard list of boolean-typed property-name pairs */
	{"<property>...", DATA_TYPE_BOOLEAN, ZK_WILDCARDLIST | ZK_OPTIONAL},
};
4367
4368 static int
zfs_ioc_get_bookmarks(const char * fsname,nvlist_t * innvl,nvlist_t * outnvl)4369 zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
4370 {
4371 return (dsl_get_bookmarks(fsname, innvl, outnvl));
4372 }
4373
4374 /*
4375 * innvl is not used.
4376 *
4377 * outnvl: {
4378 * property 1, property 2, ...
4379 * }
4380 *
4381 */
static const zfs_ioc_key_t zfs_keys_get_bookmark_props[] = {
	/* no nvl keys */	/* table is intentionally empty */
};
4385
4386 static int
zfs_ioc_get_bookmark_props(const char * bookmark,nvlist_t * innvl,nvlist_t * outnvl)4387 zfs_ioc_get_bookmark_props(const char *bookmark, nvlist_t *innvl,
4388 nvlist_t *outnvl)
4389 {
4390 (void) innvl;
4391 char fsname[ZFS_MAX_DATASET_NAME_LEN];
4392 char *bmname;
4393
4394 bmname = strchr(bookmark, '#');
4395 if (bmname == NULL)
4396 return (SET_ERROR(EINVAL));
4397 bmname++;
4398
4399 (void) strlcpy(fsname, bookmark, sizeof (fsname));
4400 *(strchr(fsname, '#')) = '\0';
4401
4402 return (dsl_get_bookmark_props(fsname, bmname, outnvl));
4403 }
4404
4405 /*
4406 * innvl: {
4407 * bookmark name 1, bookmark name 2
4408 * }
4409 *
4410 * outnvl: bookmark -> error code (int32)
4411 *
4412 */
static const zfs_ioc_key_t zfs_keys_destroy_bookmarks[] = {
	/* wildcard list; each pair's name is a bookmark to destroy */
	{"<bookmark>...", DATA_TYPE_BOOLEAN, ZK_WILDCARDLIST},
};
4416
4417 static int
zfs_ioc_destroy_bookmarks(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)4418 zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl,
4419 nvlist_t *outnvl)
4420 {
4421 int error, poollen;
4422
4423 poollen = strlen(poolname);
4424 for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
4425 pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
4426 const char *name = nvpair_name(pair);
4427 const char *cp = strchr(name, '#');
4428
4429 /*
4430 * The bookmark name must contain an #, and the part after it
4431 * must contain only valid characters.
4432 */
4433 if (cp == NULL ||
4434 zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
4435 return (SET_ERROR(EINVAL));
4436
4437 /*
4438 * The bookmark must be in the specified pool.
4439 */
4440 if (strncmp(name, poolname, poollen) != 0 ||
4441 (name[poollen] != '/' && name[poollen] != '#'))
4442 return (SET_ERROR(EXDEV));
4443 }
4444
4445 error = dsl_bookmark_destroy(innvl, outnvl);
4446 return (error);
4447 }
4448
/*
 * Input keys read by zfs_ioc_channel_program().  Entries without
 * ZK_OPTIONAL are fetched there with the fnvlist_lookup_*() helpers; the
 * optional ones fall back to defaults when absent.
 */
static const zfs_ioc_key_t zfs_keys_channel_program[] = {
	{"program", DATA_TYPE_STRING, 0},
	{"arg", DATA_TYPE_ANY, 0},
	{"sync", DATA_TYPE_BOOLEAN_VALUE, ZK_OPTIONAL},
	{"instrlimit", DATA_TYPE_UINT64, ZK_OPTIONAL},
	{"memlimit", DATA_TYPE_UINT64, ZK_OPTIONAL},
};
4456
4457 static int
zfs_ioc_channel_program(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)4458 zfs_ioc_channel_program(const char *poolname, nvlist_t *innvl,
4459 nvlist_t *outnvl)
4460 {
4461 const char *program;
4462 uint64_t instrlimit, memlimit;
4463 boolean_t sync_flag;
4464 nvpair_t *nvarg = NULL;
4465
4466 program = fnvlist_lookup_string(innvl, ZCP_ARG_PROGRAM);
4467 if (0 != nvlist_lookup_boolean_value(innvl, ZCP_ARG_SYNC, &sync_flag)) {
4468 sync_flag = B_TRUE;
4469 }
4470 if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_INSTRLIMIT, &instrlimit)) {
4471 instrlimit = ZCP_DEFAULT_INSTRLIMIT;
4472 }
4473 if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_MEMLIMIT, &memlimit)) {
4474 memlimit = ZCP_DEFAULT_MEMLIMIT;
4475 }
4476 nvarg = fnvlist_lookup_nvpair(innvl, ZCP_ARG_ARGLIST);
4477
4478 if (instrlimit == 0 || instrlimit > zfs_lua_max_instrlimit)
4479 return (SET_ERROR(EINVAL));
4480 if (memlimit == 0 || memlimit > zfs_lua_max_memlimit)
4481 return (SET_ERROR(EINVAL));
4482
4483 return (zcp_eval(poolname, program, sync_flag, instrlimit, memlimit,
4484 nvarg, outnvl));
4485 }
4486
4487 /*
4488 * innvl: unused
4489 * outnvl: empty
4490 */
static const zfs_ioc_key_t zfs_keys_pool_checkpoint[] = {
	/* no nvl keys */	/* table is intentionally empty */
};
4494
4495 static int
zfs_ioc_pool_checkpoint(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)4496 zfs_ioc_pool_checkpoint(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4497 {
4498 (void) innvl, (void) outnvl;
4499 return (spa_checkpoint(poolname));
4500 }
4501
4502 /*
4503 * innvl: unused
4504 * outnvl: empty
4505 */
static const zfs_ioc_key_t zfs_keys_pool_discard_checkpoint[] = {
	/* no nvl keys */	/* table is intentionally empty */
};
4509
4510 static int
zfs_ioc_pool_discard_checkpoint(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)4511 zfs_ioc_pool_discard_checkpoint(const char *poolname, nvlist_t *innvl,
4512 nvlist_t *outnvl)
4513 {
4514 (void) innvl, (void) outnvl;
4515 return (spa_checkpoint_discard(poolname));
4516 }
4517
4518 /*
4519 * Loads specific types of data for the given pool
4520 *
4521 * innvl: {
4522 * "prefetch_type" -> int32_t
4523 * }
4524 *
4525 * outnvl: empty
4526 */
static const zfs_ioc_key_t zfs_keys_pool_prefetch[] = {
	/* int32 selector; zfs_ioc_pool_prefetch() accepts DDT or BRT */
	{ZPOOL_PREFETCH_TYPE, DATA_TYPE_INT32, 0},
};
4530
4531 static int
zfs_ioc_pool_prefetch(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)4532 zfs_ioc_pool_prefetch(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4533 {
4534 (void) outnvl;
4535
4536 int error;
4537 spa_t *spa;
4538 int32_t type;
4539
4540 if (nvlist_lookup_int32(innvl, ZPOOL_PREFETCH_TYPE, &type) != 0)
4541 return (EINVAL);
4542
4543 if (type != ZPOOL_PREFETCH_DDT && type != ZPOOL_PREFETCH_BRT)
4544 return (EINVAL);
4545
4546 error = spa_open(poolname, &spa, FTAG);
4547 if (error != 0)
4548 return (error);
4549
4550 hrtime_t start_time = gethrtime();
4551
4552 if (type == ZPOOL_PREFETCH_DDT) {
4553 ddt_prefetch_all(spa);
4554 zfs_dbgmsg("pool '%s': loaded ddt into ARC in %llu ms",
4555 spa->spa_name,
4556 (u_longlong_t)NSEC2MSEC(gethrtime() - start_time));
4557 } else {
4558 brt_prefetch_all(spa);
4559 zfs_dbgmsg("pool '%s': loaded brt into ARC in %llu ms",
4560 spa->spa_name,
4561 (u_longlong_t)NSEC2MSEC(gethrtime() - start_time));
4562 }
4563
4564 spa_close(spa, FTAG);
4565
4566 return (error);
4567 }
4568
4569 /*
4570 * inputs:
4571 * zc_name name of dataset to destroy
4572 * zc_defer_destroy mark for deferred destroy
4573 *
4574 * outputs: none
4575 */
4576 static int
zfs_ioc_destroy(zfs_cmd_t * zc)4577 zfs_ioc_destroy(zfs_cmd_t *zc)
4578 {
4579 objset_t *os;
4580 dmu_objset_type_t ost;
4581 int err;
4582
4583 err = dmu_objset_hold(zc->zc_name, FTAG, &os);
4584 if (err != 0)
4585 return (err);
4586 ost = dmu_objset_type(os);
4587 dmu_objset_rele(os, FTAG);
4588
4589 if (ost == DMU_OST_ZFS)
4590 zfs_unmount_snap(zc->zc_name);
4591
4592 if (strchr(zc->zc_name, '@')) {
4593 err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
4594 } else {
4595 /*
4596 * Save zoned_uid before destroying so we can clean up
4597 * kernel-side zone tracking after a successful destroy.
4598 */
4599 uint64_t zoned_uid = 0;
4600 (void) dsl_prop_get(zc->zc_name, "zoned_uid",
4601 8, 1, &zoned_uid, NULL);
4602
4603 err = dsl_destroy_head(zc->zc_name);
4604 if (err == EEXIST) {
4605 /*
4606 * It is possible that the given DS may have
4607 * hidden child (%recv) datasets - "leftovers"
4608 * resulting from the previously interrupted
4609 * 'zfs receive'.
4610 *
4611 * 6 extra bytes for /%recv
4612 */
4613 char namebuf[ZFS_MAX_DATASET_NAME_LEN + 6];
4614
4615 if (snprintf(namebuf, sizeof (namebuf), "%s/%s",
4616 zc->zc_name, recv_clone_name) >=
4617 sizeof (namebuf))
4618 return (SET_ERROR(EINVAL));
4619
4620 /*
4621 * Try to remove the hidden child (%recv) and after
4622 * that try to remove the target dataset.
4623 * If the hidden child (%recv) does not exist
4624 * the original error (EEXIST) will be returned
4625 */
4626 err = dsl_destroy_head(namebuf);
4627 if (err == 0)
4628 err = dsl_destroy_head(zc->zc_name);
4629 else if (err == ENOENT)
4630 err = SET_ERROR(EEXIST);
4631 }
4632
4633 if (err == 0 && zoned_uid != 0) {
4634 (void) zone_dataset_detach_uid(kcred,
4635 zc->zc_name, (uid_t)zoned_uid);
4636 }
4637 }
4638
4639 return (err);
4640 }
4641
4642 /*
4643 * innvl: {
 *	"initialize_command" ->
 *	    POOL_INITIALIZE_{CANCEL|START|SUSPEND|UNINIT} (uint64)
4645 * "initialize_vdevs": { -> guids to initialize (nvlist)
4646 * "vdev_path_1": vdev_guid_1, (uint64),
4647 * "vdev_path_2": vdev_guid_2, (uint64),
4648 * ...
4649 * },
4650 * }
4651 *
4652 * outnvl: {
4653 * "initialize_vdevs": { -> initialization errors (nvlist)
4654 * "vdev_path_1": errno, see function body for possible errnos (uint64)
4655 * "vdev_path_2": errno, ... (uint64)
4656 * ...
4657 * }
4658 * }
4659 *
 * EINVAL is returned for an unknown command or if any of the provided vdev
 * guids has been specified with a type other than uint64.
4662 */
static const zfs_ioc_key_t zfs_keys_pool_initialize[] = {
	/* uint64 command code, checked in zfs_ioc_pool_initialize() */
	{ZPOOL_INITIALIZE_COMMAND, DATA_TYPE_UINT64, 0},
	/* nvlist of vdev guids; every value must be a uint64 */
	{ZPOOL_INITIALIZE_VDEVS, DATA_TYPE_NVLIST, 0}
};
4667
4668 static int
zfs_ioc_pool_initialize(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)4669 zfs_ioc_pool_initialize(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4670 {
4671 uint64_t cmd_type;
4672 if (nvlist_lookup_uint64(innvl, ZPOOL_INITIALIZE_COMMAND,
4673 &cmd_type) != 0) {
4674 return (SET_ERROR(EINVAL));
4675 }
4676
4677 if (!(cmd_type == POOL_INITIALIZE_CANCEL ||
4678 cmd_type == POOL_INITIALIZE_START ||
4679 cmd_type == POOL_INITIALIZE_SUSPEND ||
4680 cmd_type == POOL_INITIALIZE_UNINIT)) {
4681 return (SET_ERROR(EINVAL));
4682 }
4683
4684 nvlist_t *vdev_guids;
4685 if (nvlist_lookup_nvlist(innvl, ZPOOL_INITIALIZE_VDEVS,
4686 &vdev_guids) != 0) {
4687 return (SET_ERROR(EINVAL));
4688 }
4689
4690 for (nvpair_t *pair = nvlist_next_nvpair(vdev_guids, NULL);
4691 pair != NULL; pair = nvlist_next_nvpair(vdev_guids, pair)) {
4692 uint64_t vdev_guid;
4693 if (nvpair_value_uint64(pair, &vdev_guid) != 0) {
4694 return (SET_ERROR(EINVAL));
4695 }
4696 }
4697
4698 spa_t *spa;
4699 int error = spa_open(poolname, &spa, FTAG);
4700 if (error != 0)
4701 return (error);
4702
4703 nvlist_t *vdev_errlist = fnvlist_alloc();
4704 int total_errors = spa_vdev_initialize(spa, vdev_guids, cmd_type,
4705 vdev_errlist);
4706
4707 if (fnvlist_size(vdev_errlist) > 0) {
4708 fnvlist_add_nvlist(outnvl, ZPOOL_INITIALIZE_VDEVS,
4709 vdev_errlist);
4710 }
4711 fnvlist_free(vdev_errlist);
4712
4713 spa_close(spa, FTAG);
4714 return (total_errors > 0 ? SET_ERROR(EINVAL) : 0);
4715 }
4716
4717 /*
4718 * innvl: {
4719 * "trim_command" -> POOL_TRIM_{CANCEL|START|SUSPEND} (uint64)
4720 * "trim_vdevs": { -> guids to TRIM (nvlist)
4721 * "vdev_path_1": vdev_guid_1, (uint64),
4722 * "vdev_path_2": vdev_guid_2, (uint64),
4723 * ...
4724 * },
4725 * "trim_rate" -> Target TRIM rate in bytes/sec.
4726 * "trim_secure" -> Set to request a secure TRIM.
4727 * }
4728 *
4729 * outnvl: {
4730 * "trim_vdevs": { -> TRIM errors (nvlist)
4731 * "vdev_path_1": errno, see function body for possible errnos (uint64)
4732 * "vdev_path_2": errno, ... (uint64)
4733 * ...
4734 * }
4735 * }
4736 *
 * EINVAL is returned for an unknown command or if any of the provided vdev
 * guids has been specified with a type other than uint64.
4739 */
static const zfs_ioc_key_t zfs_keys_pool_trim[] = {
	/* uint64 command code, checked in zfs_ioc_pool_trim() */
	{ZPOOL_TRIM_COMMAND, DATA_TYPE_UINT64, 0},
	/* nvlist of vdev guids; every value must be a uint64 */
	{ZPOOL_TRIM_VDEVS, DATA_TYPE_NVLIST, 0},
	/* optional; the handler substitutes 0 when absent */
	{ZPOOL_TRIM_RATE, DATA_TYPE_UINT64, ZK_OPTIONAL},
	/* optional; the handler substitutes B_FALSE when absent */
	{ZPOOL_TRIM_SECURE, DATA_TYPE_BOOLEAN_VALUE, ZK_OPTIONAL},
};
4746
4747 static int
zfs_ioc_pool_trim(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)4748 zfs_ioc_pool_trim(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4749 {
4750 uint64_t cmd_type;
4751 if (nvlist_lookup_uint64(innvl, ZPOOL_TRIM_COMMAND, &cmd_type) != 0)
4752 return (SET_ERROR(EINVAL));
4753
4754 if (!(cmd_type == POOL_TRIM_CANCEL ||
4755 cmd_type == POOL_TRIM_START ||
4756 cmd_type == POOL_TRIM_SUSPEND)) {
4757 return (SET_ERROR(EINVAL));
4758 }
4759
4760 nvlist_t *vdev_guids;
4761 if (nvlist_lookup_nvlist(innvl, ZPOOL_TRIM_VDEVS, &vdev_guids) != 0)
4762 return (SET_ERROR(EINVAL));
4763
4764 for (nvpair_t *pair = nvlist_next_nvpair(vdev_guids, NULL);
4765 pair != NULL; pair = nvlist_next_nvpair(vdev_guids, pair)) {
4766 uint64_t vdev_guid;
4767 if (nvpair_value_uint64(pair, &vdev_guid) != 0) {
4768 return (SET_ERROR(EINVAL));
4769 }
4770 }
4771
4772 /* Optional, defaults to maximum rate when not provided */
4773 uint64_t rate;
4774 if (nvlist_lookup_uint64(innvl, ZPOOL_TRIM_RATE, &rate) != 0)
4775 rate = 0;
4776
4777 /* Optional, defaults to standard TRIM when not provided */
4778 boolean_t secure;
4779 if (nvlist_lookup_boolean_value(innvl, ZPOOL_TRIM_SECURE,
4780 &secure) != 0) {
4781 secure = B_FALSE;
4782 }
4783
4784 spa_t *spa;
4785 int error = spa_open(poolname, &spa, FTAG);
4786 if (error != 0)
4787 return (error);
4788
4789 nvlist_t *vdev_errlist = fnvlist_alloc();
4790 int total_errors = spa_vdev_trim(spa, vdev_guids, cmd_type,
4791 rate, !!zfs_trim_metaslab_skip, secure, vdev_errlist);
4792
4793 if (fnvlist_size(vdev_errlist) > 0)
4794 fnvlist_add_nvlist(outnvl, ZPOOL_TRIM_VDEVS, vdev_errlist);
4795
4796 fnvlist_free(vdev_errlist);
4797
4798 spa_close(spa, FTAG);
4799 return (total_errors > 0 ? SET_ERROR(EINVAL) : 0);
4800 }
4801
/* innvl key names read by zfs_ioc_ddt_prune(). */
#define	DDT_PRUNE_UNIT		"ddt_prune_unit"
#define	DDT_PRUNE_AMOUNT	"ddt_prune_amount"
4804
/*
 * innvl: {
 *	"ddt_prune_unit" -> int32_t
 *	"ddt_prune_amount" -> uint64_t
 * }
 *
 * outnvl: unused
 */
static const zfs_ioc_key_t zfs_keys_ddt_prune[] = {
	/* int32; cast to zpool_ddt_prune_unit_t by zfs_ioc_ddt_prune() */
	{DDT_PRUNE_UNIT, DATA_TYPE_INT32, 0},
	/* uint64 amount, passed through to ddt_prune_unique_entries() */
	{DDT_PRUNE_AMOUNT, DATA_TYPE_UINT64, 0},
};
4817
4818 static int
zfs_ioc_ddt_prune(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)4819 zfs_ioc_ddt_prune(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4820 {
4821 int32_t unit;
4822 uint64_t amount;
4823
4824 if (nvlist_lookup_int32(innvl, DDT_PRUNE_UNIT, &unit) != 0 ||
4825 nvlist_lookup_uint64(innvl, DDT_PRUNE_AMOUNT, &amount) != 0) {
4826 return (EINVAL);
4827 }
4828
4829 spa_t *spa;
4830 int error = spa_open(poolname, &spa, FTAG);
4831 if (error != 0)
4832 return (error);
4833
4834 if (!spa_feature_is_enabled(spa, SPA_FEATURE_FAST_DEDUP)) {
4835 spa_close(spa, FTAG);
4836 return (SET_ERROR(ENOTSUP));
4837 }
4838
4839 error = ddt_prune_unique_entries(spa, (zpool_ddt_prune_unit_t)unit,
4840 amount);
4841
4842 spa_close(spa, FTAG);
4843
4844 return (error);
4845 }
4846
4847 /*
4848 * This ioctl waits for activity of a particular type to complete. If there is
4849 * no activity of that type in progress, it returns immediately, and the
4850 * returned value "waited" is false. If there is activity in progress, and no
4851 * tag is passed in, the ioctl blocks until all activity of that type is
4852 * complete, and then returns with "waited" set to true.
4853 *
4854 * If a tag is provided, it identifies a particular instance of an activity to
4855 * wait for. Currently, this is only valid for use with 'initialize', because
4856 * that is the only activity for which there can be multiple instances running
4857 * concurrently. In the case of 'initialize', the tag corresponds to the guid of
4858 * the vdev on which to wait.
4859 *
4860 * If a thread waiting in the ioctl receives a signal, the call will return
4861 * immediately, and the return value will be EINTR.
4862 *
4863 * innvl: {
4864 * "wait_activity" -> int32_t
4865 * (optional) "wait_tag" -> uint64_t
4866 * }
4867 *
4868 * outnvl: "waited" -> boolean_t
4869 */
static const zfs_ioc_key_t zfs_keys_pool_wait[] = {
	/* int32 activity selector, passed to spa_wait()/spa_wait_tag() */
	{ZPOOL_WAIT_ACTIVITY, DATA_TYPE_INT32, 0},
	/* optional uint64; when present zfs_ioc_wait() uses spa_wait_tag() */
	{ZPOOL_WAIT_TAG, DATA_TYPE_UINT64, ZK_OPTIONAL},
};
4874
4875 static int
zfs_ioc_wait(const char * name,nvlist_t * innvl,nvlist_t * outnvl)4876 zfs_ioc_wait(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
4877 {
4878 int32_t activity;
4879 uint64_t tag;
4880 boolean_t waited;
4881 int error;
4882
4883 if (nvlist_lookup_int32(innvl, ZPOOL_WAIT_ACTIVITY, &activity) != 0)
4884 return (EINVAL);
4885
4886 if (nvlist_lookup_uint64(innvl, ZPOOL_WAIT_TAG, &tag) == 0)
4887 error = spa_wait_tag(name, activity, tag, &waited);
4888 else
4889 error = spa_wait(name, activity, &waited);
4890
4891 if (error == 0)
4892 fnvlist_add_boolean_value(outnvl, ZPOOL_WAIT_WAITED, waited);
4893
4894 return (error);
4895 }
4896
4897 /*
 * This ioctl waits for activity of a particular type to complete on a
 * dataset. If there is no activity of that type in progress, it returns
 * immediately, and the returned value "waited" is false. If there is
 * activity in progress, the ioctl blocks until all activity of that type is
 * complete, and then returns with "waited" set to true.
4903 *
4904 * If a thread waiting in the ioctl receives a signal, the call will return
4905 * immediately, and the return value will be EINTR.
4906 *
4907 * innvl: {
4908 * "wait_activity" -> int32_t
4909 * }
4910 *
4911 * outnvl: "waited" -> boolean_t
4912 */
static const zfs_ioc_key_t zfs_keys_fs_wait[] = {
	/* int32 activity selector, range-checked by zfs_ioc_wait_fs() */
	{ZFS_WAIT_ACTIVITY, DATA_TYPE_INT32, 0},
};
4916
/*
 * Handler body for the dataset-level wait request.
 *
 * Returns EINVAL if ZFS_WAIT_ACTIVITY is missing or outside
 * [0, ZFS_WAIT_NUM_ACTIVITIES), the error from dsl_pool_hold() or
 * dsl_dataset_hold() if either fails, otherwise the result of
 * dsl_dir_wait().  On success "waited" is added to outnvl under
 * ZFS_WAIT_WAITED.
 */
static int
zfs_ioc_wait_fs(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
{
	int32_t activity;
	boolean_t waited = B_FALSE;
	int error;
	dsl_pool_t *dp;
	dsl_dir_t *dd;
	dsl_dataset_t *ds;

	if (nvlist_lookup_int32(innvl, ZFS_WAIT_ACTIVITY, &activity) != 0)
		return (SET_ERROR(EINVAL));

	if (activity >= ZFS_WAIT_NUM_ACTIVITIES || activity < 0)
		return (SET_ERROR(EINVAL));

	if ((error = dsl_pool_hold(name, FTAG, &dp)) != 0)
		return (error);

	if ((error = dsl_dataset_hold(dp, name, FTAG, &ds)) != 0) {
		dsl_pool_rele(dp, FTAG);
		return (error);
	}

	/*
	 * Register as a waiter under dd_activity_lock.  The lock is still
	 * held when dsl_dir_wait() is called and is only dropped after the
	 * waiter count has been decremented below.
	 */
	dd = ds->ds_dir;
	mutex_enter(&dd->dd_activity_lock);
	dd->dd_activity_waiters++;

	/*
	 * We get a long-hold here so that the dsl_dataset_t and dsl_dir_t
	 * aren't evicted while we're waiting. Normally this is prevented by
	 * holding the pool, but we can't do that while we're waiting since
	 * that would prevent TXGs from syncing out. Some of the functionality
	 * of long-holds (e.g. preventing deletion) is unnecessary for this
	 * case, since we would cancel the waiters before proceeding with a
	 * deletion. An alternative mechanism for keeping the dataset around
	 * could be developed but this is simpler.
	 */
	dsl_dataset_long_hold(ds, FTAG);
	dsl_pool_rele(dp, FTAG);

	error = dsl_dir_wait(dd, ds, activity, &waited);

	dsl_dataset_long_rele(ds, FTAG);
	dd->dd_activity_waiters--;
	/*
	 * The last waiter to leave signals dd_activity_cv.
	 * NOTE(review): presumably this wakes a thread waiting for all
	 * waiters to drain -- confirm against the cv's other users.
	 */
	if (dd->dd_activity_waiters == 0)
		cv_signal(&dd->dd_activity_cv);
	mutex_exit(&dd->dd_activity_lock);

	dsl_dataset_rele(ds, FTAG);

	if (error == 0)
		fnvlist_add_boolean_value(outnvl, ZFS_WAIT_WAITED, waited);

	return (error);
}
4973
4974 /*
4975 * fsname is name of dataset to rollback (to most recent snapshot)
4976 *
4977 * innvl may contain name of expected target snapshot
4978 *
 * outnvl: {
 *	"target" -> name of most recent snapshot
 * }
4981 */
static const zfs_ioc_key_t zfs_keys_rollback[] = {
	/* optional snapshot name; must contain '@' (see zfs_ioc_rollback) */
	{"target", DATA_TYPE_STRING, ZK_OPTIONAL},
};
4985
4986 static int
zfs_ioc_rollback(const char * fsname,nvlist_t * innvl,nvlist_t * outnvl)4987 zfs_ioc_rollback(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
4988 {
4989 zfsvfs_t *zfsvfs;
4990 zvol_state_handle_t *zv;
4991 const char *target = NULL;
4992 int error;
4993
4994 (void) nvlist_lookup_string(innvl, "target", &target);
4995 if (target != NULL) {
4996 const char *cp = strchr(target, '@');
4997
4998 /*
4999 * The snap name must contain an @, and the part after it must
5000 * contain only valid characters.
5001 */
5002 if (cp == NULL ||
5003 zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
5004 return (SET_ERROR(EINVAL));
5005 }
5006
5007 if (getzfsvfs(fsname, &zfsvfs) == 0) {
5008 dsl_dataset_t *ds;
5009
5010 ds = dmu_objset_ds(zfsvfs->z_os);
5011 error = zfs_suspend_fs(zfsvfs);
5012 if (error == 0) {
5013 int resume_err;
5014
5015 error = dsl_dataset_rollback(fsname, target, zfsvfs,
5016 outnvl);
5017 resume_err = zfs_resume_fs(zfsvfs, ds);
5018 error = error ? error : resume_err;
5019 }
5020 zfs_vfs_rele(zfsvfs);
5021 } else if (zvol_suspend(fsname, &zv) == 0) {
5022 error = dsl_dataset_rollback(fsname, target, zvol_tag(zv),
5023 outnvl);
5024 zvol_resume(zv);
5025 } else {
5026 error = dsl_dataset_rollback(fsname, target, NULL, outnvl);
5027 }
5028 return (error);
5029 }
5030
/*
 * Callback that unmounts (best effort) the snapshot "<fsname>@<arg>", where
 * arg is the snapshot's short name.  Always returns 0.
 */
static int
recursive_unmount(const char *fsname, void *arg)
{
	char *snap_fullname = kmem_asprintf("%s@%s", fsname,
	    (const char *)arg);

	zfs_unmount_snap(snap_fullname);
	kmem_strfree(snap_fullname);

	return (0);
}
5043
5044 /*
5045 *
5046 * snapname is the snapshot to redact.
5047 * innvl: {
5048 * "bookname" -> (string)
5049 * shortname of the redaction bookmark to generate
5050 * "snapnv" -> (nvlist, values ignored)
5051 * snapshots to redact snapname with respect to
5052 * }
5053 *
5054 * outnvl is unused
5055 */
5056
static const zfs_ioc_key_t zfs_keys_redact[] = {
	/* string passed to dmu_redact_snap() as the bookmark name */
	{"bookname", DATA_TYPE_STRING, 0},
	/* nvlist of snapshots; zfs_ioc_redact() rejects an empty list */
	{"snapnv", DATA_TYPE_NVLIST, 0},
};
5061
5062 static int
zfs_ioc_redact(const char * snapname,nvlist_t * innvl,nvlist_t * outnvl)5063 zfs_ioc_redact(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
5064 {
5065 (void) outnvl;
5066 nvlist_t *redactnvl = NULL;
5067 const char *redactbook = NULL;
5068
5069 if (nvlist_lookup_nvlist(innvl, "snapnv", &redactnvl) != 0)
5070 return (SET_ERROR(EINVAL));
5071 if (fnvlist_num_pairs(redactnvl) == 0)
5072 return (SET_ERROR(ENXIO));
5073 if (nvlist_lookup_string(innvl, "bookname", &redactbook) != 0)
5074 return (SET_ERROR(EINVAL));
5075
5076 return (dmu_redact_snap(snapname, redactnvl, redactbook));
5077 }
5078
5079 /*
5080 * inputs:
5081 * zc_name old name of dataset
5082 * zc_value new name of dataset
5083 * zc_cookie recursive flag (only valid for snapshots)
5084 *
5085 * outputs: none
5086 */
5087 static int
zfs_ioc_rename(zfs_cmd_t * zc)5088 zfs_ioc_rename(zfs_cmd_t *zc)
5089 {
5090 objset_t *os;
5091 dmu_objset_type_t ost;
5092 boolean_t recursive = zc->zc_cookie & 1;
5093 boolean_t nounmount = !!(zc->zc_cookie & 2);
5094 char *at;
5095 int err;
5096
5097 /* "zfs rename" from and to ...%recv datasets should both fail */
5098 zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
5099 zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
5100 if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
5101 dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
5102 strchr(zc->zc_name, '%') || strchr(zc->zc_value, '%'))
5103 return (SET_ERROR(EINVAL));
5104
5105 err = dmu_objset_hold(zc->zc_name, FTAG, &os);
5106 if (err != 0)
5107 return (err);
5108 ost = dmu_objset_type(os);
5109 dmu_objset_rele(os, FTAG);
5110
5111 at = strchr(zc->zc_name, '@');
5112 if (at != NULL) {
5113 /* snaps must be in same fs */
5114 int error;
5115
5116 if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1))
5117 return (SET_ERROR(EXDEV));
5118 *at = '\0';
5119 if (ost == DMU_OST_ZFS && !nounmount) {
5120 error = dmu_objset_find(zc->zc_name,
5121 recursive_unmount, at + 1,
5122 recursive ? DS_FIND_CHILDREN : 0);
5123 if (error != 0) {
5124 *at = '@';
5125 return (error);
5126 }
5127 }
5128 error = dsl_dataset_rename_snapshot(zc->zc_name,
5129 at + 1, strchr(zc->zc_value, '@') + 1, recursive);
5130 *at = '@';
5131
5132 return (error);
5133 } else {
5134 /*
5135 * For dataset renames, update kernel-side zone tracking
5136 * if the dataset has a zoned_uid delegation. Read the
5137 * property before rename, then detach old / attach new.
5138 */
5139 uint64_t zoned_uid = 0;
5140 (void) dsl_prop_get(zc->zc_name, "zoned_uid",
5141 8, 1, &zoned_uid, NULL);
5142
5143 err = dsl_dir_rename(zc->zc_name, zc->zc_value);
5144
5145 if (err == 0 && zoned_uid != 0) {
5146 (void) zone_dataset_detach_uid(kcred,
5147 zc->zc_name, (uid_t)zoned_uid);
5148 (void) zone_dataset_attach_uid(kcred,
5149 zc->zc_value, (uid_t)zoned_uid);
5150 }
5151 return (err);
5152 }
5153 }
5154
5155 static int
zfs_check_settable(const char * dsname,nvpair_t * pair,cred_t * cr)5156 zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
5157 {
5158 const char *propname = nvpair_name(pair);
5159 boolean_t issnap = (strchr(dsname, '@') != NULL);
5160 zfs_prop_t prop = zfs_name_to_prop(propname);
5161 uint64_t intval, compval;
5162 int err;
5163
5164 if (prop == ZPROP_USERPROP) {
5165 if (zfs_prop_user(propname)) {
5166 zone_admin_result_t zone_result;
5167 zone_result = zone_dataset_admin_check(dsname,
5168 ZONE_OP_SETPROP, NULL);
5169 if (zone_result == ZONE_ADMIN_ALLOWED)
5170 return (zfs_secpolicy_zoned_uid_deleg(dsname,
5171 ZFS_DELEG_PERM_USERPROP, cr));
5172 if (zone_result == ZONE_ADMIN_DENIED)
5173 return (SET_ERROR(EPERM));
5174 if ((err = zfs_secpolicy_write_perms(dsname,
5175 ZFS_DELEG_PERM_USERPROP, cr)))
5176 return (err);
5177 return (0);
5178 }
5179
5180 if (!issnap && zfs_prop_userquota(propname)) {
5181 const char *perm = NULL;
5182 const char *uq_prefix =
5183 zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
5184 const char *gq_prefix =
5185 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
5186 const char *uiq_prefix =
5187 zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA];
5188 const char *giq_prefix =
5189 zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA];
5190 const char *pq_prefix =
5191 zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA];
5192 const char *piq_prefix = zfs_userquota_prop_prefixes[\
5193 ZFS_PROP_PROJECTOBJQUOTA];
5194
5195 if (strncmp(propname, uq_prefix,
5196 strlen(uq_prefix)) == 0) {
5197 perm = ZFS_DELEG_PERM_USERQUOTA;
5198 } else if (strncmp(propname, uiq_prefix,
5199 strlen(uiq_prefix)) == 0) {
5200 perm = ZFS_DELEG_PERM_USEROBJQUOTA;
5201 } else if (strncmp(propname, gq_prefix,
5202 strlen(gq_prefix)) == 0) {
5203 perm = ZFS_DELEG_PERM_GROUPQUOTA;
5204 } else if (strncmp(propname, giq_prefix,
5205 strlen(giq_prefix)) == 0) {
5206 perm = ZFS_DELEG_PERM_GROUPOBJQUOTA;
5207 } else if (strncmp(propname, pq_prefix,
5208 strlen(pq_prefix)) == 0) {
5209 perm = ZFS_DELEG_PERM_PROJECTQUOTA;
5210 } else if (strncmp(propname, piq_prefix,
5211 strlen(piq_prefix)) == 0) {
5212 perm = ZFS_DELEG_PERM_PROJECTOBJQUOTA;
5213 } else {
5214 /* {USER|GROUP|PROJECT}USED are read-only */
5215 return (SET_ERROR(EINVAL));
5216 }
5217
5218 zone_admin_result_t zone_result;
5219 zone_result = zone_dataset_admin_check(dsname,
5220 ZONE_OP_SETPROP, NULL);
5221 if (zone_result == ZONE_ADMIN_ALLOWED)
5222 return (zfs_secpolicy_zoned_uid_deleg(dsname,
5223 perm, cr));
5224 if (zone_result == ZONE_ADMIN_DENIED)
5225 return (SET_ERROR(EPERM));
5226 if ((err = zfs_secpolicy_write_perms(dsname, perm, cr)))
5227 return (err);
5228 return (0);
5229 }
5230
5231 return (SET_ERROR(EINVAL));
5232 }
5233
5234 if (issnap)
5235 return (SET_ERROR(EINVAL));
5236
5237 if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
5238 /*
5239 * dsl_prop_get_all_impl() returns properties in this
5240 * format.
5241 */
5242 nvlist_t *attrs;
5243 VERIFY0(nvpair_value_nvlist(pair, &attrs));
5244 VERIFY0(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &pair));
5245 }
5246
5247 /*
5248 * Check that this value is valid for this pool version
5249 */
5250 switch (prop) {
5251 case ZFS_PROP_COMPRESSION:
5252 /*
5253 * If the user specified gzip compression, make sure
5254 * the SPA supports it. We ignore any errors here since
5255 * we'll catch them later.
5256 */
5257 if (nvpair_value_uint64(pair, &intval) == 0) {
5258 compval = ZIO_COMPRESS_ALGO(intval);
5259 if (compval >= ZIO_COMPRESS_GZIP_1 &&
5260 compval <= ZIO_COMPRESS_GZIP_9 &&
5261 zfs_earlier_version(dsname,
5262 SPA_VERSION_GZIP_COMPRESSION)) {
5263 return (SET_ERROR(ENOTSUP));
5264 }
5265
5266 if (compval == ZIO_COMPRESS_ZLE &&
5267 zfs_earlier_version(dsname,
5268 SPA_VERSION_ZLE_COMPRESSION))
5269 return (SET_ERROR(ENOTSUP));
5270
5271 if (compval == ZIO_COMPRESS_LZ4) {
5272 spa_t *spa;
5273
5274 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
5275 return (err);
5276
5277 if (!spa_feature_is_enabled(spa,
5278 SPA_FEATURE_LZ4_COMPRESS)) {
5279 spa_close(spa, FTAG);
5280 return (SET_ERROR(ENOTSUP));
5281 }
5282 spa_close(spa, FTAG);
5283 }
5284
5285 if (compval == ZIO_COMPRESS_ZSTD) {
5286 spa_t *spa;
5287
5288 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
5289 return (err);
5290
5291 if (!spa_feature_is_enabled(spa,
5292 SPA_FEATURE_ZSTD_COMPRESS)) {
5293 spa_close(spa, FTAG);
5294 return (SET_ERROR(ENOTSUP));
5295 }
5296 spa_close(spa, FTAG);
5297 }
5298 }
5299 break;
5300
5301 case ZFS_PROP_COPIES:
5302 if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
5303 return (SET_ERROR(ENOTSUP));
5304 break;
5305
5306 case ZFS_PROP_VOLBLOCKSIZE:
5307 case ZFS_PROP_RECORDSIZE:
5308 /* Record sizes above 128k need the feature to be enabled */
5309 if (nvpair_value_uint64(pair, &intval) == 0 &&
5310 intval > SPA_OLD_MAXBLOCKSIZE) {
5311 spa_t *spa;
5312
5313 /*
5314 * We don't allow setting the property above 1MB,
5315 * unless the tunable has been changed.
5316 */
5317 if (intval > zfs_max_recordsize ||
5318 intval > SPA_MAXBLOCKSIZE)
5319 return (SET_ERROR(ERANGE));
5320
5321 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
5322 return (err);
5323
5324 if (!spa_feature_is_enabled(spa,
5325 SPA_FEATURE_LARGE_BLOCKS)) {
5326 spa_close(spa, FTAG);
5327 return (SET_ERROR(ENOTSUP));
5328 }
5329 spa_close(spa, FTAG);
5330 }
5331 break;
5332
5333 case ZFS_PROP_DNODESIZE:
5334 /* Dnode sizes above 512 need the feature to be enabled */
5335 if (nvpair_value_uint64(pair, &intval) == 0 &&
5336 intval != ZFS_DNSIZE_LEGACY) {
5337 spa_t *spa;
5338
5339 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
5340 return (err);
5341
5342 if (!spa_feature_is_enabled(spa,
5343 SPA_FEATURE_LARGE_DNODE)) {
5344 spa_close(spa, FTAG);
5345 return (SET_ERROR(ENOTSUP));
5346 }
5347 spa_close(spa, FTAG);
5348 }
5349 break;
5350
5351 case ZFS_PROP_SHARESMB:
5352 if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
5353 return (SET_ERROR(ENOTSUP));
5354 break;
5355
5356 case ZFS_PROP_ACLINHERIT:
5357 if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
5358 nvpair_value_uint64(pair, &intval) == 0) {
5359 if (intval == ZFS_ACL_PASSTHROUGH_X &&
5360 zfs_earlier_version(dsname,
5361 SPA_VERSION_PASSTHROUGH_X))
5362 return (SET_ERROR(ENOTSUP));
5363 }
5364 break;
5365 case ZFS_PROP_CHECKSUM:
5366 case ZFS_PROP_DEDUP:
5367 {
5368 spa_feature_t feature;
5369 spa_t *spa;
5370 int err;
5371
5372 /* dedup feature version checks */
5373 if (prop == ZFS_PROP_DEDUP &&
5374 zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
5375 return (SET_ERROR(ENOTSUP));
5376
5377 if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
5378 nvpair_value_uint64(pair, &intval) == 0) {
5379 /* check prop value is enabled in features */
5380 feature = zio_checksum_to_feature(
5381 intval & ZIO_CHECKSUM_MASK);
5382 if (feature == SPA_FEATURE_NONE)
5383 break;
5384
5385 if ((err = spa_open(dsname, &spa, FTAG)) != 0)
5386 return (err);
5387
5388 if (!spa_feature_is_enabled(spa, feature)) {
5389 spa_close(spa, FTAG);
5390 return (SET_ERROR(ENOTSUP));
5391 }
5392 spa_close(spa, FTAG);
5393 }
5394 break;
5395 }
5396
5397 default:
5398 break;
5399 }
5400
5401 return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
5402 }
5403
5404 /*
5405 * Removes properties from the given props list that fail permission checks
5406 * needed to clear them and to restore them in case of a receive error. For each
5407 * property, make sure we have both set and inherit permissions.
5408 *
5409 * Returns the first error encountered if any permission checks fail. If the
5410 * caller provides a non-NULL errlist, it also gives the complete list of names
5411 * of all the properties that failed a permission check along with the
5412 * corresponding error numbers. The caller is responsible for freeing the
5413 * returned errlist.
5414 *
5415 * If every property checks out successfully, zero is returned and the list
5416 * pointed at by errlist is NULL.
5417 */
5418 static int
zfs_check_clearable(const char * dataset,nvlist_t * props,nvlist_t ** errlist)5419 zfs_check_clearable(const char *dataset, nvlist_t *props, nvlist_t **errlist)
5420 {
5421 zfs_cmd_t *zc;
5422 nvpair_t *pair, *next_pair;
5423 nvlist_t *errors;
5424 int err, rv = 0;
5425
5426 if (props == NULL)
5427 return (0);
5428
5429 VERIFY0(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP));
5430
5431 zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
5432 (void) strlcpy(zc->zc_name, dataset, sizeof (zc->zc_name));
5433 pair = nvlist_next_nvpair(props, NULL);
5434 while (pair != NULL) {
5435 next_pair = nvlist_next_nvpair(props, pair);
5436
5437 (void) strlcpy(zc->zc_value, nvpair_name(pair),
5438 sizeof (zc->zc_value));
5439 if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
5440 (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
5441 VERIFY0(nvlist_remove_nvpair(props, pair));
5442 VERIFY0(nvlist_add_int32(errors, zc->zc_value, err));
5443 }
5444 pair = next_pair;
5445 }
5446 kmem_free(zc, sizeof (zfs_cmd_t));
5447
5448 if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
5449 nvlist_free(errors);
5450 errors = NULL;
5451 } else {
5452 VERIFY0(nvpair_value_int32(pair, &rv));
5453 }
5454
5455 if (errlist == NULL)
5456 nvlist_free(errors);
5457 else
5458 *errlist = errors;
5459
5460 return (rv);
5461 }
5462
5463 static boolean_t
propval_equals(nvpair_t * p1,nvpair_t * p2)5464 propval_equals(nvpair_t *p1, nvpair_t *p2)
5465 {
5466 if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
5467 /* dsl_prop_get_all_impl() format */
5468 nvlist_t *attrs;
5469 VERIFY0(nvpair_value_nvlist(p1, &attrs));
5470 VERIFY0(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &p1));
5471 }
5472
5473 if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
5474 nvlist_t *attrs;
5475 VERIFY0(nvpair_value_nvlist(p2, &attrs));
5476 VERIFY0(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &p2));
5477 }
5478
5479 if (nvpair_type(p1) != nvpair_type(p2))
5480 return (B_FALSE);
5481
5482 if (nvpair_type(p1) == DATA_TYPE_STRING) {
5483 const char *valstr1, *valstr2;
5484
5485 VERIFY0(nvpair_value_string(p1, &valstr1));
5486 VERIFY0(nvpair_value_string(p2, &valstr2));
5487 return (strcmp(valstr1, valstr2) == 0);
5488 } else {
5489 uint64_t intval1, intval2;
5490
5491 VERIFY0(nvpair_value_uint64(p1, &intval1));
5492 VERIFY0(nvpair_value_uint64(p2, &intval2));
5493 return (intval1 == intval2);
5494 }
5495 }
5496
5497 /*
5498 * Remove properties from props if they are not going to change (as determined
5499 * by comparison with origprops). Remove them from origprops as well, since we
5500 * do not need to clear or restore properties that won't change.
5501 */
5502 static void
props_reduce(nvlist_t * props,nvlist_t * origprops)5503 props_reduce(nvlist_t *props, nvlist_t *origprops)
5504 {
5505 nvpair_t *pair, *next_pair;
5506
5507 if (origprops == NULL)
5508 return; /* all props need to be received */
5509
5510 pair = nvlist_next_nvpair(props, NULL);
5511 while (pair != NULL) {
5512 const char *propname = nvpair_name(pair);
5513 nvpair_t *match;
5514
5515 next_pair = nvlist_next_nvpair(props, pair);
5516
5517 if ((nvlist_lookup_nvpair(origprops, propname,
5518 &match) != 0) || !propval_equals(pair, match))
5519 goto next; /* need to set received value */
5520
5521 /* don't clear the existing received value */
5522 (void) nvlist_remove_nvpair(origprops, match);
5523 /* don't bother receiving the property */
5524 (void) nvlist_remove_nvpair(props, pair);
5525 next:
5526 pair = next_pair;
5527 }
5528 }
5529
5530 /*
5531 * Extract properties that cannot be set PRIOR to the receipt of a dataset.
5532 * For example, refquota cannot be set until after the receipt of a dataset,
5533 * because in replication streams, an older/earlier snapshot may exceed the
5534 * refquota. We want to receive the older/earlier snapshot, but setting
5535 * refquota pre-receipt will set the dsl's ACTUAL quota, which will prevent
5536 * the older/earlier snapshot from being received (with EDQUOT).
5537 *
5538 * The ZFS test "zfs_receive_011_pos" demonstrates such a scenario.
5539 *
5540 * libzfs will need to be judicious handling errors encountered by props
5541 * extracted by this function.
5542 */
5543 static nvlist_t *
extract_delay_props(nvlist_t * props)5544 extract_delay_props(nvlist_t *props)
5545 {
5546 nvlist_t *delayprops;
5547 nvpair_t *nvp, *tmp;
5548 static const zfs_prop_t delayable[] = {
5549 ZFS_PROP_REFQUOTA,
5550 ZFS_PROP_KEYLOCATION,
5551 /*
5552 * Setting ZFS_PROP_SHARESMB requires the objset type to be
5553 * known, which is not possible prior to receipt of raw sends.
5554 */
5555 ZFS_PROP_SHARESMB,
5556 0
5557 };
5558 int i;
5559
5560 VERIFY0(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP));
5561
5562 for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL;
5563 nvp = nvlist_next_nvpair(props, nvp)) {
5564 /*
5565 * strcmp() is safe because zfs_prop_to_name() always returns
5566 * a bounded string.
5567 */
5568 for (i = 0; delayable[i] != 0; i++) {
5569 if (strcmp(zfs_prop_to_name(delayable[i]),
5570 nvpair_name(nvp)) == 0) {
5571 break;
5572 }
5573 }
5574 if (delayable[i] != 0) {
5575 tmp = nvlist_prev_nvpair(props, nvp);
5576 VERIFY0(nvlist_add_nvpair(delayprops, nvp));
5577 VERIFY0(nvlist_remove_nvpair(props, nvp));
5578 nvp = tmp;
5579 }
5580 }
5581
5582 if (nvlist_empty(delayprops)) {
5583 nvlist_free(delayprops);
5584 delayprops = NULL;
5585 }
5586 return (delayprops);
5587 }
5588
5589 static void
zfs_allow_log_destroy(void * arg)5590 zfs_allow_log_destroy(void *arg)
5591 {
5592 char *poolname = arg;
5593
5594 if (poolname != NULL)
5595 kmem_strfree(poolname);
5596 }
5597
#ifdef ZFS_DEBUG
/*
 * Debug-only fault injection switch.  When set, zfs_ioc_recv_impl() clears it
 * again and forces its result to an error, which exercises the code that
 * restores the original properties after a failed receive.
 */
static boolean_t zfs_ioc_recv_inject_err;
#endif
5601
5602 /*
5603 * nvlist 'errors' is always allocated. It will contain descriptions of
5604 * encountered errors, if any. It's the callers responsibility to free.
5605 */
5606 static int
zfs_ioc_recv_impl(char * tofs,char * tosnap,const char * origin,nvlist_t * recvprops,nvlist_t * localprops,nvlist_t * hidden_args,boolean_t force,boolean_t heal,boolean_t resumable,int input_fd,dmu_replay_record_t * begin_record,uint64_t * read_bytes,uint64_t * errflags,nvlist_t ** errors)5607 zfs_ioc_recv_impl(char *tofs, char *tosnap, const char *origin,
5608 nvlist_t *recvprops, nvlist_t *localprops, nvlist_t *hidden_args,
5609 boolean_t force, boolean_t heal, boolean_t resumable, int input_fd,
5610 dmu_replay_record_t *begin_record, uint64_t *read_bytes,
5611 uint64_t *errflags, nvlist_t **errors)
5612 {
5613 dmu_recv_cookie_t drc;
5614 int error = 0;
5615 int props_error = 0;
5616 offset_t off, noff;
5617 nvlist_t *local_delayprops = NULL;
5618 nvlist_t *recv_delayprops = NULL;
5619 nvlist_t *inherited_delayprops = NULL;
5620 nvlist_t *origprops = NULL; /* existing properties */
5621 nvlist_t *origrecvd = NULL; /* existing received properties */
5622 boolean_t first_recvd_props = B_FALSE;
5623 boolean_t tofs_was_redacted;
5624 zfs_file_t *input_fp;
5625
5626 *read_bytes = 0;
5627 *errflags = 0;
5628 *errors = fnvlist_alloc();
5629 off = 0;
5630
5631 if ((input_fp = zfs_file_get(input_fd)) == NULL)
5632 return (SET_ERROR(EBADF));
5633
5634 noff = off = zfs_file_off(input_fp);
5635 error = dmu_recv_begin(tofs, tosnap, begin_record, force, heal,
5636 resumable, localprops, hidden_args, origin, &drc, input_fp,
5637 &off);
5638 if (error != 0)
5639 goto out;
5640 tofs_was_redacted = dsl_get_redacted(drc.drc_ds);
5641
5642 /*
5643 * Set properties before we receive the stream so that they are applied
5644 * to the new data. Note that we must call dmu_recv_stream() if
5645 * dmu_recv_begin() succeeds.
5646 */
5647 if (recvprops != NULL && !drc.drc_newfs) {
5648 if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >=
5649 SPA_VERSION_RECVD_PROPS &&
5650 !dsl_prop_get_hasrecvd(tofs))
5651 first_recvd_props = B_TRUE;
5652
5653 /*
5654 * If new received properties are supplied, they are to
5655 * completely replace the existing received properties,
5656 * so stash away the existing ones.
5657 */
5658 if (dsl_prop_get_received(tofs, &origrecvd) == 0) {
5659 nvlist_t *errlist = NULL;
5660 /*
5661 * Don't bother writing a property if its value won't
5662 * change (and avoid the unnecessary security checks).
5663 *
5664 * The first receive after SPA_VERSION_RECVD_PROPS is a
5665 * special case where we blow away all local properties
5666 * regardless.
5667 */
5668 if (!first_recvd_props)
5669 props_reduce(recvprops, origrecvd);
5670 if (zfs_check_clearable(tofs, origrecvd, &errlist) != 0)
5671 (void) nvlist_merge(*errors, errlist, 0);
5672 nvlist_free(errlist);
5673
5674 if (clear_received_props(tofs, origrecvd,
5675 first_recvd_props ? NULL : recvprops) != 0)
5676 *errflags |= ZPROP_ERR_NOCLEAR;
5677 } else {
5678 *errflags |= ZPROP_ERR_NOCLEAR;
5679 }
5680 }
5681
5682 /*
5683 * Stash away existing properties so we can restore them on error unless
5684 * we're doing the first receive after SPA_VERSION_RECVD_PROPS, in which
5685 * case "origrecvd" will take care of that.
5686 */
5687 if (localprops != NULL && !drc.drc_newfs && !first_recvd_props) {
5688 objset_t *os;
5689 if (dmu_objset_hold(tofs, FTAG, &os) == 0) {
5690 if (dsl_prop_get_all(os, &origprops) != 0) {
5691 *errflags |= ZPROP_ERR_NOCLEAR;
5692 }
5693 dmu_objset_rele(os, FTAG);
5694 } else {
5695 *errflags |= ZPROP_ERR_NOCLEAR;
5696 }
5697 }
5698
5699 if (recvprops != NULL) {
5700 props_error = dsl_prop_set_hasrecvd(tofs);
5701
5702 if (props_error == 0) {
5703 recv_delayprops = extract_delay_props(recvprops);
5704 (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
5705 recvprops, *errors);
5706 }
5707 }
5708
5709 if (localprops != NULL) {
5710 nvlist_t *oprops = fnvlist_alloc();
5711 nvlist_t *xprops = fnvlist_alloc();
5712 nvpair_t *nvp = NULL;
5713
5714 while ((nvp = nvlist_next_nvpair(localprops, nvp)) != NULL) {
5715 if (nvpair_type(nvp) == DATA_TYPE_BOOLEAN) {
5716 /* -x property */
5717 const char *name = nvpair_name(nvp);
5718 zfs_prop_t prop = zfs_name_to_prop(name);
5719 if (prop != ZPROP_USERPROP) {
5720 if (!zfs_prop_inheritable(prop))
5721 continue;
5722 } else if (!zfs_prop_user(name))
5723 continue;
5724 fnvlist_add_boolean(xprops, name);
5725 } else {
5726 /* -o property=value */
5727 fnvlist_add_nvpair(oprops, nvp);
5728 }
5729 }
5730
5731 local_delayprops = extract_delay_props(oprops);
5732 (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
5733 oprops, *errors);
5734 inherited_delayprops = extract_delay_props(xprops);
5735 (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED,
5736 xprops, *errors);
5737
5738 nvlist_free(oprops);
5739 nvlist_free(xprops);
5740 }
5741
5742 error = dmu_recv_stream(&drc, &off);
5743
5744 if (error == 0) {
5745 zfsvfs_t *zfsvfs = NULL;
5746 zvol_state_handle_t *zv = NULL;
5747
5748 if (getzfsvfs(tofs, &zfsvfs) == 0) {
5749 /* online recv */
5750 dsl_dataset_t *ds;
5751 int end_err;
5752 boolean_t stream_is_redacted = DMU_GET_FEATUREFLAGS(
5753 begin_record->drr_u.drr_begin.
5754 drr_versioninfo) & DMU_BACKUP_FEATURE_REDACTED;
5755
5756 ds = dmu_objset_ds(zfsvfs->z_os);
5757 error = zfs_suspend_fs(zfsvfs);
5758 /*
5759 * If the suspend fails, then the recv_end will
5760 * likely also fail, and clean up after itself.
5761 */
5762 end_err = dmu_recv_end(&drc, zfsvfs);
5763 /*
5764 * If the dataset was not redacted, but we received a
5765 * redacted stream onto it, we need to unmount the
5766 * dataset. Otherwise, resume the filesystem.
5767 */
5768 if (error == 0 && !drc.drc_newfs &&
5769 stream_is_redacted && !tofs_was_redacted) {
5770 error = zfs_end_fs(zfsvfs, ds);
5771 } else if (error == 0) {
5772 error = zfs_resume_fs(zfsvfs, ds);
5773 }
5774 error = error ? error : end_err;
5775 zfs_vfs_rele(zfsvfs);
5776 } else if (zvol_suspend(tofs, &zv) == 0) {
5777 error = dmu_recv_end(&drc, zvol_tag(zv));
5778 zvol_resume(zv);
5779 } else {
5780 error = dmu_recv_end(&drc, NULL);
5781 }
5782
5783 /* Set delayed properties now, after we're done receiving. */
5784 if (recv_delayprops != NULL && error == 0) {
5785 (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
5786 recv_delayprops, *errors);
5787 }
5788 if (local_delayprops != NULL && error == 0) {
5789 (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
5790 local_delayprops, *errors);
5791 }
5792 if (inherited_delayprops != NULL && error == 0) {
5793 (void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED,
5794 inherited_delayprops, *errors);
5795 }
5796 }
5797
5798 /*
5799 * Merge delayed props back in with initial props, in case
5800 * we're DEBUG and zfs_ioc_recv_inject_err is set (which means
5801 * we have to make sure clear_received_props() includes
5802 * the delayed properties).
5803 *
5804 * Since zfs_ioc_recv_inject_err is only in DEBUG kernels,
5805 * using ASSERT() will be just like a VERIFY.
5806 */
5807 if (recv_delayprops != NULL) {
5808 ASSERT0(nvlist_merge(recvprops, recv_delayprops, 0));
5809 nvlist_free(recv_delayprops);
5810 }
5811 if (local_delayprops != NULL) {
5812 ASSERT0(nvlist_merge(localprops, local_delayprops, 0));
5813 nvlist_free(local_delayprops);
5814 }
5815 if (inherited_delayprops != NULL) {
5816 ASSERT0(nvlist_merge(localprops, inherited_delayprops, 0));
5817 nvlist_free(inherited_delayprops);
5818 }
5819 *read_bytes = off - noff;
5820
5821 #ifdef ZFS_DEBUG
5822 if (zfs_ioc_recv_inject_err) {
5823 zfs_ioc_recv_inject_err = B_FALSE;
5824 error = 1;
5825 }
5826 #endif
5827
5828 /*
5829 * On error, restore the original props.
5830 */
5831 if (error != 0 && recvprops != NULL && !drc.drc_newfs) {
5832 if (clear_received_props(tofs, recvprops, NULL) != 0) {
5833 /*
5834 * We failed to clear the received properties.
5835 * Since we may have left a $recvd value on the
5836 * system, we can't clear the $hasrecvd flag.
5837 */
5838 *errflags |= ZPROP_ERR_NORESTORE;
5839 } else if (first_recvd_props) {
5840 dsl_prop_unset_hasrecvd(tofs);
5841 }
5842
5843 if (origrecvd == NULL && !drc.drc_newfs) {
5844 /* We failed to stash the original properties. */
5845 *errflags |= ZPROP_ERR_NORESTORE;
5846 }
5847
5848 /*
5849 * dsl_props_set() will not convert RECEIVED to LOCAL on or
5850 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
5851 * explicitly if we're restoring local properties cleared in the
5852 * first new-style receive.
5853 */
5854 if (origrecvd != NULL &&
5855 zfs_set_prop_nvlist(tofs, (first_recvd_props ?
5856 ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
5857 origrecvd, NULL) != 0) {
5858 /*
5859 * We stashed the original properties but failed to
5860 * restore them.
5861 */
5862 *errflags |= ZPROP_ERR_NORESTORE;
5863 }
5864 }
5865 if (error != 0 && localprops != NULL && !drc.drc_newfs &&
5866 !first_recvd_props) {
5867 nvlist_t *setprops;
5868 nvlist_t *inheritprops;
5869 nvpair_t *nvp;
5870
5871 if (origprops == NULL) {
5872 /* We failed to stash the original properties. */
5873 *errflags |= ZPROP_ERR_NORESTORE;
5874 goto out;
5875 }
5876
5877 /* Restore original props */
5878 setprops = fnvlist_alloc();
5879 inheritprops = fnvlist_alloc();
5880 nvp = NULL;
5881 while ((nvp = nvlist_next_nvpair(localprops, nvp)) != NULL) {
5882 const char *name = nvpair_name(nvp);
5883 const char *source;
5884 nvlist_t *attrs;
5885
5886 if (!nvlist_exists(origprops, name)) {
5887 /*
5888 * Property was not present or was explicitly
5889 * inherited before the receive, restore this.
5890 */
5891 fnvlist_add_boolean(inheritprops, name);
5892 continue;
5893 }
5894 attrs = fnvlist_lookup_nvlist(origprops, name);
5895 source = fnvlist_lookup_string(attrs, ZPROP_SOURCE);
5896
5897 /* Skip received properties */
5898 if (strcmp(source, ZPROP_SOURCE_VAL_RECVD) == 0)
5899 continue;
5900
5901 if (strcmp(source, tofs) == 0) {
5902 /* Property was locally set */
5903 fnvlist_add_nvlist(setprops, name, attrs);
5904 } else {
5905 /* Property was implicitly inherited */
5906 fnvlist_add_boolean(inheritprops, name);
5907 }
5908 }
5909
5910 if (zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL, setprops,
5911 NULL) != 0)
5912 *errflags |= ZPROP_ERR_NORESTORE;
5913 if (zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED, inheritprops,
5914 NULL) != 0)
5915 *errflags |= ZPROP_ERR_NORESTORE;
5916
5917 nvlist_free(setprops);
5918 nvlist_free(inheritprops);
5919 }
5920 out:
5921 zfs_file_put(input_fp);
5922 nvlist_free(origrecvd);
5923 nvlist_free(origprops);
5924
5925 if (error == 0)
5926 error = props_error;
5927
5928 return (error);
5929 }
5930
5931 /*
5932 * inputs:
5933 * zc_name name of containing filesystem (unused)
5934 * zc_nvlist_src{_size} nvlist of properties to apply
5935 * zc_nvlist_conf{_size} nvlist of properties to exclude
5936 * (DATA_TYPE_BOOLEAN) and override (everything else)
5937 * zc_value name of snapshot to create
5938 * zc_string name of clone origin (if DRR_FLAG_CLONE)
5939 * zc_cookie file descriptor to recv from
5940 * zc_begin_record the BEGIN record of the stream (not byteswapped)
5941 * zc_guid force flag
5942 *
5943 * outputs:
5944 * zc_cookie number of bytes read
5945 * zc_obj zprop_errflags_t
5946 * zc_nvlist_dst{_size} error for each unapplied received property
5947 */
5948 static int
zfs_ioc_recv(zfs_cmd_t * zc)5949 zfs_ioc_recv(zfs_cmd_t *zc)
5950 {
5951 dmu_replay_record_t begin_record;
5952 nvlist_t *errors = NULL;
5953 nvlist_t *recvdprops = NULL;
5954 nvlist_t *localprops = NULL;
5955 const char *origin = NULL;
5956 char *tosnap;
5957 char tofs[ZFS_MAX_DATASET_NAME_LEN];
5958 int error = 0;
5959
5960 if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
5961 strchr(zc->zc_value, '@') == NULL ||
5962 strchr(zc->zc_value, '%') != NULL) {
5963 return (SET_ERROR(EINVAL));
5964 }
5965
5966 (void) strlcpy(tofs, zc->zc_value, sizeof (tofs));
5967 tosnap = strchr(tofs, '@');
5968 *tosnap++ = '\0';
5969
5970 if (zc->zc_nvlist_src != 0 &&
5971 (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
5972 zc->zc_iflags, &recvdprops)) != 0) {
5973 goto out;
5974 }
5975
5976 if (zc->zc_nvlist_conf != 0 &&
5977 (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
5978 zc->zc_iflags, &localprops)) != 0) {
5979 goto out;
5980 }
5981
5982 if (zc->zc_string[0])
5983 origin = zc->zc_string;
5984
5985 begin_record.drr_type = DRR_BEGIN;
5986 begin_record.drr_payloadlen = 0;
5987 begin_record.drr_u.drr_begin = zc->zc_begin_record;
5988
5989 error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvdprops, localprops,
5990 NULL, zc->zc_guid, B_FALSE, B_FALSE, zc->zc_cookie, &begin_record,
5991 &zc->zc_cookie, &zc->zc_obj, &errors);
5992
5993 /*
5994 * Now that all props, initial and delayed, are set, report the prop
5995 * errors to the caller.
5996 */
5997 if (zc->zc_nvlist_dst_size != 0 && errors != NULL &&
5998 (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
5999 put_nvlist(zc, errors) != 0)) {
6000 /*
6001 * Caller made zc->zc_nvlist_dst less than the minimum expected
6002 * size or supplied an invalid address.
6003 */
6004 error = SET_ERROR(EINVAL);
6005 }
6006
6007 out:
6008 nvlist_free(errors);
6009 nvlist_free(recvdprops);
6010 nvlist_free(localprops);
6011
6012 return (error);
6013 }
6014
6015 /*
6016 * innvl: {
6017 * "snapname" -> full name of the snapshot to create
6018 * (optional) "props" -> received properties to set (nvlist)
6019 * (optional) "localprops" -> override and exclude properties (nvlist)
6020 * (optional) "origin" -> name of clone origin (DRR_FLAG_CLONE)
6021 * "begin_record" -> non-byteswapped dmu_replay_record_t
6022 * "input_fd" -> file descriptor to read stream from (int32)
6023 * (optional) "force" -> force flag (value ignored)
6024 * (optional) "heal" -> use send stream to heal data corruption
6025 * (optional) "resumable" -> resumable flag (value ignored)
6026 * (optional) "cleanup_fd" -> unused
6027 * (optional) "action_handle" -> unused
6028 * (optional) "hidden_args" -> { "wkeydata" -> value }
6029 * }
6030 *
6031 * outnvl: {
6032 * "read_bytes" -> number of bytes read
6033 * "error_flags" -> zprop_errflags_t
6034 * "errors" -> error for each unapplied received property (nvlist)
6035 * }
6036 */
/*
 * Input keys for zfs_ioc_recv_new(): name, expected nvpair type, and flags.
 * Entries flagged ZK_OPTIONAL correspond to the "(optional)" keys in the
 * innvl description above.
 */
static const zfs_ioc_key_t zfs_keys_recv_new[] = {
	{"snapname",		DATA_TYPE_STRING,	0},
	{"props",		DATA_TYPE_NVLIST,	ZK_OPTIONAL},
	{"localprops",		DATA_TYPE_NVLIST,	ZK_OPTIONAL},
	{"origin",		DATA_TYPE_STRING,	ZK_OPTIONAL},
	{"begin_record",	DATA_TYPE_BYTE_ARRAY,	0},
	{"input_fd",		DATA_TYPE_INT32,	0},
	{"force",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"heal",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"resumable",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"cleanup_fd",		DATA_TYPE_INT32,	ZK_OPTIONAL},
	{"action_handle",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
	{"hidden_args",		DATA_TYPE_NVLIST,	ZK_OPTIONAL},
};
6051
6052 static int
zfs_ioc_recv_new(const char * fsname,nvlist_t * innvl,nvlist_t * outnvl)6053 zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
6054 {
6055 dmu_replay_record_t *begin_record;
6056 uint_t begin_record_size;
6057 nvlist_t *errors = NULL;
6058 nvlist_t *recvprops = NULL;
6059 nvlist_t *localprops = NULL;
6060 nvlist_t *hidden_args = NULL;
6061 const char *snapname;
6062 const char *origin = NULL;
6063 char *tosnap;
6064 char tofs[ZFS_MAX_DATASET_NAME_LEN];
6065 boolean_t force;
6066 boolean_t heal;
6067 boolean_t resumable;
6068 uint64_t read_bytes = 0;
6069 uint64_t errflags = 0;
6070 int input_fd = -1;
6071 int error;
6072
6073 snapname = fnvlist_lookup_string(innvl, "snapname");
6074
6075 if (dataset_namecheck(snapname, NULL, NULL) != 0 ||
6076 strchr(snapname, '@') == NULL ||
6077 strchr(snapname, '%') != NULL) {
6078 return (SET_ERROR(EINVAL));
6079 }
6080
6081 (void) strlcpy(tofs, snapname, sizeof (tofs));
6082 tosnap = strchr(tofs, '@');
6083 *tosnap++ = '\0';
6084
6085 error = nvlist_lookup_string(innvl, "origin", &origin);
6086 if (error && error != ENOENT)
6087 return (error);
6088
6089 error = nvlist_lookup_byte_array(innvl, "begin_record",
6090 (uchar_t **)&begin_record, &begin_record_size);
6091 if (error != 0 || begin_record_size != sizeof (*begin_record))
6092 return (SET_ERROR(EINVAL));
6093
6094 input_fd = fnvlist_lookup_int32(innvl, "input_fd");
6095
6096 force = nvlist_exists(innvl, "force");
6097 heal = nvlist_exists(innvl, "heal");
6098 resumable = nvlist_exists(innvl, "resumable");
6099
6100 /* we still use "props" here for backwards compatibility */
6101 error = nvlist_lookup_nvlist(innvl, "props", &recvprops);
6102 if (error && error != ENOENT)
6103 goto out;
6104
6105 error = nvlist_lookup_nvlist(innvl, "localprops", &localprops);
6106 if (error && error != ENOENT)
6107 goto out;
6108
6109 error = nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
6110 if (error && error != ENOENT)
6111 goto out;
6112
6113 error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvprops, localprops,
6114 hidden_args, force, heal, resumable, input_fd, begin_record,
6115 &read_bytes, &errflags, &errors);
6116
6117 fnvlist_add_uint64(outnvl, "read_bytes", read_bytes);
6118 fnvlist_add_uint64(outnvl, "error_flags", errflags);
6119 fnvlist_add_nvlist(outnvl, "errors", errors);
6120
6121 out:
6122 nvlist_free(errors);
6123 nvlist_free(recvprops);
6124 nvlist_free(localprops);
6125 nvlist_free(hidden_args);
6126
6127 return (error);
6128 }
6129
/*
 * When stack space is limited (HAVE_LARGE_STACKS is not defined), we write
 * replication stream data to the target on a separate taskq thread, to make
 * sure there's enough stack space.
 */
#ifndef HAVE_LARGE_STACKS
#define	USE_SEND_TASKQ	1
#endif
6137
/* One write request handed from dump_bytes() to dump_bytes_cb(). */
typedef struct dump_bytes_io {
	zfs_file_t	*dbi_fp;	/* file to write to */
	caddr_t		dbi_buf;	/* data to write */
	int		dbi_len;	/* number of bytes in dbi_buf */
	int		dbi_err;	/* out: zfs_file_write() result */
} dump_bytes_io_t;
6144
6145 static void
dump_bytes_cb(void * arg)6146 dump_bytes_cb(void *arg)
6147 {
6148 dump_bytes_io_t *dbi = (dump_bytes_io_t *)arg;
6149 zfs_file_t *fp;
6150 caddr_t buf;
6151
6152 fp = dbi->dbi_fp;
6153 buf = dbi->dbi_buf;
6154
6155 dbi->dbi_err = zfs_file_write(fp, buf, dbi->dbi_len, NULL);
6156 }
6157
/*
 * State shared by all dump_bytes() calls of one send; set up by
 * dump_bytes_init() and torn down by dump_bytes_fini().
 */
typedef struct dump_bytes_arg {
	zfs_file_t	*dba_fp;	/* output file of the send stream */
#ifdef USE_SEND_TASKQ
	taskq_t		*dba_tq;	/* taskq that runs dump_bytes_cb() */
	taskq_ent_t	dba_tqent;	/* entry used for each dispatch */
#endif
} dump_bytes_arg_t;
6165
6166 static int
dump_bytes(objset_t * os,void * buf,int len,void * arg)6167 dump_bytes(objset_t *os, void *buf, int len, void *arg)
6168 {
6169 dump_bytes_arg_t *dba = (dump_bytes_arg_t *)arg;
6170 dump_bytes_io_t dbi;
6171
6172 dbi.dbi_fp = dba->dba_fp;
6173 dbi.dbi_buf = buf;
6174 dbi.dbi_len = len;
6175
6176 #ifdef USE_SEND_TASKQ
6177 taskq_dispatch_ent(dba->dba_tq, dump_bytes_cb, &dbi, TQ_SLEEP,
6178 &dba->dba_tqent);
6179 taskq_wait(dba->dba_tq);
6180 #else
6181 dump_bytes_cb(&dbi);
6182 #endif
6183
6184 return (dbi.dbi_err);
6185 }
6186
6187 static int
dump_bytes_init(dump_bytes_arg_t * dba,int fd,dmu_send_outparams_t * out)6188 dump_bytes_init(dump_bytes_arg_t *dba, int fd, dmu_send_outparams_t *out)
6189 {
6190 zfs_file_t *fp = zfs_file_get(fd);
6191 if (fp == NULL)
6192 return (SET_ERROR(EBADF));
6193
6194 dba->dba_fp = fp;
6195 #ifdef USE_SEND_TASKQ
6196 dba->dba_tq = taskq_create("z_send", 1, defclsyspri, 0, 0, 0);
6197 taskq_init_ent(&dba->dba_tqent);
6198 #endif
6199
6200 memset(out, 0, sizeof (dmu_send_outparams_t));
6201 out->dso_outfunc = dump_bytes;
6202 out->dso_arg = dba;
6203 out->dso_dryrun = B_FALSE;
6204
6205 return (0);
6206 }
6207
6208 static void
dump_bytes_fini(dump_bytes_arg_t * dba)6209 dump_bytes_fini(dump_bytes_arg_t *dba)
6210 {
6211 zfs_file_put(dba->dba_fp);
6212 #ifdef USE_SEND_TASKQ
6213 taskq_destroy(dba->dba_tq);
6214 #endif
6215 }
6216
6217 /*
6218 * inputs:
6219 * zc_name name of snapshot to send
6220 * zc_cookie file descriptor to send stream to
6221 * zc_obj fromorigin flag (mutually exclusive with zc_fromobj)
6222 * zc_sendobj objsetid of snapshot to send
6223 * zc_fromobj objsetid of incremental fromsnap (may be zero)
6224 * zc_guid if set, estimate size of stream only. zc_cookie is ignored.
6225 * output size in zc_objset_type.
6226 * zc_flags lzc_send_flags
6227 *
6228 * outputs:
6229 * zc_objset_type estimated size, if zc_guid is set
6230 *
6231 * NOTE: This is no longer the preferred interface, any new functionality
6232 * should be added to zfs_ioc_send_new() instead.
6233 */
6234 static int
zfs_ioc_send(zfs_cmd_t * zc)6235 zfs_ioc_send(zfs_cmd_t *zc)
6236 {
6237 int error;
6238 offset_t off;
6239 boolean_t estimate = (zc->zc_guid != 0);
6240 boolean_t embedok = (zc->zc_flags & 0x1);
6241 boolean_t large_block_ok = (zc->zc_flags & 0x2);
6242 boolean_t compressok = (zc->zc_flags & 0x4);
6243 boolean_t rawok = (zc->zc_flags & 0x8);
6244 boolean_t savedok = (zc->zc_flags & 0x10);
6245
6246 if (zc->zc_obj != 0) {
6247 dsl_pool_t *dp;
6248 dsl_dataset_t *tosnap;
6249
6250 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
6251 if (error != 0)
6252 return (error);
6253
6254 error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
6255 if (error != 0) {
6256 dsl_pool_rele(dp, FTAG);
6257 return (error);
6258 }
6259
6260 if (dsl_dir_is_clone(tosnap->ds_dir))
6261 zc->zc_fromobj =
6262 dsl_dir_phys(tosnap->ds_dir)->dd_origin_obj;
6263 dsl_dataset_rele(tosnap, FTAG);
6264 dsl_pool_rele(dp, FTAG);
6265 }
6266
6267 if (estimate) {
6268 dsl_pool_t *dp;
6269 dsl_dataset_t *tosnap;
6270 dsl_dataset_t *fromsnap = NULL;
6271
6272 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
6273 if (error != 0)
6274 return (error);
6275
6276 error = dsl_dataset_hold_obj(dp, zc->zc_sendobj,
6277 FTAG, &tosnap);
6278 if (error != 0) {
6279 dsl_pool_rele(dp, FTAG);
6280 return (error);
6281 }
6282
6283 if (zc->zc_fromobj != 0) {
6284 error = dsl_dataset_hold_obj(dp, zc->zc_fromobj,
6285 FTAG, &fromsnap);
6286 if (error != 0) {
6287 dsl_dataset_rele(tosnap, FTAG);
6288 dsl_pool_rele(dp, FTAG);
6289 return (error);
6290 }
6291 }
6292
6293 error = dmu_send_estimate_fast(tosnap, fromsnap, NULL,
6294 compressok || rawok, savedok, &zc->zc_objset_type);
6295
6296 if (fromsnap != NULL)
6297 dsl_dataset_rele(fromsnap, FTAG);
6298 dsl_dataset_rele(tosnap, FTAG);
6299 dsl_pool_rele(dp, FTAG);
6300 } else {
6301 dump_bytes_arg_t dba;
6302 dmu_send_outparams_t out;
6303 error = dump_bytes_init(&dba, zc->zc_cookie, &out);
6304 if (error)
6305 return (error);
6306
6307 off = zfs_file_off(dba.dba_fp);
6308 error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
6309 zc->zc_fromobj, embedok, large_block_ok, compressok,
6310 rawok, savedok, zc->zc_cookie, &off, &out);
6311
6312 dump_bytes_fini(&dba);
6313 }
6314 return (error);
6315 }
6316
6317 /*
6318 * inputs:
6319 * zc_name name of snapshot on which to report progress
6320 * zc_cookie file descriptor of send stream
6321 *
6322 * outputs:
6323 * zc_cookie number of bytes written in send stream thus far
6324 * zc_objset_type logical size of data traversed by send thus far
6325 */
6326 static int
zfs_ioc_send_progress(zfs_cmd_t * zc)6327 zfs_ioc_send_progress(zfs_cmd_t *zc)
6328 {
6329 dsl_pool_t *dp;
6330 dsl_dataset_t *ds;
6331 dmu_sendstatus_t *dsp = NULL;
6332 int error;
6333
6334 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
6335 if (error != 0)
6336 return (error);
6337
6338 error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
6339 if (error != 0) {
6340 dsl_pool_rele(dp, FTAG);
6341 return (error);
6342 }
6343
6344 mutex_enter(&ds->ds_sendstream_lock);
6345
6346 /*
6347 * Iterate over all the send streams currently active on this dataset.
6348 * If there's one which matches the specified file descriptor _and_ the
6349 * stream was started by the current process, return the progress of
6350 * that stream.
6351 */
6352
6353 for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
6354 dsp = list_next(&ds->ds_sendstreams, dsp)) {
6355 if (dsp->dss_outfd == zc->zc_cookie &&
6356 zfs_proc_is_caller(dsp->dss_proc))
6357 break;
6358 }
6359
6360 if (dsp != NULL) {
6361 zc->zc_cookie = atomic_cas_64((volatile uint64_t *)dsp->dss_off,
6362 0, 0);
6363 /* This is the closest thing we have to atomic_read_64. */
6364 zc->zc_objset_type = atomic_cas_64(&dsp->dss_blocks, 0, 0);
6365 } else {
6366 error = SET_ERROR(ENOENT);
6367 }
6368
6369 mutex_exit(&ds->ds_sendstream_lock);
6370 dsl_dataset_rele(ds, FTAG);
6371 dsl_pool_rele(dp, FTAG);
6372 return (error);
6373 }
6374
6375 static int
zfs_ioc_inject_fault(zfs_cmd_t * zc)6376 zfs_ioc_inject_fault(zfs_cmd_t *zc)
6377 {
6378 int id, error;
6379
6380 error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
6381 &zc->zc_inject_record);
6382
6383 if (error == 0)
6384 zc->zc_guid = (uint64_t)id;
6385
6386 return (error);
6387 }
6388
6389 static int
zfs_ioc_clear_fault(zfs_cmd_t * zc)6390 zfs_ioc_clear_fault(zfs_cmd_t *zc)
6391 {
6392 return (zio_clear_fault((int)zc->zc_guid));
6393 }
6394
6395 static int
zfs_ioc_inject_list_next(zfs_cmd_t * zc)6396 zfs_ioc_inject_list_next(zfs_cmd_t *zc)
6397 {
6398 int id = (int)zc->zc_guid;
6399 int error;
6400
6401 error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
6402 &zc->zc_inject_record);
6403
6404 zc->zc_guid = id;
6405
6406 return (error);
6407 }
6408
6409 static int
zfs_ioc_error_log(zfs_cmd_t * zc)6410 zfs_ioc_error_log(zfs_cmd_t *zc)
6411 {
6412 spa_t *spa;
6413 int error;
6414
6415 if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
6416 return (error);
6417
6418 error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
6419 &zc->zc_nvlist_dst_size);
6420
6421 spa_close(spa, FTAG);
6422
6423 return (error);
6424 }
6425
6426 static int
zfs_ioc_clear(zfs_cmd_t * zc)6427 zfs_ioc_clear(zfs_cmd_t *zc)
6428 {
6429 spa_t *spa;
6430 vdev_t *vd;
6431 int error;
6432
6433 /*
6434 * On zpool clear we also fix up missing slogs
6435 */
6436 spa_namespace_enter(FTAG);
6437 spa = spa_lookup(zc->zc_name);
6438 if (spa == NULL) {
6439 spa_namespace_exit(FTAG);
6440 return (SET_ERROR(EIO));
6441 }
6442 if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
6443 /* we need to let spa_open/spa_load clear the chains */
6444 spa_set_log_state(spa, SPA_LOG_CLEAR);
6445 }
6446 spa->spa_last_open_failed = 0;
6447 spa_namespace_exit(FTAG);
6448
6449 if (zc->zc_cookie & ZPOOL_NO_REWIND) {
6450 error = spa_open(zc->zc_name, &spa, FTAG);
6451 } else {
6452 nvlist_t *policy;
6453 nvlist_t *config = NULL;
6454
6455 if (zc->zc_nvlist_src == 0)
6456 return (SET_ERROR(EINVAL));
6457
6458 if ((error = get_nvlist(zc->zc_nvlist_src,
6459 zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
6460 error = spa_open_rewind(zc->zc_name, &spa, FTAG,
6461 policy, &config);
6462 if (config != NULL) {
6463 int err;
6464
6465 if ((err = put_nvlist(zc, config)) != 0)
6466 error = err;
6467 nvlist_free(config);
6468 }
6469 nvlist_free(policy);
6470 }
6471 }
6472
6473 if (error != 0)
6474 return (error);
6475
6476 /*
6477 * If multihost is enabled, resuming I/O is unsafe as another
6478 * host may have imported the pool. Check for remote activity.
6479 */
6480 if (spa_multihost(spa) && spa_suspended(spa) &&
6481 spa_mmp_remote_host_activity(spa)) {
6482 spa_close(spa, FTAG);
6483 return (SET_ERROR(EREMOTEIO));
6484 }
6485
6486 spa_vdev_state_enter(spa, SCL_NONE);
6487
6488 if (zc->zc_guid == 0) {
6489 vd = NULL;
6490 } else {
6491 vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
6492 if (vd == NULL) {
6493 error = SET_ERROR(ENODEV);
6494 (void) spa_vdev_state_exit(spa, NULL, error);
6495 spa_close(spa, FTAG);
6496 return (error);
6497 }
6498 }
6499
6500 vdev_clear(spa, vd);
6501
6502 (void) spa_vdev_state_exit(spa, spa_suspended(spa) ?
6503 NULL : spa->spa_root_vdev, 0);
6504
6505 /*
6506 * Resume any suspended I/Os.
6507 */
6508 if (zio_resume(spa) != 0)
6509 error = SET_ERROR(EIO);
6510
6511 spa_close(spa, FTAG);
6512
6513 return (error);
6514 }
6515
6516 /*
6517 * Reopen all the vdevs associated with the pool.
6518 *
6519 * innvl: {
6520 * "scrub_restart" -> when true and scrub is running, allow to restart
6521 * scrub as the side effect of the reopen (boolean).
6522 * }
6523 *
6524 * outnvl is unused
6525 */
/*
 * nvlist keys for the pool-reopen ioctl (see zfs_ioc_pool_reopen()).
 * Each entry lists the key name, its nvpair data type and its flags.
 */
static const zfs_ioc_key_t zfs_keys_pool_reopen[] = {
	{"scrub_restart",	DATA_TYPE_BOOLEAN_VALUE,	ZK_OPTIONAL},
};
6529
6530 static int
zfs_ioc_pool_reopen(const char * pool,nvlist_t * innvl,nvlist_t * outnvl)6531 zfs_ioc_pool_reopen(const char *pool, nvlist_t *innvl, nvlist_t *outnvl)
6532 {
6533 (void) outnvl;
6534 spa_t *spa;
6535 int error;
6536 boolean_t rc, scrub_restart = B_TRUE;
6537
6538 if (innvl) {
6539 error = nvlist_lookup_boolean_value(innvl,
6540 "scrub_restart", &rc);
6541 if (error == 0)
6542 scrub_restart = rc;
6543 }
6544
6545 error = spa_open(pool, &spa, FTAG);
6546 if (error != 0)
6547 return (error);
6548
6549 spa_vdev_state_enter(spa, SCL_NONE);
6550
6551 /*
6552 * If the scrub_restart flag is B_FALSE and a scrub is already
6553 * in progress then set spa_scrub_reopen flag to B_TRUE so that
6554 * we don't restart the scrub as a side effect of the reopen.
6555 * Otherwise, let vdev_open() decided if a resilver is required.
6556 */
6557
6558 spa->spa_scrub_reopen = (!scrub_restart &&
6559 dsl_scan_scrubbing(spa->spa_dsl_pool));
6560 vdev_reopen(spa->spa_root_vdev);
6561 spa->spa_scrub_reopen = B_FALSE;
6562
6563 (void) spa_vdev_state_exit(spa, NULL, 0);
6564 spa_close(spa, FTAG);
6565 return (0);
6566 }
6567
6568 /*
6569 * inputs:
6570 * zc_name name of filesystem
6571 *
6572 * outputs:
6573 * zc_string name of conflicting snapshot, if there is one
6574 */
6575 static int
zfs_ioc_promote(zfs_cmd_t * zc)6576 zfs_ioc_promote(zfs_cmd_t *zc)
6577 {
6578 dsl_pool_t *dp;
6579 dsl_dataset_t *ds, *ods;
6580 char origin[ZFS_MAX_DATASET_NAME_LEN];
6581 char *cp;
6582 int error;
6583
6584 zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
6585 if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
6586 strchr(zc->zc_name, '%'))
6587 return (SET_ERROR(EINVAL));
6588
6589 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
6590 if (error != 0)
6591 return (error);
6592
6593 error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
6594 if (error != 0) {
6595 dsl_pool_rele(dp, FTAG);
6596 return (error);
6597 }
6598
6599 if (!dsl_dir_is_clone(ds->ds_dir)) {
6600 dsl_dataset_rele(ds, FTAG);
6601 dsl_pool_rele(dp, FTAG);
6602 return (SET_ERROR(EINVAL));
6603 }
6604
6605 error = dsl_dataset_hold_obj(dp,
6606 dsl_dir_phys(ds->ds_dir)->dd_origin_obj, FTAG, &ods);
6607 if (error != 0) {
6608 dsl_dataset_rele(ds, FTAG);
6609 dsl_pool_rele(dp, FTAG);
6610 return (error);
6611 }
6612
6613 dsl_dataset_name(ods, origin);
6614 dsl_dataset_rele(ods, FTAG);
6615 dsl_dataset_rele(ds, FTAG);
6616 dsl_pool_rele(dp, FTAG);
6617
6618 /*
6619 * We don't need to unmount *all* the origin fs's snapshots, but
6620 * it's easier.
6621 */
6622 cp = strchr(origin, '@');
6623 if (cp)
6624 *cp = '\0';
6625 (void) dmu_objset_find(origin,
6626 zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS);
6627 return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
6628 }
6629
6630 /*
6631 * Retrieve a single {user|group|project}{used|quota}@... property.
6632 *
6633 * inputs:
6634 * zc_name name of filesystem
6635 * zc_objset_type zfs_userquota_prop_t
6636 * zc_value domain name (eg. "S-1-234-567-89")
6637 * zc_guid RID/UID/GID
6638 *
6639 * outputs:
6640 * zc_cookie property value
6641 */
6642 static int
zfs_ioc_userspace_one(zfs_cmd_t * zc)6643 zfs_ioc_userspace_one(zfs_cmd_t *zc)
6644 {
6645 zfsvfs_t *zfsvfs;
6646 int error;
6647
6648 if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
6649 return (SET_ERROR(EINVAL));
6650
6651 error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
6652 if (error != 0)
6653 return (error);
6654
6655 error = zfs_userspace_one(zfsvfs,
6656 zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
6657 zfsvfs_rele(zfsvfs, FTAG);
6658
6659 return (error);
6660 }
6661
6662 /*
6663 * inputs:
6664 * zc_name name of filesystem
6665 * zc_cookie zap cursor
6666 * zc_objset_type zfs_userquota_prop_t
6667 * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
6668 *
6669 * outputs:
6670 * zc_nvlist_dst[_size] data buffer (array of zfs_useracct_t)
6671 * zc_cookie zap cursor
6672 *
6673 * The zc_nvlist_dst output array is limited to 1000 entries.
6674 */
6675 static int
zfs_ioc_userspace_many(zfs_cmd_t * zc)6676 zfs_ioc_userspace_many(zfs_cmd_t *zc)
6677 {
6678 const size_t batch_limit = 1000 * sizeof (zfs_useracct_t);
6679 uint64_t bufsize = MIN(zc->zc_nvlist_dst_size, batch_limit);
6680 zfsvfs_t *zfsvfs;
6681
6682 if (bufsize < sizeof (zfs_useracct_t)) {
6683 zc->zc_nvlist_dst_size = sizeof (zfs_useracct_t);
6684 return (SET_ERROR(ENOMEM));
6685 }
6686
6687 int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
6688 if (error != 0)
6689 return (error);
6690
6691 void *buf = vmem_alloc(bufsize, KM_SLEEP);
6692 zc->zc_nvlist_dst_size = bufsize;
6693
6694 error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
6695 buf, &zc->zc_nvlist_dst_size, &zc->zc_guid);
6696
6697 if (error == 0) {
6698 error = xcopyout(buf,
6699 (void *)(uintptr_t)zc->zc_nvlist_dst,
6700 zc->zc_nvlist_dst_size);
6701 }
6702 vmem_free(buf, bufsize);
6703 zfsvfs_rele(zfsvfs, FTAG);
6704
6705 return (error);
6706 }
6707
6708 /*
6709 * inputs:
6710 * zc_name name of filesystem
6711 *
6712 * outputs:
6713 * none
6714 */
static int
zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
{
	int error = 0;
	zfsvfs_t *zfsvfs;

	/*
	 * Two paths: getzfsvfs() succeeding gives us a zfsvfs to work
	 * through (presumably the filesystem is mounted -- confirm against
	 * getzfsvfs()); otherwise the objset is held directly by name.
	 */
	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
		if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
			/*
			 * If userused is not enabled, it may be because the
			 * objset needs to be closed & reopened (to grow the
			 * objset_phys_t). Suspend/resume the fs will do that.
			 */
			dsl_dataset_t *ds, *newds;

			ds = dmu_objset_ds(zfsvfs->z_os);
			error = zfs_suspend_fs(zfsvfs);
			if (error == 0) {
				dmu_objset_refresh_ownership(ds, &newds,
				    B_TRUE, zfsvfs);
				error = zfs_resume_fs(zfsvfs, newds);
			}
		}
		if (error == 0) {
			/*
			 * os_upgrade_id == 0 is taken to mean that no upgrade
			 * task is pending, so one is started here; the lock
			 * is dropped before the upgrade is dispatched.
			 */
			mutex_enter(&zfsvfs->z_os->os_upgrade_lock);
			if (zfsvfs->z_os->os_upgrade_id == 0) {
				/* clear potential error code and retry */
				zfsvfs->z_os->os_upgrade_status = 0;
				mutex_exit(&zfsvfs->z_os->os_upgrade_lock);

				/*
				 * This path holds no pool hold, so the pool
				 * config lock is entered around the call.
				 */
				dsl_pool_config_enter(
				    dmu_objset_pool(zfsvfs->z_os), FTAG);
				dmu_objset_userspace_upgrade(zfsvfs->z_os);
				dsl_pool_config_exit(
				    dmu_objset_pool(zfsvfs->z_os), FTAG);
			} else {
				mutex_exit(&zfsvfs->z_os->os_upgrade_lock);
			}

			/* Wait for the upgrade task, then report its status. */
			taskq_wait_id(zfsvfs->z_os->os_spa->spa_upgrade_taskq,
			    zfsvfs->z_os->os_upgrade_id);
			error = zfsvfs->z_os->os_upgrade_status;
		}
		zfs_vfs_rele(zfsvfs);
	} else {
		objset_t *os;

		/* XXX kind of reading contents without owning */
		error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
		if (error != 0)
			return (error);

		mutex_enter(&os->os_upgrade_lock);
		if (os->os_upgrade_id == 0) {
			/* clear potential error code and retry */
			os->os_upgrade_status = 0;
			mutex_exit(&os->os_upgrade_lock);

			dmu_objset_userspace_upgrade(os);
		} else {
			mutex_exit(&os->os_upgrade_lock);
		}

		/*
		 * The pool hold is released before waiting on the taskq; the
		 * dataset hold is kept until the status has been read.
		 */
		dsl_pool_rele(dmu_objset_pool(os), FTAG);

		taskq_wait_id(os->os_spa->spa_upgrade_taskq, os->os_upgrade_id);
		error = os->os_upgrade_status;

		dsl_dataset_rele_flags(dmu_objset_ds(os), DS_HOLD_FLAG_DECRYPT,
		    FTAG);
	}
	return (error);
}
6788
6789 /*
6790 * inputs:
6791 * zc_name name of filesystem
6792 *
6793 * outputs:
6794 * none
6795 */
6796 static int
zfs_ioc_id_quota_upgrade(zfs_cmd_t * zc)6797 zfs_ioc_id_quota_upgrade(zfs_cmd_t *zc)
6798 {
6799 objset_t *os;
6800 int error;
6801
6802 error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
6803 if (error != 0)
6804 return (error);
6805
6806 if (dmu_objset_userobjspace_upgradable(os) ||
6807 dmu_objset_projectquota_upgradable(os)) {
6808 mutex_enter(&os->os_upgrade_lock);
6809 if (os->os_upgrade_id == 0) {
6810 /* clear potential error code and retry */
6811 os->os_upgrade_status = 0;
6812 mutex_exit(&os->os_upgrade_lock);
6813
6814 dmu_objset_id_quota_upgrade(os);
6815 } else {
6816 mutex_exit(&os->os_upgrade_lock);
6817 }
6818
6819 dsl_pool_rele(dmu_objset_pool(os), FTAG);
6820
6821 taskq_wait_id(os->os_spa->spa_upgrade_taskq, os->os_upgrade_id);
6822 error = os->os_upgrade_status;
6823 } else {
6824 dsl_pool_rele(dmu_objset_pool(os), FTAG);
6825 }
6826
6827 dsl_dataset_rele_flags(dmu_objset_ds(os), DS_HOLD_FLAG_DECRYPT, FTAG);
6828
6829 return (error);
6830 }
6831
6832 static int
zfs_ioc_share(zfs_cmd_t * zc)6833 zfs_ioc_share(zfs_cmd_t *zc)
6834 {
6835 return (SET_ERROR(ENOSYS));
6836 }
6837
6838 /*
6839 * inputs:
6840 * zc_name name of containing filesystem
6841 * zc_obj object # beyond which we want next in-use object #
6842 *
6843 * outputs:
6844 * zc_obj next in-use object #
6845 */
6846 static int
zfs_ioc_next_obj(zfs_cmd_t * zc)6847 zfs_ioc_next_obj(zfs_cmd_t *zc)
6848 {
6849 objset_t *os = NULL;
6850 int error;
6851
6852 error = dmu_objset_hold(zc->zc_name, FTAG, &os);
6853 if (error != 0)
6854 return (error);
6855
6856 error = dmu_object_next(os, &zc->zc_obj, B_FALSE, 0);
6857
6858 dmu_objset_rele(os, FTAG);
6859 return (error);
6860 }
6861
6862 /*
6863 * inputs:
6864 * zc_name name of filesystem
6865 * zc_value prefix name for snapshot
6866 * zc_cleanup_fd cleanup-on-exit file descriptor for calling process
6867 *
6868 * outputs:
6869 * zc_value short name of new snapshot
6870 */
6871 static int
zfs_ioc_tmp_snapshot(zfs_cmd_t * zc)6872 zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
6873 {
6874 char *snap_name;
6875 char *hold_name;
6876 minor_t minor;
6877
6878 zfs_file_t *fp = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
6879 if (fp == NULL)
6880 return (SET_ERROR(EBADF));
6881
6882 snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
6883 (u_longlong_t)ddi_get_lbolt64());
6884 hold_name = kmem_asprintf("%%%s", zc->zc_value);
6885
6886 int error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
6887 hold_name);
6888 if (error == 0)
6889 (void) strlcpy(zc->zc_value, snap_name,
6890 sizeof (zc->zc_value));
6891 kmem_strfree(snap_name);
6892 kmem_strfree(hold_name);
6893 zfs_onexit_fd_rele(fp);
6894 return (error);
6895 }
6896
6897 /*
6898 * inputs:
6899 * zc_name name of "to" snapshot
6900 * zc_value name of "from" snapshot
6901 * zc_cookie file descriptor to write diff data on
6902 *
6903 * outputs:
6904 * dmu_diff_record_t's to the file descriptor
6905 */
6906 static int
zfs_ioc_diff(zfs_cmd_t * zc)6907 zfs_ioc_diff(zfs_cmd_t *zc)
6908 {
6909 zfs_file_t *fp;
6910 offset_t off;
6911 int error;
6912
6913 if ((fp = zfs_file_get(zc->zc_cookie)) == NULL)
6914 return (SET_ERROR(EBADF));
6915
6916 off = zfs_file_off(fp);
6917 error = dmu_diff(zc->zc_name, zc->zc_value, fp, &off);
6918
6919 zfs_file_put(fp);
6920
6921 return (error);
6922 }
6923
6924 static int
zfs_ioc_smb_acl(zfs_cmd_t * zc)6925 zfs_ioc_smb_acl(zfs_cmd_t *zc)
6926 {
6927 return (SET_ERROR(ENOTSUP));
6928 }
6929
6930 /*
6931 * innvl: {
6932 * "holds" -> { snapname -> holdname (string), ... }
6933 * (optional) "cleanup_fd" -> fd (int32)
6934 * }
6935 *
6936 * outnvl: {
6937 * snapname -> error value (int32)
6938 * ...
6939 * }
6940 */
/*
 * nvlist keys for the hold ioctl (see zfs_ioc_hold()).  Each entry lists
 * the key name, its nvpair data type and its flags.  "holds" carries no
 * ZK_OPTIONAL flag, unlike "cleanup_fd".
 */
static const zfs_ioc_key_t zfs_keys_hold[] = {
	{"holds",		DATA_TYPE_NVLIST,	0},
	{"cleanup_fd",		DATA_TYPE_INT32,	ZK_OPTIONAL},
};
6945
6946 static int
zfs_ioc_hold(const char * pool,nvlist_t * args,nvlist_t * errlist)6947 zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
6948 {
6949 (void) pool;
6950 nvpair_t *pair;
6951 nvlist_t *holds;
6952 int cleanup_fd = -1;
6953 int error;
6954 minor_t minor = 0;
6955 zfs_file_t *fp = NULL;
6956
6957 holds = fnvlist_lookup_nvlist(args, "holds");
6958
6959 /* make sure the user didn't pass us any invalid (empty) tags */
6960 for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
6961 pair = nvlist_next_nvpair(holds, pair)) {
6962 const char *htag;
6963
6964 error = nvpair_value_string(pair, &htag);
6965 if (error != 0)
6966 return (SET_ERROR(error));
6967
6968 if (strlen(htag) == 0)
6969 return (SET_ERROR(EINVAL));
6970 }
6971
6972 if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
6973 fp = zfs_onexit_fd_hold(cleanup_fd, &minor);
6974 if (fp == NULL)
6975 return (SET_ERROR(EBADF));
6976 }
6977
6978 error = dsl_dataset_user_hold(holds, minor, errlist);
6979 if (fp != NULL) {
6980 ASSERT3U(minor, !=, 0);
6981 zfs_onexit_fd_rele(fp);
6982 }
6983 return (SET_ERROR(error));
6984 }
6985
6986 /*
6987 * innvl is not used.
6988 *
6989 * outnvl: {
6990 * holdname -> time added (uint64 seconds since epoch)
6991 * ...
6992 * }
6993 */
/*
 * Key table for the get-holds ioctl (see zfs_ioc_get_holds()).  It is
 * deliberately empty: the handler ignores its input nvlist.
 */
static const zfs_ioc_key_t zfs_keys_get_holds[] = {
	/* no nvl keys */
};
6997
6998 static int
zfs_ioc_get_holds(const char * snapname,nvlist_t * args,nvlist_t * outnvl)6999 zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
7000 {
7001 (void) args;
7002 return (dsl_dataset_get_holds(snapname, outnvl));
7003 }
7004
7005 /*
7006 * innvl: {
7007 * snapname -> { holdname, ... }
7008 * ...
7009 * }
7010 *
7011 * outnvl: {
7012 * snapname -> error value (int32)
7013 * ...
7014 * }
7015 */
/*
 * nvlist keys for the release ioctl (see zfs_ioc_release()).  The single
 * ZK_WILDCARDLIST entry stands for any number of snapshot-name keys whose
 * values are nvlists.
 */
static const zfs_ioc_key_t zfs_keys_release[] = {
	{"<snapname>...",	DATA_TYPE_NVLIST,	ZK_WILDCARDLIST},
};
7019
7020 static int
zfs_ioc_release(const char * pool,nvlist_t * holds,nvlist_t * errlist)7021 zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
7022 {
7023 (void) pool;
7024 return (dsl_dataset_user_release(holds, errlist));
7025 }
7026
7027 /*
7028 * inputs:
7029 * zc_guid flags (ZEVENT_NONBLOCK)
7030 * zc_cleanup_fd zevent file descriptor
7031 *
7032 * outputs:
7033 * zc_nvlist_dst next nvlist event
7034 * zc_cookie dropped events since last get
7035 */
7036 static int
zfs_ioc_events_next(zfs_cmd_t * zc)7037 zfs_ioc_events_next(zfs_cmd_t *zc)
7038 {
7039 zfs_zevent_t *ze;
7040 nvlist_t *event = NULL;
7041 minor_t minor;
7042 uint64_t dropped = 0;
7043 int error;
7044
7045 zfs_file_t *fp = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
7046 if (fp == NULL)
7047 return (SET_ERROR(EBADF));
7048
7049 do {
7050 error = zfs_zevent_next(ze, &event,
7051 &zc->zc_nvlist_dst_size, &dropped);
7052 if (event != NULL) {
7053 zc->zc_cookie = dropped;
7054 error = put_nvlist(zc, event);
7055 nvlist_free(event);
7056 }
7057
7058 if (zc->zc_guid & ZEVENT_NONBLOCK)
7059 break;
7060
7061 if ((error == 0) || (error != ENOENT))
7062 break;
7063
7064 error = zfs_zevent_wait(ze);
7065 if (error != 0)
7066 break;
7067 } while (1);
7068
7069 zfs_zevent_fd_rele(fp);
7070
7071 return (error);
7072 }
7073
7074 /*
7075 * outputs:
7076 * zc_cookie cleared events count
7077 */
7078 static int
zfs_ioc_events_clear(zfs_cmd_t * zc)7079 zfs_ioc_events_clear(zfs_cmd_t *zc)
7080 {
7081 uint_t count;
7082
7083 zfs_zevent_drain_all(&count);
7084 zc->zc_cookie = count;
7085
7086 return (0);
7087 }
7088
7089 /*
7090 * inputs:
7091 * zc_guid eid | ZEVENT_SEEK_START | ZEVENT_SEEK_END
 * zc_cleanup_fd	zevent file descriptor
7093 */
7094 static int
zfs_ioc_events_seek(zfs_cmd_t * zc)7095 zfs_ioc_events_seek(zfs_cmd_t *zc)
7096 {
7097 zfs_zevent_t *ze;
7098 minor_t minor;
7099 int error;
7100
7101 zfs_file_t *fp = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
7102 if (fp == NULL)
7103 return (SET_ERROR(EBADF));
7104
7105 error = zfs_zevent_seek(ze, zc->zc_guid);
7106 zfs_zevent_fd_rele(fp);
7107
7108 return (error);
7109 }
7110
7111 /*
7112 * inputs:
7113 * zc_name name of later filesystem or snapshot
7114 * zc_value full name of old snapshot or bookmark
7115 *
7116 * outputs:
7117 * zc_cookie space in bytes
7118 * zc_objset_type compressed space in bytes
7119 * zc_perm_action uncompressed space in bytes
7120 */
7121 static int
zfs_ioc_space_written(zfs_cmd_t * zc)7122 zfs_ioc_space_written(zfs_cmd_t *zc)
7123 {
7124 int error;
7125 dsl_pool_t *dp;
7126 dsl_dataset_t *new;
7127
7128 error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
7129 if (error != 0)
7130 return (error);
7131 error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &new);
7132 if (error != 0) {
7133 dsl_pool_rele(dp, FTAG);
7134 return (error);
7135 }
7136 if (strchr(zc->zc_value, '#') != NULL) {
7137 zfs_bookmark_phys_t bmp;
7138 error = dsl_bookmark_lookup(dp, zc->zc_value,
7139 new, &bmp);
7140 if (error == 0) {
7141 error = dsl_dataset_space_written_bookmark(&bmp, new,
7142 &zc->zc_cookie,
7143 &zc->zc_objset_type, &zc->zc_perm_action);
7144 }
7145 } else {
7146 dsl_dataset_t *old;
7147 error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
7148
7149 if (error == 0) {
7150 error = dsl_dataset_space_written(old, new,
7151 &zc->zc_cookie,
7152 &zc->zc_objset_type, &zc->zc_perm_action);
7153 dsl_dataset_rele(old, FTAG);
7154 }
7155 }
7156 dsl_dataset_rele(new, FTAG);
7157 dsl_pool_rele(dp, FTAG);
7158 return (error);
7159 }
7160
7161 /*
7162 * innvl: {
7163 * "firstsnap" -> snapshot name
7164 * }
7165 *
7166 * outnvl: {
7167 * "used" -> space in bytes
7168 * "compressed" -> compressed space in bytes
7169 * "uncompressed" -> uncompressed space in bytes
7170 * }
7171 */
/*
 * nvlist keys for the space-snaps ioctl (see zfs_ioc_space_snaps()).
 * Each entry lists the key name, its nvpair data type and its flags;
 * "firstsnap" carries no ZK_OPTIONAL flag.
 */
static const zfs_ioc_key_t zfs_keys_space_snaps[] = {
	{"firstsnap",	DATA_TYPE_STRING,	0},
};
7175
7176 static int
zfs_ioc_space_snaps(const char * lastsnap,nvlist_t * innvl,nvlist_t * outnvl)7177 zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
7178 {
7179 int error;
7180 dsl_pool_t *dp;
7181 dsl_dataset_t *new, *old;
7182 const char *firstsnap;
7183 uint64_t used = 0, comp = 0, uncomp = 0;
7184
7185 firstsnap = fnvlist_lookup_string(innvl, "firstsnap");
7186
7187 error = dsl_pool_hold(lastsnap, FTAG, &dp);
7188 if (error != 0)
7189 return (error);
7190
7191 error = dsl_dataset_hold(dp, lastsnap, FTAG, &new);
7192 if (error == 0 && !new->ds_is_snapshot) {
7193 dsl_dataset_rele(new, FTAG);
7194 error = SET_ERROR(EINVAL);
7195 }
7196 if (error != 0) {
7197 dsl_pool_rele(dp, FTAG);
7198 return (error);
7199 }
7200 error = dsl_dataset_hold(dp, firstsnap, FTAG, &old);
7201 if (error == 0 && !old->ds_is_snapshot) {
7202 dsl_dataset_rele(old, FTAG);
7203 error = SET_ERROR(EINVAL);
7204 }
7205 if (error != 0) {
7206 dsl_dataset_rele(new, FTAG);
7207 dsl_pool_rele(dp, FTAG);
7208 return (error);
7209 }
7210
7211 error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp);
7212 dsl_dataset_rele(old, FTAG);
7213 dsl_dataset_rele(new, FTAG);
7214 dsl_pool_rele(dp, FTAG);
7215 fnvlist_add_uint64(outnvl, "used", used);
7216 fnvlist_add_uint64(outnvl, "compressed", comp);
7217 fnvlist_add_uint64(outnvl, "uncompressed", uncomp);
7218 return (error);
7219 }
7220
7221 /*
7222 * innvl: {
7223 * "fd" -> file descriptor to write stream to (int32)
7224 * (optional) "fromsnap" -> full snap name to send an incremental from
7225 * (optional) "largeblockok" -> (value ignored)
7226 * indicates that blocks > 128KB are permitted
7227 * (optional) "embedok" -> (value ignored)
7228 * presence indicates DRR_WRITE_EMBEDDED records are permitted
7229 * (optional) "compressok" -> (value ignored)
7230 * presence indicates compressed DRR_WRITE records are permitted
7231 * (optional) "rawok" -> (value ignored)
7232 * presence indicates raw encrypted records should be used.
7233 * (optional) "savedok" -> (value ignored)
7234 * presence indicates we should send a partially received snapshot
7235 * (optional) "resume_object" and "resume_offset" -> (uint64)
7236 * if present, resume send stream from specified object and offset.
7237 * (optional) "redactbook" -> (string)
7238 * if present, use this bookmark's redaction list to generate a redacted
7239 * send stream
7240 * }
7241 *
7242 * outnvl is unused
7243 */
/*
 * nvlist keys for the send ioctl (see zfs_ioc_send_new()).  Each entry
 * lists the key name, its nvpair data type and its flags.  Only "fd"
 * carries no ZK_OPTIONAL flag.
 */
static const zfs_ioc_key_t zfs_keys_send_new[] = {
	{"fd",			DATA_TYPE_INT32,	0},
	{"fromsnap",		DATA_TYPE_STRING,	ZK_OPTIONAL},
	{"largeblockok",	DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"embedok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"compressok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"rawok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"savedok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"resume_object",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
	{"resume_offset",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
	{"redactbook",		DATA_TYPE_STRING,	ZK_OPTIONAL},
};
7256
7257 static int
zfs_ioc_send_new(const char * snapname,nvlist_t * innvl,nvlist_t * outnvl)7258 zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
7259 {
7260 (void) outnvl;
7261 int error;
7262 offset_t off;
7263 const char *fromname = NULL;
7264 int fd;
7265 boolean_t largeblockok;
7266 boolean_t embedok;
7267 boolean_t compressok;
7268 boolean_t rawok;
7269 boolean_t savedok;
7270 uint64_t resumeobj = 0;
7271 uint64_t resumeoff = 0;
7272 const char *redactbook = NULL;
7273
7274 fd = fnvlist_lookup_int32(innvl, "fd");
7275
7276 (void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
7277
7278 largeblockok = nvlist_exists(innvl, "largeblockok");
7279 embedok = nvlist_exists(innvl, "embedok");
7280 compressok = nvlist_exists(innvl, "compressok");
7281 rawok = nvlist_exists(innvl, "rawok");
7282 savedok = nvlist_exists(innvl, "savedok");
7283
7284 (void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
7285 (void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
7286
7287 (void) nvlist_lookup_string(innvl, "redactbook", &redactbook);
7288
7289 dump_bytes_arg_t dba;
7290 dmu_send_outparams_t out;
7291 error = dump_bytes_init(&dba, fd, &out);
7292 if (error)
7293 return (error);
7294
7295 off = zfs_file_off(dba.dba_fp);
7296 error = dmu_send(snapname, fromname, embedok, largeblockok,
7297 compressok, rawok, savedok, resumeobj, resumeoff,
7298 redactbook, fd, &off, &out);
7299
7300 dump_bytes_fini(&dba);
7301
7302 return (error);
7303 }
7304
7305 static int
send_space_sum(objset_t * os,void * buf,int len,void * arg)7306 send_space_sum(objset_t *os, void *buf, int len, void *arg)
7307 {
7308 (void) os, (void) buf;
7309 uint64_t *size = arg;
7310
7311 *size += len;
7312 return (0);
7313 }
7314
7315 /*
7316 * Determine approximately how large a zfs send stream will be -- the number
7317 * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
7318 *
7319 * innvl: {
7320 * (optional) "from" -> full snap or bookmark name to send an incremental
7321 * from
7322 * (optional) "largeblockok" -> (value ignored)
7323 * indicates that blocks > 128KB are permitted
7324 * (optional) "embedok" -> (value ignored)
7325 * presence indicates DRR_WRITE_EMBEDDED records are permitted
7326 * (optional) "compressok" -> (value ignored)
7327 * presence indicates compressed DRR_WRITE records are permitted
7328 * (optional) "rawok" -> (value ignored)
7329 * presence indicates raw encrypted records should be used.
7330 * (optional) "resume_object" and "resume_offset" -> (uint64)
7331 * if present, resume send stream from specified object and offset.
7332 * (optional) "fd" -> file descriptor to use as a cookie for progress
7333 * tracking (int32)
7334 * }
7335 *
7336 * outnvl: {
7337 * "space" -> bytes of space (uint64)
7338 * }
7339 */
/*
 * nvlist keys for the send-space ioctl (see zfs_ioc_send_space()).  Each
 * entry lists the key name, its nvpair data type and its flags; every key
 * here is ZK_OPTIONAL.
 *
 * NOTE(review): zfs_ioc_send_space() also tests for a "savedok" key, which
 * is not listed in this table -- confirm whether that is intended.
 */
static const zfs_ioc_key_t zfs_keys_send_space[] = {
	{"from",		DATA_TYPE_STRING,	ZK_OPTIONAL},
	{"fromsnap",		DATA_TYPE_STRING,	ZK_OPTIONAL},
	{"largeblockok",	DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"embedok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"compressok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"rawok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"fd",			DATA_TYPE_INT32,	ZK_OPTIONAL},
	{"redactbook",		DATA_TYPE_STRING,	ZK_OPTIONAL},
	{"resume_object",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
	{"resume_offset",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
	{"bytes",		DATA_TYPE_UINT64,	ZK_OPTIONAL},
};
7353
/*
 * Estimate the size of a send stream for snapname and report it as
 * "space" in outnvl.
 *
 * Two strategies are used.  The fast path calls dmu_send_estimate_fast()
 * on the datasets held here.  The full path runs dmu_send() as a dry run
 * whose output callback (send_space_sum) only adds up record lengths.
 * The full path is chosen when a "redactbook" is given, when the "from"
 * bookmark has a redaction object or lacks ZBM_FLAG_HAS_FBN, or when
 * dsl_dataset_is_before(tosnap, fromsnap, 0) is false.
 *
 * "space" is added to outnvl even when the estimate itself failed.
 */
static int
zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
{
	dsl_pool_t *dp;
	dsl_dataset_t *tosnap;
	dsl_dataset_t *fromsnap = NULL;
	int error;
	const char *fromname = NULL;
	const char *redactlist_book = NULL;
	boolean_t largeblockok;
	boolean_t embedok;
	boolean_t compressok;
	boolean_t rawok;
	boolean_t savedok;
	uint64_t space = 0;
	boolean_t full_estimate = B_FALSE;
	uint64_t resumeobj = 0;
	uint64_t resumeoff = 0;
	uint64_t resume_bytes = 0;
	int32_t fd = -1;
	zfs_bookmark_phys_t zbm = {0};

	error = dsl_pool_hold(snapname, FTAG, &dp);
	if (error != 0)
		return (error);

	error = dsl_dataset_hold(dp, snapname, FTAG, &tosnap);
	if (error != 0) {
		dsl_pool_rele(dp, FTAG);
		return (error);
	}
	/* Every key is optional; absent keys leave the defaults above. */
	(void) nvlist_lookup_int32(innvl, "fd", &fd);

	/* For the flag keys only presence matters. */
	largeblockok = nvlist_exists(innvl, "largeblockok");
	embedok = nvlist_exists(innvl, "embedok");
	compressok = nvlist_exists(innvl, "compressok");
	rawok = nvlist_exists(innvl, "rawok");
	savedok = nvlist_exists(innvl, "savedok");
	boolean_t from = (nvlist_lookup_string(innvl, "from", &fromname) == 0);
	boolean_t altbook = (nvlist_lookup_string(innvl, "redactbook",
	    &redactlist_book) == 0);

	(void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
	(void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
	(void) nvlist_lookup_uint64(innvl, "bytes", &resume_bytes);

	if (altbook) {
		/* A redaction bookmark always forces the full estimate. */
		full_estimate = B_TRUE;
	} else if (from) {
		if (strchr(fromname, '#')) {
			/* "from" names a bookmark. */
			error = dsl_bookmark_lookup(dp, fromname, tosnap, &zbm);

			/*
			 * dsl_bookmark_lookup() will fail with EXDEV if
			 * the from-bookmark and tosnap are at the same txg.
			 * However, it's valid to do a send (and therefore,
			 * a send estimate) from and to the same time point,
			 * if the bookmark is redacted (the incremental send
			 * can change what's redacted on the target). In
			 * this case, dsl_bookmark_lookup() fills in zbm
			 * but returns EXDEV. Ignore this error.
			 */
			if (error == EXDEV && zbm.zbm_redaction_obj != 0 &&
			    zbm.zbm_guid ==
			    dsl_dataset_phys(tosnap)->ds_guid)
				error = 0;

			if (error != 0) {
				dsl_dataset_rele(tosnap, FTAG);
				dsl_pool_rele(dp, FTAG);
				return (error);
			}
			if (zbm.zbm_redaction_obj != 0 || !(zbm.zbm_flags &
			    ZBM_FLAG_HAS_FBN)) {
				full_estimate = B_TRUE;
			}
		} else if (strchr(fromname, '@')) {
			/* "from" names a snapshot. */
			error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
			if (error != 0) {
				dsl_dataset_rele(tosnap, FTAG);
				dsl_pool_rele(dp, FTAG);
				return (error);
			}

			/*
			 * The full path does not use our fromsnap hold, so it
			 * is dropped right away.  fromsnap is left non-NULL
			 * but is not referenced again on that path.
			 */
			if (!dsl_dataset_is_before(tosnap, fromsnap, 0)) {
				full_estimate = B_TRUE;
				dsl_dataset_rele(fromsnap, FTAG);
			}
		} else {
			/*
			 * from is not properly formatted as a snapshot or
			 * bookmark
			 */
			dsl_dataset_rele(tosnap, FTAG);
			dsl_pool_rele(dp, FTAG);
			return (SET_ERROR(EINVAL));
		}
	}

	if (full_estimate) {
		dmu_send_outparams_t out = {0};
		offset_t off = 0;
		/* Dry run: send_space_sum() accumulates lengths in space. */
		out.dso_outfunc = send_space_sum;
		out.dso_arg = &space;
		out.dso_dryrun = B_TRUE;
		/*
		 * We have to release these holds so dmu_send can take them. It
		 * will do all the error checking we need.
		 */
		dsl_dataset_rele(tosnap, FTAG);
		dsl_pool_rele(dp, FTAG);
		error = dmu_send(snapname, fromname, embedok, largeblockok,
		    compressok, rawok, savedok, resumeobj, resumeoff,
		    redactlist_book, fd, &off, &out);
	} else {
		/* zbm is only passed along when "from" named a bookmark. */
		error = dmu_send_estimate_fast(tosnap, fromsnap,
		    (from && strchr(fromname, '#') != NULL ? &zbm : NULL),
		    compressok || rawok, savedok, &space);
		/*
		 * NOTE(review): unsigned subtraction, performed even when the
		 * estimate failed; it wraps if "bytes" exceeds the estimate.
		 */
		space -= resume_bytes;
		if (fromsnap != NULL)
			dsl_dataset_rele(fromsnap, FTAG);
		dsl_dataset_rele(tosnap, FTAG);
		dsl_pool_rele(dp, FTAG);
	}

	fnvlist_add_uint64(outnvl, "space", space);

	return (error);
}
7483
7484 /*
7485 * Sync the currently open TXG to disk for the specified pool.
7486 * This is somewhat similar to 'zfs_sync()'.
7487 * For cases that do not result in error this ioctl will wait for
7488 * the currently open TXG to commit before returning back to the caller.
7489 *
7490 * innvl: {
7491 * "force" -> when true, force uberblock update even if there is no dirty data.
7492 * In addition this will cause the vdev configuration to be written
7493 * out including updating the zpool cache file. (boolean_t)
7494 * }
7495 *
7496 * onvl is unused
7497 */
static const zfs_ioc_key_t zfs_keys_pool_sync[] = {
	/* flags are 0: neither ZK_OPTIONAL nor ZK_WILDCARDLIST is set */
	{"force",	DATA_TYPE_BOOLEAN_VALUE,	0},
};
7501
7502 static int
zfs_ioc_pool_sync(const char * pool,nvlist_t * innvl,nvlist_t * onvl)7503 zfs_ioc_pool_sync(const char *pool, nvlist_t *innvl, nvlist_t *onvl)
7504 {
7505 (void) onvl;
7506 int err;
7507 boolean_t rc, force = B_FALSE;
7508 spa_t *spa;
7509
7510 if ((err = spa_open(pool, &spa, FTAG)) != 0)
7511 return (err);
7512
7513 if (innvl) {
7514 err = nvlist_lookup_boolean_value(innvl, "force", &rc);
7515 if (err == 0)
7516 force = rc;
7517 }
7518
7519 if (force) {
7520 spa_config_enter(spa, SCL_CONFIG, FTAG, RW_WRITER);
7521 vdev_config_dirty(spa->spa_root_vdev);
7522 spa_config_exit(spa, SCL_CONFIG, FTAG);
7523 }
7524 txg_wait_synced(spa_get_dsl(spa), 0);
7525
7526 spa_close(spa, FTAG);
7527
7528 return (0);
7529 }
7530
7531 /*
7532 * Load a user's wrapping key into the kernel.
7533 * innvl: {
7534 * "hidden_args" -> { "wkeydata" -> value }
7535 * raw uint8_t array of encryption wrapping key data (32 bytes)
 *     (optional) "noop" -> (value ignored)
 *         presence indicates that the key should only be verified, not loaded
7538 * }
7539 */
static const zfs_ioc_key_t zfs_keys_load_key[] = {
	/* no ZK_OPTIONAL flag on this entry */
	{"hidden_args",	DATA_TYPE_NVLIST,	0},
	/* valueless flag; zfs_ioc_load_key() only tests for its presence */
	{"noop",	DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
};
7544
7545 static int
zfs_ioc_load_key(const char * dsname,nvlist_t * innvl,nvlist_t * outnvl)7546 zfs_ioc_load_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
7547 {
7548 (void) outnvl;
7549 int ret;
7550 dsl_crypto_params_t *dcp = NULL;
7551 nvlist_t *hidden_args;
7552 boolean_t noop = nvlist_exists(innvl, "noop");
7553
7554 if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
7555 ret = SET_ERROR(EINVAL);
7556 goto error;
7557 }
7558
7559 hidden_args = fnvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS);
7560
7561 ret = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, NULL,
7562 hidden_args, &dcp);
7563 if (ret != 0)
7564 goto error;
7565
7566 ret = spa_keystore_load_wkey(dsname, dcp, noop);
7567 if (ret != 0)
7568 goto error;
7569
7570 dsl_crypto_params_free(dcp, noop);
7571
7572 return (0);
7573
7574 error:
7575 dsl_crypto_params_free(dcp, B_TRUE);
7576 return (ret);
7577 }
7578
7579 /*
7580 * Unload a user's wrapping key from the kernel.
7581 * Both innvl and outnvl are unused.
7582 */
static const zfs_ioc_key_t zfs_keys_unload_key[] = {
	/* no nvl keys: zfs_ioc_unload_key() does not read innvl */
};
7586
7587 static int
zfs_ioc_unload_key(const char * dsname,nvlist_t * innvl,nvlist_t * outnvl)7588 zfs_ioc_unload_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
7589 {
7590 (void) innvl, (void) outnvl;
7591 int ret = 0;
7592
7593 if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
7594 ret = (SET_ERROR(EINVAL));
7595 goto out;
7596 }
7597
7598 ret = spa_keystore_unload_wkey(dsname);
7599 if (ret != 0)
7600 goto out;
7601
7602 out:
7603 return (ret);
7604 }
7605
7606 /*
7607 * Changes a user's wrapping key used to decrypt a dataset. The keyformat,
7608 * keylocation, pbkdf2salt, and pbkdf2iters properties can also be specified
7609 * here to change how the key is derived in userspace.
7610 *
 * innvl: {
 *    "crypt_cmd" (optional) -> uint64 command handed to
 *        dsl_crypto_params_create_nvlist() (DCP_CMD_NONE when absent)
 *    "hidden_args" (optional) -> { "wkeydata" -> value }
 *        raw uint8_t array of new encryption wrapping key data (32 bytes)
 *    "props" (optional) -> { prop -> value }
 * }
7616 *
7617 * outnvl is unused
7618 */
/*
 * Every key is optional; zfs_ioc_change_key() keeps its defaults
 * (DCP_CMD_NONE, NULL props, NULL hidden_args) for any that are absent.
 */
static const zfs_ioc_key_t zfs_keys_change_key[] = {
	{"crypt_cmd",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
	{"hidden_args",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
	{"props",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
};
7624
7625 static int
zfs_ioc_change_key(const char * dsname,nvlist_t * innvl,nvlist_t * outnvl)7626 zfs_ioc_change_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
7627 {
7628 (void) outnvl;
7629 int ret;
7630 uint64_t cmd = DCP_CMD_NONE;
7631 dsl_crypto_params_t *dcp = NULL;
7632 nvlist_t *props = NULL, *hidden_args = NULL;
7633
7634 if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
7635 ret = (SET_ERROR(EINVAL));
7636 goto error;
7637 }
7638
7639 (void) nvlist_lookup_uint64(innvl, "crypt_cmd", &cmd);
7640 (void) nvlist_lookup_nvlist(innvl, "props", &props);
7641 (void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
7642
7643 ret = dsl_crypto_params_create_nvlist(cmd, props, hidden_args, &dcp);
7644 if (ret != 0)
7645 goto error;
7646
7647 /* The keylocation property is set from dcp->cp_keylocation. */
7648 (void) nvlist_remove_all(props, zfs_prop_to_name(ZFS_PROP_KEYLOCATION));
7649
7650 if ((ret = zfs_check_userprops(props)) != 0)
7651 goto error;
7652
7653 ret = spa_keystore_change_key(dsname, dcp, props);
7654 if (ret != 0)
7655 goto error;
7656
7657 dsl_crypto_params_free(dcp, B_FALSE);
7658
7659 return (0);
7660
7661 error:
7662 dsl_crypto_params_free(dcp, B_TRUE);
7663 return (ret);
7664 }
7665
/*
 * ioctl dispatch table, indexed by (ioc - ZFS_IOC_FIRST).  Entries are
 * filled in by zfs_ioctl_register() and zfs_ioctl_register_legacy().
 */
static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
7667
7668 static void
zfs_ioctl_register_legacy(zfs_ioc_t ioc,zfs_ioc_legacy_func_t * func,zfs_secpolicy_func_t * secpolicy,zfs_ioc_namecheck_t namecheck,boolean_t log_history,zfs_ioc_poolcheck_t pool_check)7669 zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
7670 zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
7671 boolean_t log_history, zfs_ioc_poolcheck_t pool_check)
7672 {
7673 zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
7674
7675 ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
7676 ASSERT3U(ioc, <, ZFS_IOC_LAST);
7677 ASSERT0P(vec->zvec_legacy_func);
7678 ASSERT0P(vec->zvec_func);
7679
7680 vec->zvec_legacy_func = func;
7681 vec->zvec_secpolicy = secpolicy;
7682 vec->zvec_namecheck = namecheck;
7683 vec->zvec_allow_log = log_history;
7684 vec->zvec_pool_check = pool_check;
7685 }
7686
7687 /*
7688 * See the block comment at the beginning of this file for details on
7689 * each argument to this function.
7690 */
7691 void
zfs_ioctl_register(const char * name,zfs_ioc_t ioc,zfs_ioc_func_t * func,zfs_secpolicy_func_t * secpolicy,zfs_ioc_namecheck_t namecheck,zfs_ioc_poolcheck_t pool_check,boolean_t smush_outnvlist,boolean_t allow_log,const zfs_ioc_key_t * nvl_keys,size_t num_keys)7692 zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
7693 zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
7694 zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist,
7695 boolean_t allow_log, const zfs_ioc_key_t *nvl_keys, size_t num_keys)
7696 {
7697 zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
7698
7699 ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
7700 ASSERT3U(ioc, <, ZFS_IOC_LAST);
7701 ASSERT0P(vec->zvec_legacy_func);
7702 ASSERT0P(vec->zvec_func);
7703
7704 /* if we are logging, the name must be valid */
7705 ASSERT(!allow_log || namecheck != NO_NAME);
7706
7707 vec->zvec_name = name;
7708 vec->zvec_func = func;
7709 vec->zvec_secpolicy = secpolicy;
7710 vec->zvec_namecheck = namecheck;
7711 vec->zvec_pool_check = pool_check;
7712 vec->zvec_smush_outnvlist = smush_outnvlist;
7713 vec->zvec_allow_log = allow_log;
7714 vec->zvec_nvl_keys = nvl_keys;
7715 vec->zvec_nvl_key_count = num_keys;
7716 }
7717
7718 static void
zfs_ioctl_register_pool(zfs_ioc_t ioc,zfs_ioc_legacy_func_t * func,zfs_secpolicy_func_t * secpolicy,boolean_t log_history,zfs_ioc_poolcheck_t pool_check)7719 zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
7720 zfs_secpolicy_func_t *secpolicy, boolean_t log_history,
7721 zfs_ioc_poolcheck_t pool_check)
7722 {
7723 zfs_ioctl_register_legacy(ioc, func, secpolicy,
7724 POOL_NAME, log_history, pool_check);
7725 }
7726
7727 void
zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc,zfs_ioc_legacy_func_t * func,zfs_secpolicy_func_t * secpolicy,zfs_ioc_poolcheck_t pool_check)7728 zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
7729 zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check)
7730 {
7731 zfs_ioctl_register_legacy(ioc, func, secpolicy,
7732 DATASET_NAME, B_FALSE, pool_check);
7733 }
7734
7735 static void
zfs_ioctl_register_pool_modify(zfs_ioc_t ioc,zfs_ioc_legacy_func_t * func)7736 zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
7737 {
7738 zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config,
7739 POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
7740 }
7741
7742 static void
zfs_ioctl_register_pool_meta(zfs_ioc_t ioc,zfs_ioc_legacy_func_t * func,zfs_secpolicy_func_t * secpolicy)7743 zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
7744 zfs_secpolicy_func_t *secpolicy)
7745 {
7746 zfs_ioctl_register_legacy(ioc, func, secpolicy,
7747 NO_NAME, B_FALSE, POOL_CHECK_NONE);
7748 }
7749
7750 static void
zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc,zfs_ioc_legacy_func_t * func,zfs_secpolicy_func_t * secpolicy)7751 zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc,
7752 zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy)
7753 {
7754 zfs_ioctl_register_legacy(ioc, func, secpolicy,
7755 DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED);
7756 }
7757
7758 static void
zfs_ioctl_register_dataset_read(zfs_ioc_t ioc,zfs_ioc_legacy_func_t * func)7759 zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
7760 {
7761 zfs_ioctl_register_dataset_read_secpolicy(ioc, func,
7762 zfs_secpolicy_read);
7763 }
7764
7765 static void
zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc,zfs_ioc_legacy_func_t * func,zfs_secpolicy_func_t * secpolicy)7766 zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
7767 zfs_secpolicy_func_t *secpolicy)
7768 {
7769 zfs_ioctl_register_legacy(ioc, func, secpolicy,
7770 DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
7771 }
7772
/*
 * Populate the ioctl dispatch table.
 *
 * The first group of calls registers handlers through zfs_ioctl_register(),
 * whose arguments are, in order: name, ioctl number, handler, security
 * policy, name check, pool check, smush_outnvlist, allow_log, the table of
 * accepted input keys and its length.  The second group registers handlers
 * that use the legacy function signature, mostly through the
 * zfs_ioctl_register_*() convenience wrappers.  zfs_ioctl_init_os() is
 * called last.
 */
static void
zfs_ioctl_init(void)
{
	zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
	    zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_snapshot, ARRAY_SIZE(zfs_keys_snapshot));

	zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY,
	    zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
	    zfs_keys_log_history, ARRAY_SIZE(zfs_keys_log_history));

	zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS,
	    zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
	    zfs_keys_space_snaps, ARRAY_SIZE(zfs_keys_space_snaps));

	zfs_ioctl_register("send", ZFS_IOC_SEND_NEW,
	    zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
	    zfs_keys_send_new, ARRAY_SIZE(zfs_keys_send_new));

	zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE,
	    zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
	    zfs_keys_send_space, ARRAY_SIZE(zfs_keys_send_space));

	zfs_ioctl_register("create", ZFS_IOC_CREATE,
	    zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_create, ARRAY_SIZE(zfs_keys_create));

	zfs_ioctl_register("clone", ZFS_IOC_CLONE,
	    zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_clone, ARRAY_SIZE(zfs_keys_clone));

	zfs_ioctl_register("remap", ZFS_IOC_REMAP,
	    zfs_ioc_remap, zfs_secpolicy_none, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE,
	    zfs_keys_remap, ARRAY_SIZE(zfs_keys_remap));

	zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS,
	    zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_destroy_snaps, ARRAY_SIZE(zfs_keys_destroy_snaps));

	zfs_ioctl_register("hold", ZFS_IOC_HOLD,
	    zfs_ioc_hold, zfs_secpolicy_hold, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_hold, ARRAY_SIZE(zfs_keys_hold));
	zfs_ioctl_register("release", ZFS_IOC_RELEASE,
	    zfs_ioc_release, zfs_secpolicy_release, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_release, ARRAY_SIZE(zfs_keys_release));

	zfs_ioctl_register("get_holds", ZFS_IOC_GET_HOLDS,
	    zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
	    zfs_keys_get_holds, ARRAY_SIZE(zfs_keys_get_holds));

	zfs_ioctl_register("rollback", ZFS_IOC_ROLLBACK,
	    zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE,
	    zfs_keys_rollback, ARRAY_SIZE(zfs_keys_rollback));

	zfs_ioctl_register("bookmark", ZFS_IOC_BOOKMARK,
	    zfs_ioc_bookmark, zfs_secpolicy_bookmark, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_bookmark, ARRAY_SIZE(zfs_keys_bookmark));

	zfs_ioctl_register("get_bookmarks", ZFS_IOC_GET_BOOKMARKS,
	    zfs_ioc_get_bookmarks, zfs_secpolicy_read, DATASET_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
	    zfs_keys_get_bookmarks, ARRAY_SIZE(zfs_keys_get_bookmarks));

	/* the only registration in this function that uses ENTITY_NAME */
	zfs_ioctl_register("get_bookmark_props", ZFS_IOC_GET_BOOKMARK_PROPS,
	    zfs_ioc_get_bookmark_props, zfs_secpolicy_read, ENTITY_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE, zfs_keys_get_bookmark_props,
	    ARRAY_SIZE(zfs_keys_get_bookmark_props));

	zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS,
	    zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks,
	    POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_destroy_bookmarks,
	    ARRAY_SIZE(zfs_keys_destroy_bookmarks));

	zfs_ioctl_register("receive", ZFS_IOC_RECV_NEW,
	    zfs_ioc_recv_new, zfs_secpolicy_recv, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_recv_new, ARRAY_SIZE(zfs_keys_recv_new));
	/* wrapping-key ioctls: load, unload and change */
	zfs_ioctl_register("load-key", ZFS_IOC_LOAD_KEY,
	    zfs_ioc_load_key, zfs_secpolicy_load_key,
	    DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE,
	    zfs_keys_load_key, ARRAY_SIZE(zfs_keys_load_key));
	zfs_ioctl_register("unload-key", ZFS_IOC_UNLOAD_KEY,
	    zfs_ioc_unload_key, zfs_secpolicy_load_key,
	    DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE,
	    zfs_keys_unload_key, ARRAY_SIZE(zfs_keys_unload_key));
	zfs_ioctl_register("change-key", ZFS_IOC_CHANGE_KEY,
	    zfs_ioc_change_key, zfs_secpolicy_change_key,
	    DATASET_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY,
	    B_TRUE, B_TRUE, zfs_keys_change_key,
	    ARRAY_SIZE(zfs_keys_change_key));

	zfs_ioctl_register("sync", ZFS_IOC_POOL_SYNC,
	    zfs_ioc_pool_sync, zfs_secpolicy_none, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
	    zfs_keys_pool_sync, ARRAY_SIZE(zfs_keys_pool_sync));
	zfs_ioctl_register("reopen", ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
	    zfs_secpolicy_config, POOL_NAME, POOL_CHECK_SUSPENDED, B_TRUE,
	    B_TRUE, zfs_keys_pool_reopen, ARRAY_SIZE(zfs_keys_pool_reopen));

	zfs_ioctl_register("channel_program", ZFS_IOC_CHANNEL_PROGRAM,
	    zfs_ioc_channel_program, zfs_secpolicy_config,
	    POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE,
	    B_TRUE, zfs_keys_channel_program,
	    ARRAY_SIZE(zfs_keys_channel_program));

	zfs_ioctl_register("redact", ZFS_IOC_REDACT,
	    zfs_ioc_redact, zfs_secpolicy_config, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_redact, ARRAY_SIZE(zfs_keys_redact));

	zfs_ioctl_register("zpool_checkpoint", ZFS_IOC_POOL_CHECKPOINT,
	    zfs_ioc_pool_checkpoint, zfs_secpolicy_config, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_pool_checkpoint, ARRAY_SIZE(zfs_keys_pool_checkpoint));

	zfs_ioctl_register("zpool_discard_checkpoint",
	    ZFS_IOC_POOL_DISCARD_CHECKPOINT, zfs_ioc_pool_discard_checkpoint,
	    zfs_secpolicy_config, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_pool_discard_checkpoint,
	    ARRAY_SIZE(zfs_keys_pool_discard_checkpoint));

	zfs_ioctl_register("zpool_prefetch",
	    ZFS_IOC_POOL_PREFETCH, zfs_ioc_pool_prefetch,
	    zfs_secpolicy_config, POOL_NAME,
	    POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE,
	    zfs_keys_pool_prefetch, ARRAY_SIZE(zfs_keys_pool_prefetch));

	zfs_ioctl_register("initialize", ZFS_IOC_POOL_INITIALIZE,
	    zfs_ioc_pool_initialize, zfs_secpolicy_config, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_pool_initialize, ARRAY_SIZE(zfs_keys_pool_initialize));

	zfs_ioctl_register("trim", ZFS_IOC_POOL_TRIM,
	    zfs_ioc_pool_trim, zfs_secpolicy_config, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_pool_trim, ARRAY_SIZE(zfs_keys_pool_trim));

	zfs_ioctl_register("wait", ZFS_IOC_WAIT,
	    zfs_ioc_wait, zfs_secpolicy_none, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
	    zfs_keys_pool_wait, ARRAY_SIZE(zfs_keys_pool_wait));

	zfs_ioctl_register("wait_fs", ZFS_IOC_WAIT_FS,
	    zfs_ioc_wait_fs, zfs_secpolicy_none, DATASET_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
	    zfs_keys_fs_wait, ARRAY_SIZE(zfs_keys_fs_wait));

	zfs_ioctl_register("set_bootenv", ZFS_IOC_SET_BOOTENV,
	    zfs_ioc_set_bootenv, zfs_secpolicy_config, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE,
	    zfs_keys_set_bootenv, ARRAY_SIZE(zfs_keys_set_bootenv));

	zfs_ioctl_register("get_bootenv", ZFS_IOC_GET_BOOTENV,
	    zfs_ioc_get_bootenv, zfs_secpolicy_none, POOL_NAME,
	    POOL_CHECK_SUSPENDED, B_FALSE, B_TRUE,
	    zfs_keys_get_bootenv, ARRAY_SIZE(zfs_keys_get_bootenv));

	zfs_ioctl_register("zpool_vdev_get_props", ZFS_IOC_VDEV_GET_PROPS,
	    zfs_ioc_vdev_get_props, zfs_secpolicy_read, POOL_NAME,
	    POOL_CHECK_NONE, B_FALSE, B_FALSE, zfs_keys_vdev_get_props,
	    ARRAY_SIZE(zfs_keys_vdev_get_props));

	zfs_ioctl_register("zpool_vdev_set_props", ZFS_IOC_VDEV_SET_PROPS,
	    zfs_ioc_vdev_set_props, zfs_secpolicy_config, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
	    zfs_keys_vdev_set_props, ARRAY_SIZE(zfs_keys_vdev_set_props));

	zfs_ioctl_register("scrub", ZFS_IOC_POOL_SCRUB,
	    zfs_ioc_pool_scrub, zfs_secpolicy_config, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_pool_scrub, ARRAY_SIZE(zfs_keys_pool_scrub));

	zfs_ioctl_register("get_props", ZFS_IOC_POOL_GET_PROPS,
	    zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME,
	    POOL_CHECK_NONE, B_FALSE, B_FALSE,
	    zfs_keys_get_props, ARRAY_SIZE(zfs_keys_get_props));

	zfs_ioctl_register("zpool_ddt_prune", ZFS_IOC_DDT_PRUNE,
	    zfs_ioc_ddt_prune, zfs_secpolicy_config, POOL_NAME,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
	    zfs_keys_ddt_prune, ARRAY_SIZE(zfs_keys_ddt_prune));

	/* IOCTLS that use the legacy function signature */

	zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY);

	zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
	/*
	 * zfs_ioctl_register_pool_modify() supplies zfs_secpolicy_config,
	 * history logging, and both the suspended and read-only pool checks.
	 */
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN,
	    zfs_ioc_pool_scan);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE,
	    zfs_ioc_pool_upgrade);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD,
	    zfs_ioc_vdev_add);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE,
	    zfs_ioc_vdev_remove);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE,
	    zfs_ioc_vdev_set_state);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH,
	    zfs_ioc_vdev_attach);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH,
	    zfs_ioc_vdev_detach);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH,
	    zfs_ioc_vdev_setpath);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU,
	    zfs_ioc_vdev_setfru);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS,
	    zfs_ioc_pool_set_props);
	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT,
	    zfs_ioc_vdev_split);
	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID,
	    zfs_ioc_pool_reguid);

	/*
	 * zfs_ioctl_register_pool_meta() registers with NO_NAME, no history
	 * logging and POOL_CHECK_NONE.
	 */
	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS,
	    zfs_ioc_pool_configs, zfs_secpolicy_none);
	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT,
	    zfs_ioc_pool_tryimport, zfs_secpolicy_config);
	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT,
	    zfs_ioc_inject_fault, zfs_secpolicy_inject);
	zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT,
	    zfs_ioc_clear_fault, zfs_secpolicy_inject);
	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT,
	    zfs_ioc_inject_list_next, zfs_secpolicy_inject);

	/*
	 * Pool destroy and export don't log the history as part of
	 * zfsdev_ioctl; rather, zfs_ioc_pool_export does the logging of
	 * those commands.
	 */
	zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);

	zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);

	zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
	    zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED);
	zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME,
	    zfs_ioc_dsobj_to_dsname,
	    zfs_secpolicy_diff, B_FALSE, POOL_CHECK_SUSPENDED);
	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY,
	    zfs_ioc_pool_get_history,
	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);

	zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);

	zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_READONLY);

	/*
	 * zfs_ioctl_register_dataset_read() registers with
	 * zfs_secpolicy_read, no history logging and only the
	 * suspended-pool check.
	 */
	zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN,
	    zfs_ioc_space_written);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS,
	    zfs_ioc_objset_recvd_props);
	zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ,
	    zfs_ioc_next_obj);
	zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL,
	    zfs_ioc_get_fsacl);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS,
	    zfs_ioc_objset_stats);
	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS,
	    zfs_ioc_objset_zplprops);
	zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT,
	    zfs_ioc_dataset_list_next);
	zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT,
	    zfs_ioc_snapshot_list_next);
	zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS,
	    zfs_ioc_send_progress);

	/* same as above, but each ioctl names its own security policy */
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF,
	    zfs_ioc_diff, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS,
	    zfs_ioc_obj_to_stats, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH,
	    zfs_ioc_obj_to_path, zfs_secpolicy_diff);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE,
	    zfs_ioc_userspace_one, zfs_secpolicy_userspace_one);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY,
	    zfs_ioc_userspace_many, zfs_secpolicy_userspace_many);
	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND,
	    zfs_ioc_send, zfs_secpolicy_send);

	/*
	 * zfs_ioctl_register_dataset_modify() registers with history logging
	 * and both the suspended and read-only pool checks.
	 */
	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop,
	    zfs_secpolicy_setprops);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy,
	    zfs_secpolicy_destroy);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename,
	    zfs_secpolicy_rename);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv,
	    zfs_secpolicy_recv);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote,
	    zfs_secpolicy_promote);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP,
	    zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop);
	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl,
	    zfs_secpolicy_set_fsacl);

	/* dataset ioctls registered without history logging */
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share,
	    zfs_secpolicy_share, POOL_CHECK_NONE);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl,
	    zfs_secpolicy_smb_acl, POOL_CHECK_NONE);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE,
	    zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
	zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT,
	    zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot,
	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);

	/* event ioctls: NO_NAME, no history logging, no pool check */
	zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_NEXT, zfs_ioc_events_next,
	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_CLEAR, zfs_ioc_events_clear,
	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
	zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_SEEK, zfs_ioc_events_seek,
	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);

	zfs_ioctl_init_os();
}
8110
8111 /*
8112 * Verify that for non-legacy ioctls the input nvlist
8113 * pairs match against the expected input.
8114 *
8115 * Possible errors are:
8116 * ZFS_ERR_IOC_ARG_UNAVAIL An unrecognized nvpair was encountered
8117 * ZFS_ERR_IOC_ARG_REQUIRED A required nvpair is missing
8118 * ZFS_ERR_IOC_ARG_BADTYPE Invalid type for nvpair
8119 */
8120 static int
zfs_check_input_nvpairs(nvlist_t * innvl,const zfs_ioc_vec_t * vec)8121 zfs_check_input_nvpairs(nvlist_t *innvl, const zfs_ioc_vec_t *vec)
8122 {
8123 const zfs_ioc_key_t *nvl_keys = vec->zvec_nvl_keys;
8124 boolean_t required_keys_found = B_FALSE;
8125
8126 /*
8127 * examine each input pair
8128 */
8129 for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
8130 pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
8131 const char *name = nvpair_name(pair);
8132 data_type_t type = nvpair_type(pair);
8133 boolean_t identified = B_FALSE;
8134
8135 /*
8136 * check pair against the documented names and type
8137 */
8138 for (int k = 0; k < vec->zvec_nvl_key_count; k++) {
8139 /* if not a wild card name, check for an exact match */
8140 if ((nvl_keys[k].zkey_flags & ZK_WILDCARDLIST) == 0 &&
8141 strcmp(nvl_keys[k].zkey_name, name) != 0)
8142 continue;
8143
8144 identified = B_TRUE;
8145
8146 if (nvl_keys[k].zkey_type != DATA_TYPE_ANY &&
8147 nvl_keys[k].zkey_type != type) {
8148 return (SET_ERROR(ZFS_ERR_IOC_ARG_BADTYPE));
8149 }
8150
8151 if (nvl_keys[k].zkey_flags & ZK_OPTIONAL)
8152 continue;
8153
8154 required_keys_found = B_TRUE;
8155 break;
8156 }
8157
8158 /* allow an 'optional' key, everything else is invalid */
8159 if (!identified &&
8160 (strcmp(name, "optional") != 0 ||
8161 type != DATA_TYPE_NVLIST)) {
8162 return (SET_ERROR(ZFS_ERR_IOC_ARG_UNAVAIL));
8163 }
8164 }
8165
8166 /* verify that all required keys were found */
8167 for (int k = 0; k < vec->zvec_nvl_key_count; k++) {
8168 if (nvl_keys[k].zkey_flags & ZK_OPTIONAL)
8169 continue;
8170
8171 if (nvl_keys[k].zkey_flags & ZK_WILDCARDLIST) {
8172 /* at least one non-optional key is expected here */
8173 if (!required_keys_found)
8174 return (SET_ERROR(ZFS_ERR_IOC_ARG_REQUIRED));
8175 continue;
8176 }
8177
8178 if (!nvlist_exists(innvl, nvl_keys[k].zkey_name))
8179 return (SET_ERROR(ZFS_ERR_IOC_ARG_REQUIRED));
8180 }
8181
8182 return (0);
8183 }
8184
8185 static int
pool_status_check(const char * name,zfs_ioc_namecheck_t type,zfs_ioc_poolcheck_t check)8186 pool_status_check(const char *name, zfs_ioc_namecheck_t type,
8187 zfs_ioc_poolcheck_t check)
8188 {
8189 spa_t *spa;
8190 int error;
8191
8192 ASSERT(type == POOL_NAME || type == DATASET_NAME ||
8193 type == ENTITY_NAME);
8194
8195 if (check & POOL_CHECK_NONE)
8196 return (0);
8197
8198 error = spa_open(name, &spa, FTAG);
8199 if (error == 0) {
8200 if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa))
8201 error = SET_ERROR(EAGAIN);
8202 else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa))
8203 error = SET_ERROR(EROFS);
8204 spa_close(spa, FTAG);
8205 }
8206 return (error);
8207 }
8208
8209 int
zfsdev_getminor(zfs_file_t * fp,minor_t * minorp)8210 zfsdev_getminor(zfs_file_t *fp, minor_t *minorp)
8211 {
8212 zfsdev_state_t *zs, *fpd;
8213
8214 ASSERT(!MUTEX_HELD(&zfsdev_state_lock));
8215
8216 fpd = zfs_file_private(fp);
8217 if (fpd == NULL)
8218 return (SET_ERROR(EBADF));
8219
8220 mutex_enter(&zfsdev_state_lock);
8221
8222 for (zs = &zfsdev_state_listhead; zs != NULL; zs = zs->zs_next) {
8223
8224 if (zs->zs_minor == -1)
8225 continue;
8226
8227 if (fpd == zs) {
8228 *minorp = fpd->zs_minor;
8229 mutex_exit(&zfsdev_state_lock);
8230 return (0);
8231 }
8232 }
8233
8234 mutex_exit(&zfsdev_state_lock);
8235
8236 return (SET_ERROR(EBADF));
8237 }
8238
8239 void *
zfsdev_get_state(minor_t minor,enum zfsdev_state_type which)8240 zfsdev_get_state(minor_t minor, enum zfsdev_state_type which)
8241 {
8242 zfsdev_state_t *zs;
8243
8244 for (zs = &zfsdev_state_listhead; zs != NULL; zs = zs->zs_next) {
8245 if (zs->zs_minor == minor) {
8246 membar_consumer();
8247 switch (which) {
8248 case ZST_ONEXIT:
8249 return (zs->zs_onexit);
8250 case ZST_ZEVENT:
8251 return (zs->zs_zevent);
8252 case ZST_ALL:
8253 return (zs);
8254 }
8255 }
8256 }
8257
8258 return (NULL);
8259 }
8260
8261 /*
8262 * Find a free minor number. The zfsdev_state_list is expected to
8263 * be short since it is only a list of currently open file handles.
8264 */
8265 static minor_t
zfsdev_minor_alloc(void)8266 zfsdev_minor_alloc(void)
8267 {
8268 static minor_t last_minor = 0;
8269 minor_t m;
8270
8271 ASSERT(MUTEX_HELD(&zfsdev_state_lock));
8272
8273 for (m = last_minor + 1; m != last_minor; m++) {
8274 if (m > ZFSDEV_MAX_MINOR)
8275 m = 1;
8276 if (zfsdev_get_state(m, ZST_ALL) == NULL) {
8277 last_minor = m;
8278 return (m);
8279 }
8280 }
8281
8282 return (0);
8283 }
8284
/*
 * Set up a zfsdev state object for a newly opened handle and attach it to
 * 'priv'.  The caller must hold zfsdev_state_lock.
 *
 * A previously invalidated list entry (zs_minor == -1) is reused when one
 * exists; otherwise a zeroed entry is allocated and appended to the list.
 *
 * Returns ENXIO if zfsdev_minor_alloc() found no free minor, 0 otherwise.
 */
int
zfsdev_state_init(void *priv)
{
	zfsdev_state_t *zs, *zsprev = NULL;
	minor_t minor;
	boolean_t newzs = B_FALSE;

	ASSERT(MUTEX_HELD(&zfsdev_state_lock));

	minor = zfsdev_minor_alloc();
	if (minor == 0)
		return (SET_ERROR(ENXIO));

	/*
	 * Look for a reusable entry.  The walk starts at the list head
	 * object, so if it runs off the end without finding one, zsprev
	 * is left pointing at the last entry of the list.
	 */
	for (zs = &zfsdev_state_listhead; zs != NULL; zs = zs->zs_next) {
		if (zs->zs_minor == -1)
			break;
		zsprev = zs;
	}

	/* nothing to reuse: allocate a fresh entry */
	if (!zs) {
		zs = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP);
		newzs = B_TRUE;
	}

	zfsdev_private_set_state(priv, zs);

	zfs_onexit_init((zfs_onexit_t **)&zs->zs_onexit);
	zfs_zevent_init((zfs_zevent_t **)&zs->zs_zevent);

	/*
	 * In order to provide for lock-free concurrent read access
	 * to the minor list in zfsdev_get_state(), new entries
	 * must be completely written before linking them into the
	 * list whereas existing entries are already linked; the last
	 * operation must be updating zs_minor (from -1 to the new
	 * value).
	 */
	if (newzs) {
		/* new entry: fill in zs_minor first, then link it */
		zs->zs_minor = minor;
		membar_producer();
		zsprev->zs_next = zs;
	} else {
		/* reused entry: already linked, so zs_minor goes last */
		membar_producer();
		zs->zs_minor = minor;
	}

	return (0);
}
8333
8334 void
zfsdev_state_destroy(void * priv)8335 zfsdev_state_destroy(void *priv)
8336 {
8337 zfsdev_state_t *zs = zfsdev_private_get_state(priv);
8338
8339 ASSERT(zs != NULL);
8340 ASSERT3S(zs->zs_minor, >, 0);
8341
8342 /*
8343 * The last reference to this zfsdev file descriptor is being dropped.
8344 * We don't have to worry about lookup grabbing this state object, and
8345 * zfsdev_state_init() will not try to reuse this object until it is
8346 * invalidated by setting zs_minor to -1. Invalidation must be done
8347 * last, with a memory barrier to ensure ordering. This lets us avoid
8348 * taking the global zfsdev state lock around destruction.
8349 */
8350 zfs_onexit_destroy(zs->zs_onexit);
8351 zfs_zevent_destroy(zs->zs_zevent);
8352 zs->zs_onexit = NULL;
8353 zs->zs_zevent = NULL;
8354 membar_producer();
8355 zs->zs_minor = -1;
8356 }
8357
8358 long
zfsdev_ioctl_common(uint_t vecnum,zfs_cmd_t * zc,int flag)8359 zfsdev_ioctl_common(uint_t vecnum, zfs_cmd_t *zc, int flag)
8360 {
8361 int error, cmd;
8362 const zfs_ioc_vec_t *vec;
8363 char *saved_poolname = NULL;
8364 uint64_t max_nvlist_src_size;
8365 size_t saved_poolname_len = 0;
8366 nvlist_t *innvl = NULL;
8367 fstrans_cookie_t cookie;
8368 hrtime_t start_time = gethrtime();
8369
8370 cmd = vecnum;
8371 error = 0;
8372 if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
8373 return (SET_ERROR(ZFS_ERR_IOC_CMD_UNAVAIL));
8374
8375 vec = &zfs_ioc_vec[vecnum];
8376
8377 /*
8378 * The registered ioctl list may be sparse, verify that either
8379 * a normal or legacy handler are registered.
8380 */
8381 if (vec->zvec_func == NULL && vec->zvec_legacy_func == NULL)
8382 return (SET_ERROR(ZFS_ERR_IOC_CMD_UNAVAIL));
8383
8384 zc->zc_iflags = flag & FKIOCTL;
8385 max_nvlist_src_size = zfs_max_nvlist_src_size_os();
8386 if (zc->zc_nvlist_src_size > max_nvlist_src_size) {
8387 /*
8388 * Make sure the user doesn't pass in an insane value for
8389 * zc_nvlist_src_size. We have to check, since we will end
8390 * up allocating that much memory inside of get_nvlist(). This
8391 * prevents a nefarious user from allocating tons of kernel
8392 * memory.
8393 *
8394 * Also, we return EINVAL instead of ENOMEM here. The reason
8395 * being that returning ENOMEM from an ioctl() has a special
8396 * connotation; that the user's size value is too small and
8397 * needs to be expanded to hold the nvlist. See
8398 * zcmd_expand_dst_nvlist() for details.
8399 */
8400 error = SET_ERROR(EINVAL); /* User's size too big */
8401
8402 } else if (zc->zc_nvlist_src_size != 0) {
8403 error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
8404 zc->zc_iflags, &innvl);
8405 if (error != 0)
8406 goto out;
8407 }
8408
8409 /*
8410 * Ensure that all pool/dataset names are valid before we pass down to
8411 * the lower layers.
8412 */
8413 zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
8414 switch (vec->zvec_namecheck) {
8415 case POOL_NAME:
8416 if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
8417 error = SET_ERROR(EINVAL);
8418 else
8419 error = pool_status_check(zc->zc_name,
8420 vec->zvec_namecheck, vec->zvec_pool_check);
8421 break;
8422
8423 case DATASET_NAME:
8424 if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
8425 error = SET_ERROR(EINVAL);
8426 else
8427 error = pool_status_check(zc->zc_name,
8428 vec->zvec_namecheck, vec->zvec_pool_check);
8429 break;
8430
8431 case ENTITY_NAME:
8432 if (entity_namecheck(zc->zc_name, NULL, NULL) != 0) {
8433 error = SET_ERROR(EINVAL);
8434 } else {
8435 error = pool_status_check(zc->zc_name,
8436 vec->zvec_namecheck, vec->zvec_pool_check);
8437 }
8438 break;
8439
8440 case NO_NAME:
8441 break;
8442 }
8443 /*
8444 * Ensure that all input pairs are valid before we pass them down
8445 * to the lower layers.
8446 *
8447 * The vectored functions can use fnvlist_lookup_{type} for any
8448 * required pairs since zfs_check_input_nvpairs() confirmed that
8449 * they exist and are of the correct type.
8450 */
8451 if (error == 0 && vec->zvec_func != NULL) {
8452 error = zfs_check_input_nvpairs(innvl, vec);
8453 if (error != 0)
8454 goto out;
8455 }
8456
8457 if (error == 0) {
8458 cookie = spl_fstrans_mark();
8459 error = vec->zvec_secpolicy(zc, innvl, CRED());
8460 spl_fstrans_unmark(cookie);
8461 }
8462
8463 if (error != 0)
8464 goto out;
8465
8466 /* legacy ioctls can modify zc_name */
8467 /*
8468 * Can't use kmem_strdup() as we might truncate the string and
8469 * kmem_strfree() would then free with incorrect size.
8470 */
8471 const char *spa_name = zc->zc_name;
8472 const char *tname;
8473 if (nvlist_lookup_string(innvl,
8474 zpool_prop_to_name(ZPOOL_PROP_TNAME), &tname) == 0) {
8475 spa_name = tname;
8476 }
8477 saved_poolname_len = strlen(spa_name) + 1;
8478 saved_poolname = kmem_alloc(saved_poolname_len, KM_SLEEP);
8479
8480 strlcpy(saved_poolname, spa_name, saved_poolname_len);
8481 saved_poolname[strcspn(saved_poolname, "/@#")] = '\0';
8482
8483 if (vec->zvec_func != NULL) {
8484 nvlist_t *outnvl;
8485 int puterror = 0;
8486 spa_t *spa;
8487 nvlist_t *lognv = NULL;
8488
8489 ASSERT0P(vec->zvec_legacy_func);
8490
8491 /*
8492 * Add the innvl to the lognv before calling the func,
8493 * in case the func changes the innvl.
8494 */
8495 if (vec->zvec_allow_log) {
8496 lognv = fnvlist_alloc();
8497 fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL,
8498 vec->zvec_name);
8499 if (!nvlist_empty(innvl)) {
8500 fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL,
8501 innvl);
8502 }
8503 }
8504
8505 outnvl = fnvlist_alloc();
8506 cookie = spl_fstrans_mark();
8507 error = vec->zvec_func(zc->zc_name, innvl, outnvl);
8508 spl_fstrans_unmark(cookie);
8509
8510 /*
8511 * Some commands can partially execute, modify state, and still
8512 * return an error. In these cases, attempt to record what
8513 * was modified.
8514 */
8515 if ((error == 0 ||
8516 (cmd == ZFS_IOC_CHANNEL_PROGRAM && error != EINVAL)) &&
8517 vec->zvec_allow_log &&
8518 spa_open(zc->zc_name, &spa, FTAG) == 0) {
8519 if (!nvlist_empty(outnvl)) {
8520 size_t out_size = fnvlist_size(outnvl);
8521 if (out_size > zfs_history_output_max) {
8522 fnvlist_add_int64(lognv,
8523 ZPOOL_HIST_OUTPUT_SIZE, out_size);
8524 } else {
8525 fnvlist_add_nvlist(lognv,
8526 ZPOOL_HIST_OUTPUT_NVL, outnvl);
8527 }
8528 }
8529 if (error != 0) {
8530 fnvlist_add_int64(lognv, ZPOOL_HIST_ERRNO,
8531 error);
8532 }
8533 fnvlist_add_int64(lognv, ZPOOL_HIST_ELAPSED_NS,
8534 gethrtime() - start_time);
8535 (void) spa_history_log_nvl(spa, lognv);
8536 spa_close(spa, FTAG);
8537 }
8538 fnvlist_free(lognv);
8539
8540 if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) {
8541 int smusherror = 0;
8542 if (vec->zvec_smush_outnvlist) {
8543 smusherror = nvlist_smush(outnvl,
8544 zc->zc_nvlist_dst_size);
8545 }
8546 if (smusherror == 0)
8547 puterror = put_nvlist(zc, outnvl);
8548 }
8549
8550 if (puterror != 0)
8551 error = puterror;
8552
8553 nvlist_free(outnvl);
8554 } else {
8555 cookie = spl_fstrans_mark();
8556 error = vec->zvec_legacy_func(zc);
8557 spl_fstrans_unmark(cookie);
8558 }
8559
8560 out:
8561 nvlist_free(innvl);
8562 if (error == 0 && vec->zvec_allow_log) {
8563 char *s = tsd_get(zfs_allow_log_key);
8564 if (s != NULL)
8565 kmem_strfree(s);
8566 (void) tsd_set(zfs_allow_log_key, kmem_strdup(saved_poolname));
8567 }
8568 if (saved_poolname != NULL)
8569 kmem_free(saved_poolname, saved_poolname_len);
8570
8571 return (error);
8572 }
8573
8574 int
zfs_kmod_init(void)8575 zfs_kmod_init(void)
8576 {
8577 int error;
8578
8579 if ((error = zvol_init()) != 0)
8580 return (error);
8581
8582 spa_init(SPA_MODE_READ | SPA_MODE_WRITE);
8583 zfs_init();
8584
8585 zfs_ioctl_init();
8586
8587 /* Register zoned_uid property lookup callback with SPL */
8588 zone_register_zoned_uid_callback(zfs_get_zoned_uid);
8589
8590 mutex_init(&zfsdev_state_lock, NULL, MUTEX_DEFAULT, NULL);
8591 zfsdev_state_listhead.zs_minor = -1;
8592
8593 if ((error = zfsdev_attach()) != 0)
8594 goto out;
8595
8596 tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
8597 tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
8598
8599 return (0);
8600 out:
8601 zfs_fini();
8602 spa_fini();
8603 zvol_fini();
8604
8605 return (error);
8606 }
8607
8608 void
zfs_kmod_fini(void)8609 zfs_kmod_fini(void)
8610 {
8611 zfsdev_state_t *zs, *zsnext = NULL;
8612
8613 zfsdev_detach();
8614
8615 mutex_destroy(&zfsdev_state_lock);
8616
8617 for (zs = &zfsdev_state_listhead; zs != NULL; zs = zsnext) {
8618 zsnext = zs->zs_next;
8619 if (zs->zs_onexit)
8620 zfs_onexit_destroy(zs->zs_onexit);
8621 if (zs->zs_zevent)
8622 zfs_zevent_destroy(zs->zs_zevent);
8623 if (zs != &zfsdev_state_listhead)
8624 kmem_free(zs, sizeof (zfsdev_state_t));
8625 }
8626
8627 zfs_ereport_taskq_fini(); /* run before zfs_fini() on Linux */
8628
8629 /* Unregister zoned_uid callback before ZFS layer is torn down */
8630 zone_unregister_zoned_uid_callback();
8631
8632 zfs_fini();
8633 spa_fini();
8634 zvol_fini();
8635
8636 tsd_destroy(&rrw_tsd_key);
8637 tsd_destroy(&zfs_allow_log_key);
8638 }
8639
/*
 * Module parameters, both declared as U64 with ZMOD_RW.
 *
 * NOTE(review): presumably this backs the limit that
 * zfsdev_ioctl_common() obtains from zfs_max_nvlist_src_size_os() and
 * compares against zc_nvlist_src_size -- confirm against the per-OS
 * implementation.
 */
ZFS_MODULE_PARAM(zfs, zfs_, max_nvlist_src_size, U64, ZMOD_RW,
	"Maximum size in bytes allowed for src nvlist passed with ZFS ioctls");

/*
 * NOTE(review): presumably exposes zfs_history_output_max, which
 * zfsdev_ioctl_common() compares against the size of an ioctl's output
 * nvlist; larger outputs are recorded in the history log by size only.
 */
ZFS_MODULE_PARAM(zfs, zfs_, history_output_max, U64, ZMOD_RW,
	"Maximum size in bytes of ZFS ioctl output that will be logged");
8645