xref: /freebsd/sys/contrib/openzfs/module/zfs/zfs_ioctl.c (revision d9497217456002b0ddad3cd319570d0b098daa29)
1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or https://opensource.org/licenses/CDDL-1.0.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*
24  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
25  * Portions Copyright 2011 Martin Matuska
26  * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
27  * Copyright (c) 2012 Pawel Jakub Dawidek
28  * Copyright (c) 2014, 2016 Joyent, Inc. All rights reserved.
29  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
30  * Copyright (c) 2014, Joyent, Inc. All rights reserved.
31  * Copyright (c) 2011, 2024 by Delphix. All rights reserved.
32  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
33  * Copyright (c) 2013 Steven Hartland. All rights reserved.
34  * Copyright (c) 2014 Integros [integros.com]
35  * Copyright 2016 Toomas Soome <tsoome@me.com>
36  * Copyright (c) 2016 Actifio, Inc. All rights reserved.
37  * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
38  * Copyright 2017 RackTop Systems.
39  * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
40  * Copyright (c) 2019 Datto Inc.
41  * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
42  * Copyright (c) 2019, 2021, 2023, 2024, Klara Inc.
43  * Copyright (c) 2019, Allan Jude
44  * Copyright 2026 Oxide Computer Company
45  */
46 
47 /*
48  * ZFS ioctls.
49  *
50  * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage
51  * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool.
52  *
53  * There are two ways that we handle ioctls: the legacy way where almost
54  * all of the logic is in the ioctl callback, and the new way where most
55  * of the marshalling is handled in the common entry point, zfsdev_ioctl().
56  *
57  * Non-legacy ioctls should be registered by calling
58  * zfs_ioctl_register() from zfs_ioctl_init().  The ioctl is invoked
59  * from userland by lzc_ioctl().
60  *
61  * The registration arguments are as follows:
62  *
63  * const char *name
64  *   The name of the ioctl.  This is used for history logging.  If the
65  *   ioctl returns successfully (the callback returns 0), and allow_log
66  *   is true, then a history log entry will be recorded with the input &
67  *   output nvlists.  The log entry can be printed with "zpool history -i".
68  *
69  * zfs_ioc_t ioc
70  *   The ioctl request number, which userland will pass to ioctl(2).
71  *   We want newer versions of libzfs and libzfs_core to run against
72  *   existing zfs kernel modules (i.e. a deferred reboot after an update).
73  *   Therefore the ioctl numbers cannot change from release to release.
74  *
75  * zfs_secpolicy_func_t *secpolicy
76  *   This function will be called before the zfs_ioc_func_t, to
77  *   determine if this operation is permitted.  It should return EPERM
78  *   on failure, and 0 on success.  Checks include determining if the
79  *   dataset is visible in this zone, and if the user has either all
80  *   zfs privileges in the zone (SYS_MOUNT), or has been granted permission
81  *   to do this operation on this dataset with "zfs allow".
82  *
83  * zfs_ioc_namecheck_t namecheck
84  *   This specifies what to expect in the zfs_cmd_t:zc_name -- a pool
85  *   name, a dataset name, or nothing.  If the name is not well-formed,
86  *   the ioctl will fail and the callback will not be called.
87  *   Therefore, the callback can assume that the name is well-formed
88  *   (e.g. is null-terminated, doesn't have more than one '@' character,
89  *   doesn't have invalid characters).
90  *
91  * zfs_ioc_poolcheck_t pool_check
92  *   This specifies requirements on the pool state.  If the pool does
93  *   not meet them (is suspended or is readonly), the ioctl will fail
94  *   and the callback will not be called.  If any checks are specified
95  *   (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME.
96  *   Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
97  *   POOL_CHECK_READONLY).
98  *
99  * zfs_ioc_key_t *nvl_keys
100  *  The list of expected/allowable innvl input keys. This list is used
101  *  to validate the nvlist input to the ioctl.
102  *
103  * boolean_t smush_outnvlist
104  *   If smush_outnvlist is true, then the output is presumed to be a
105  *   list of errors, and it will be "smushed" down to fit into the
106  *   caller's buffer, by removing some entries and replacing them with a
107  *   single "N_MORE_ERRORS" entry indicating how many were removed.  See
108  *   nvlist_smush() for details.  If smush_outnvlist is false, and the
109  *   outnvlist does not fit into the userland-provided buffer, then the
110  *   ioctl will fail with ENOMEM.
111  *
112  * zfs_ioc_func_t *func
113  *   The callback function that will perform the operation.
114  *
115  *   The callback should return 0 on success, or an error number on
116  *   failure.  If the function fails, the userland ioctl will return -1,
117  *   and errno will be set to the callback's return value.  The callback
118  *   will be called with the following arguments:
119  *
120  *   const char *name
121  *     The name of the pool or dataset to operate on, from
122  *     zfs_cmd_t:zc_name.  The 'namecheck' argument specifies the
123  *     expected type (pool, dataset, or none).
124  *
125  *   nvlist_t *innvl
126  *     The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src.  Or
127  *     NULL if no input nvlist was provided.  Changes to this nvlist are
128  *     ignored.  If the input nvlist could not be deserialized, the
129  *     ioctl will fail and the callback will not be called.
130  *
131  *   nvlist_t *outnvl
132  *     The output nvlist, initially empty.  The callback can fill it in,
133  *     and it will be returned to userland by serializing it into
134  *     zfs_cmd_t:zc_nvlist_dst.  If it is non-empty, and serialization
135  *     fails (e.g. because the caller didn't supply a large enough
136  *     buffer), then the overall ioctl will fail.  See the
137  *     'smush_outnvlist' argument above for additional behaviors.
138  *
139  *     There are two typical uses of the output nvlist:
140  *       - To return state, e.g. property values.  In this case,
141  *         smush_outnvlist should be false.  If the buffer was not large
142  *         enough, the caller will reallocate a larger buffer and try
143  *         the ioctl again.
144  *
145  *       - To return multiple errors from an ioctl which makes on-disk
146  *         changes.  In this case, smush_outnvlist should be true.
147  *         Ioctls which make on-disk modifications should generally not
148  *         use the outnvl if they succeed, because the caller can not
149  *         distinguish between the operation failing, and
150  *         deserialization failing.
151  *
152  * IOCTL Interface Errors
153  *
154  * The following ioctl input errors can be returned:
155  *   ZFS_ERR_IOC_CMD_UNAVAIL	the ioctl number is not supported by kernel
156  *   ZFS_ERR_IOC_ARG_UNAVAIL	an input argument is not supported by kernel
157  *   ZFS_ERR_IOC_ARG_REQUIRED	a required input argument is missing
158  *   ZFS_ERR_IOC_ARG_BADTYPE	an input argument has an invalid type
159  */
160 
161 #include <sys/types.h>
162 #include <sys/param.h>
163 #include <sys/errno.h>
164 #include <sys/file.h>
165 #include <sys/kmem.h>
166 #include <sys/cmn_err.h>
167 #include <sys/stat.h>
168 #include <sys/zfs_ioctl.h>
169 #include <sys/zfs_quota.h>
170 #include <sys/zfs_vfsops.h>
171 #include <sys/zfs_znode.h>
172 #include <sys/zap.h>
173 #include <sys/spa.h>
174 #include <sys/spa_impl.h>
175 #include <sys/vdev.h>
176 #include <sys/vdev_impl.h>
177 #include <sys/dmu.h>
178 #include <sys/dsl_dir.h>
179 #include <sys/dsl_dataset.h>
180 #include <sys/dsl_prop.h>
181 #include <sys/dsl_deleg.h>
182 #include <sys/dmu_objset.h>
183 #include <sys/dmu_impl.h>
184 #include <sys/dmu_redact.h>
185 #include <sys/dmu_tx.h>
186 #include <sys/sunddi.h>
187 #include <sys/policy.h>
188 #include <sys/zone.h>
189 #include <sys/nvpair.h>
190 #include <sys/pathname.h>
191 #include <sys/fs/zfs.h>
192 #include <sys/zfs_ctldir.h>
193 #include <sys/zfs_dir.h>
194 #include <sys/zfs_onexit.h>
195 #include <sys/zvol.h>
196 #include <sys/dsl_scan.h>
197 #include <sys/fm/util.h>
198 #include <sys/dsl_crypt.h>
199 #include <sys/rrwlock.h>
200 #include <sys/zfs_file.h>
201 
202 #include <sys/dmu_recv.h>
203 #include <sys/dmu_send.h>
204 #include <sys/dmu_recv.h>
205 #include <sys/dsl_destroy.h>
206 #include <sys/dsl_bookmark.h>
207 #include <sys/dsl_userhold.h>
208 #include <sys/zfeature.h>
209 #include <sys/zcp.h>
210 #include <sys/zio_checksum.h>
211 #include <sys/vdev_removal.h>
212 #include <sys/vdev_impl.h>
213 #include <sys/vdev_initialize.h>
214 #include <sys/vdev_trim.h>
215 #include <sys/brt.h>
216 #include <sys/ddt.h>
217 
218 #include "zfs_namecheck.h"
219 #include "zfs_prop.h"
220 #include "zfs_deleg.h"
221 #include "zfs_comutil.h"
222 
223 #include <sys/lua/lua.h>
224 #include <sys/lua/lauxlib.h>
225 #include <sys/zfs_ioctl_impl.h>
226 
227 kmutex_t zfsdev_state_lock;	/* NOTE(review): presumably protects zfsdev_state_listhead -- confirm */
228 static zfsdev_state_t zfsdev_state_listhead;	/* NOTE(review): by its name, head of the zfsdev_state_t list -- confirm */
229 
230 /*
231  * Limit maximum nvlist size.  We don't want users passing in insane values
232  * for zc->zc_nvlist_src_size, since we will need to allocate that much memory.
233  * Defaults to 0=auto which is handled by platform code.
234  */
235 uint64_t zfs_max_nvlist_src_size = 0;
236 
237 /*
238  * When logging the output nvlist of an ioctl in the on-disk history, limit
239  * the logged size to this many bytes.  This must be less than DMU_MAX_ACCESS.
240  * This applies primarily to zfs_ioc_channel_program().
241  */
242 static uint64_t zfs_history_output_max = 1024 * 1024;
243 
244 uint_t zfs_allow_log_key;	/* NOTE(review): unused in this part of the file; presumably a key tied to history logging -- confirm */
245 
246 /* DATA_TYPE_ANY is used when zkey_type can vary. */
247 #define	DATA_TYPE_ANY	DATA_TYPE_UNKNOWN
248 
/*
 * One ioctl's dispatch record.  Most fields mirror the registration
 * arguments described in the comment at the top of this file.
 */
249 typedef struct zfs_ioc_vec {
250 	zfs_ioc_legacy_func_t	*zvec_legacy_func;	/* callback for a legacy-style ioctl */
251 	zfs_ioc_func_t		*zvec_func;	/* callback for a non-legacy ioctl */
252 	zfs_secpolicy_func_t	*zvec_secpolicy;	/* permission check run before the callback */
253 	zfs_ioc_namecheck_t	zvec_namecheck;	/* what zc_name is expected to hold */
254 	boolean_t		zvec_allow_log;	/* log successful calls to pool history */
255 	zfs_ioc_poolcheck_t	zvec_pool_check;	/* required pool state */
256 	boolean_t		zvec_smush_outnvlist;	/* shrink an error list to fit the caller's buffer */
257 	const char		*zvec_name;	/* ioctl name, used for history logging */
258 	const zfs_ioc_key_t	*zvec_nvl_keys;	/* expected/allowable innvl keys */
259 	size_t			zvec_nvl_key_count;	/* presumably the entry count of zvec_nvl_keys -- confirm */
260 } zfs_ioc_vec_t;
261 
262 /*
 * Delegation permission names (ZFS_DELEG_PERM_*) for the user, group and
 * project space/object "used" and "quota" properties.  This array is
 * indexed by zfs_userquota_prop_t, so the entry order has to follow that
 * enum.
 */
263 static const char *userquota_perms[] = {
264 	ZFS_DELEG_PERM_USERUSED,
265 	ZFS_DELEG_PERM_USERQUOTA,
266 	ZFS_DELEG_PERM_GROUPUSED,
267 	ZFS_DELEG_PERM_GROUPQUOTA,
268 	ZFS_DELEG_PERM_USEROBJUSED,
269 	ZFS_DELEG_PERM_USEROBJQUOTA,
270 	ZFS_DELEG_PERM_GROUPOBJUSED,
271 	ZFS_DELEG_PERM_GROUPOBJQUOTA,
272 	ZFS_DELEG_PERM_PROJECTUSED,
273 	ZFS_DELEG_PERM_PROJECTQUOTA,
274 	ZFS_DELEG_PERM_PROJECTOBJUSED,
275 	ZFS_DELEG_PERM_PROJECTOBJQUOTA,
276 };
277 
/*
 * Forward declarations for routines whose definitions do not appear before
 * this point.  zfs_set_prop_nvlist() is the only one declared with external
 * linkage; the rest are file-local.
 */
278 static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
279 static int zfs_ioc_id_quota_upgrade(zfs_cmd_t *zc);
280 static int zfs_check_settable(const char *name, nvpair_t *property,
281     cred_t *cr);
282 static int zfs_check_clearable(const char *dataset, nvlist_t *props,
283     nvlist_t **errors);
284 static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
285     boolean_t *);
286 int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
287 static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);
288 
289 /*
290  * Callback for SPL to look up zoned_uid property.
291  * Walks ancestors to find the delegation root with zoned_uid set.
292  * Returns the zoned_uid value if found, or 0 if not set.
293  */
294 static uid_t
zfs_get_zoned_uid(const char * dataset,char * root_out,size_t root_size)295 zfs_get_zoned_uid(const char *dataset, char *root_out, size_t root_size)
296 {
297 	char path[ZFS_MAX_DATASET_NAME_LEN];
298 	char setpoint[ZFS_MAX_DATASET_NAME_LEN];
299 	char *slash, *at;
300 	uint64_t zoned_uid_val = 0;
301 	int error;
302 
303 	(void) strlcpy(path, dataset, sizeof (path));
304 
305 	/*
306 	 * Strip snapshot suffix if present — snapshots inherit properties
307 	 * from their parent filesystem.
308 	 */
309 	at = strchr(path, '@');
310 	if (at != NULL)
311 		*at = '\0';
312 
313 	/*
314 	 * Walk up the hierarchy until we find a dataset with zoned_uid set.
315 	 * This handles the case where the dataset doesn't exist yet (e.g.,
316 	 * rename destination) — dsl_prop_get fails on non-existent datasets,
317 	 * so we walk up to find an existing ancestor.
318 	 *
319 	 * When the property is found (possibly via inheritance), setpoint
320 	 * tells us the actual delegation root where zoned_uid is locally
321 	 * set, rather than the dataset where we happened to query it.
322 	 */
323 	while (path[0] != '\0') {
324 		error = dsl_prop_get(path, "zoned_uid", 8, 1,
325 		    &zoned_uid_val, setpoint);
326 
327 		if (error == 0 && zoned_uid_val != 0) {
328 			if (root_out != NULL)
329 				(void) strlcpy(root_out, setpoint, root_size);
330 			return ((uid_t)zoned_uid_val);
331 		}
332 
333 		slash = strrchr(path, '/');
334 		if (slash == NULL)
335 			break;
336 		*slash = '\0';
337 	}
338 
339 	return (0);
340 }
341 
342 static void
history_str_free(char * buf)343 history_str_free(char *buf)
344 {
345 	kmem_free(buf, HIS_MAX_RECORD_LEN);
346 }
347 
348 static char *
history_str_get(zfs_cmd_t * zc)349 history_str_get(zfs_cmd_t *zc)
350 {
351 	char *buf;
352 
353 	if (zc->zc_history == 0)
354 		return (NULL);
355 
356 	buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
357 	if (copyinstr((void *)(uintptr_t)zc->zc_history,
358 	    buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
359 		history_str_free(buf);
360 		return (NULL);
361 	}
362 
363 	buf[HIS_MAX_RECORD_LEN -1] = '\0';
364 
365 	return (buf);
366 }
367 
368 /*
369  * Return non-zero if the spa version is less than requested version.
370  */
371 static int
zfs_earlier_version(const char * name,int version)372 zfs_earlier_version(const char *name, int version)
373 {
374 	spa_t *spa;
375 
376 	if (spa_open(name, &spa, FTAG) == 0) {
377 		if (spa_version(spa) < version) {
378 			spa_close(spa, FTAG);
379 			return (1);
380 		}
381 		spa_close(spa, FTAG);
382 	}
383 	return (0);
384 }
385 
386 /*
387  * Return TRUE if the ZPL version is less than requested version.
388  */
389 static boolean_t
zpl_earlier_version(const char * name,int version)390 zpl_earlier_version(const char *name, int version)
391 {
392 	objset_t *os;
393 	boolean_t rc = B_TRUE;
394 
395 	if (dmu_objset_hold(name, FTAG, &os) == 0) {
396 		uint64_t zplversion;
397 
398 		if (dmu_objset_type(os) != DMU_OST_ZFS) {
399 			dmu_objset_rele(os, FTAG);
400 			return (B_TRUE);
401 		}
402 		/* XXX reading from non-owned objset */
403 		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
404 			rc = zplversion < version;
405 		dmu_objset_rele(os, FTAG);
406 	}
407 	return (rc);
408 }
409 
410 static void
zfs_log_history(zfs_cmd_t * zc)411 zfs_log_history(zfs_cmd_t *zc)
412 {
413 	spa_t *spa;
414 	char *buf;
415 
416 	if ((buf = history_str_get(zc)) == NULL)
417 		return;
418 
419 	if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
420 		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
421 			(void) spa_history_log(spa, buf);
422 		spa_close(spa, FTAG);
423 	}
424 	history_str_free(buf);
425 }
426 
427 /*
428  * Policy for top-level read operations (list pools).  Requires no privileges,
429  * and can be used in the local zone, as there is no associated dataset.
430  */
431 static int
zfs_secpolicy_none(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)432 zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
433 {
434 	(void) zc, (void) innvl, (void) cr;
435 	return (0);
436 }
437 
438 /*
439  * Policy for dataset read operations (list children, get statistics).  Requires
440  * no privileges, but must be visible in the local zone.
441  */
442 static int
zfs_secpolicy_read(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)443 zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
444 {
445 	(void) innvl, (void) cr;
446 	if (INGLOBALZONE(curproc) ||
447 	    zone_dataset_visible(zc->zc_name, NULL))
448 		return (0);
449 
450 	return (SET_ERROR(ENOENT));
451 }
452 
453 static int
zfs_dozonecheck_impl(const char * dataset,uint64_t zoned,cred_t * cr)454 zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
455 {
456 	int writable = 1;
457 
458 	/*
459 	 * The dataset must be visible by this zone -- check this first
460 	 * so they don't see EPERM on something they shouldn't know about.
461 	 */
462 	if (!INGLOBALZONE(curproc) &&
463 	    !zone_dataset_visible(dataset, &writable))
464 		return (SET_ERROR(ENOENT));
465 
466 	if (INGLOBALZONE(curproc)) {
467 		/*
468 		 * If the fs is zoned, only root can access it from the
469 		 * global zone.
470 		 */
471 		if (secpolicy_zfs(cr) && zoned)
472 			return (SET_ERROR(EPERM));
473 	} else {
474 		/*
475 		 * If we are in a local zone, the 'zoned' property must be set.
476 		 */
477 		if (!zoned)
478 			return (SET_ERROR(EPERM));
479 
480 		/* must be writable by this zone */
481 		if (!writable)
482 			return (SET_ERROR(EPERM));
483 	}
484 	return (0);
485 }
486 
487 static int
zfs_dozonecheck(const char * dataset,cred_t * cr)488 zfs_dozonecheck(const char *dataset, cred_t *cr)
489 {
490 	uint64_t zoned;
491 
492 	if (dsl_prop_get_integer(dataset, zfs_prop_to_name(ZFS_PROP_ZONED),
493 	    &zoned, NULL))
494 		return (SET_ERROR(ENOENT));
495 
496 	return (zfs_dozonecheck_impl(dataset, zoned, cr));
497 }
498 
499 static int
zfs_dozonecheck_ds(const char * dataset,dsl_dataset_t * ds,cred_t * cr)500 zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
501 {
502 	uint64_t zoned;
503 
504 	if (dsl_prop_get_int_ds(ds, zfs_prop_to_name(ZFS_PROP_ZONED), &zoned))
505 		return (SET_ERROR(ENOENT));
506 
507 	return (zfs_dozonecheck_impl(dataset, zoned, cr));
508 }
509 
510 static int
zfs_secpolicy_write_perms_ds(const char * name,dsl_dataset_t * ds,const char * perm,cred_t * cr)511 zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
512     const char *perm, cred_t *cr)
513 {
514 	int error;
515 
516 	error = zfs_dozonecheck_ds(name, ds, cr);
517 	if (error == 0) {
518 		error = secpolicy_zfs(cr);
519 		if (error != 0)
520 			error = dsl_deleg_access_impl(ds, perm, cr);
521 	}
522 	return (error);
523 }
524 
525 static int
zfs_secpolicy_write_perms(const char * name,const char * perm,cred_t * cr)526 zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
527 {
528 	int error;
529 	dsl_dataset_t *ds;
530 	dsl_pool_t *dp;
531 
532 	/*
533 	 * First do a quick check for root in the global zone, which
534 	 * is allowed to do all write_perms.  This ensures that zfs_ioc_*
535 	 * will get to handle nonexistent datasets.
536 	 */
537 	if (INGLOBALZONE(curproc) && secpolicy_zfs(cr) == 0)
538 		return (0);
539 
540 	error = dsl_pool_hold(name, FTAG, &dp);
541 	if (error != 0)
542 		return (error);
543 
544 	error = dsl_dataset_hold(dp, name, FTAG, &ds);
545 	if (error != 0) {
546 		dsl_pool_rele(dp, FTAG);
547 		return (error);
548 	}
549 
550 	error = zfs_secpolicy_write_perms_ds(name, ds, perm, cr);
551 
552 	dsl_dataset_rele(ds, FTAG);
553 	dsl_pool_rele(dp, FTAG);
554 	return (error);
555 }
556 
557 /*
558  * Check dsl_deleg permission for zoned_uid datasets.
559  *
560  * This bypasses zfs_dozonecheck_ds() (which requires the 'zoned' property)
561  * because zoned_uid datasets use a different authentication model.  The zone
562  * check was already performed by zone_dataset_admin_check().
563  *
564  * Returns 0 if permission is granted, error otherwise.
565  * ECANCELED from dsl_deleg_access_impl() means delegation is disabled on the
566  * pool — in that case we deny access (POLP: no delegation = no access).
567  */
568 static int
zfs_secpolicy_zoned_uid_deleg(const char * name,const char * perm,cred_t * cr)569 zfs_secpolicy_zoned_uid_deleg(const char *name, const char *perm, cred_t *cr)
570 {
571 	dsl_pool_t *dp;
572 	dsl_dataset_t *ds;
573 	int error;
574 
575 	error = dsl_pool_hold(name, FTAG, &dp);
576 	if (error != 0)
577 		return (error);
578 	error = dsl_dataset_hold(dp, name, FTAG, &ds);
579 	if (error != 0) {
580 		dsl_pool_rele(dp, FTAG);
581 		return (error);
582 	}
583 	error = dsl_deleg_access_impl(ds, perm, cr);
584 	dsl_dataset_rele(ds, FTAG);
585 	dsl_pool_rele(dp, FTAG);
586 
587 	/* ECANCELED = delegation disabled on pool; deny access (POLP) */
588 	if (error == ECANCELED)
589 		return (SET_ERROR(EPERM));
590 	return (error);
591 }
592 
593 /*
594  * Policy for setting the security label property.
595  *
596  * Returns 0 for success, non-zero for access and other errors.
597  */
598 static int
zfs_set_slabel_policy(const char * name,const char * strval,cred_t * cr)599 zfs_set_slabel_policy(const char *name, const char *strval, cred_t *cr)
600 {
601 #ifdef HAVE_MLSLABEL
602 	char		ds_hexsl[MAXNAMELEN];
603 	bslabel_t	ds_sl, new_sl;
604 	boolean_t	new_default = FALSE;
605 	uint64_t	zoned;
606 	int		needed_priv = -1;
607 	int		error;
608 
609 	/* First get the existing dataset label. */
610 	error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
611 	    1, sizeof (ds_hexsl), &ds_hexsl, NULL);
612 	if (error != 0)
613 		return (SET_ERROR(EPERM));
614 
615 	if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
616 		new_default = TRUE;
617 
618 	/* The label must be translatable */
619 	if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
620 		return (SET_ERROR(EINVAL));
621 
622 	/*
623 	 * In a non-global zone, disallow attempts to set a label that
624 	 * doesn't match that of the zone; otherwise no other checks
625 	 * are needed.
626 	 */
627 	if (!INGLOBALZONE(curproc)) {
628 		if (new_default || !blequal(&new_sl, CR_SL(CRED())))
629 			return (SET_ERROR(EPERM));
630 		return (0);
631 	}
632 
633 	/*
634 	 * For global-zone datasets (i.e., those whose zoned property is
635 	 * "off", verify that the specified new label is valid for the
636 	 * global zone.
637 	 */
638 	if (dsl_prop_get_integer(name,
639 	    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
640 		return (SET_ERROR(EPERM));
641 	if (!zoned) {
642 		if (zfs_check_global_label(name, strval) != 0)
643 			return (SET_ERROR(EPERM));
644 	}
645 
646 	/*
647 	 * If the existing dataset label is nondefault, check if the
648 	 * dataset is mounted (label cannot be changed while mounted).
649 	 * Get the zfsvfs_t; if there isn't one, then the dataset isn't
650 	 * mounted (or isn't a dataset, doesn't exist, ...).
651 	 */
652 	if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
653 		objset_t *os;
654 		static const char *setsl_tag = "setsl_tag";
655 
656 		/*
657 		 * Try to own the dataset; abort if there is any error,
658 		 * (e.g., already mounted, in use, or other error).
659 		 */
660 		error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, B_TRUE,
661 		    setsl_tag, &os);
662 		if (error != 0)
663 			return (SET_ERROR(EPERM));
664 
665 		dmu_objset_disown(os, B_TRUE, setsl_tag);
666 
667 		if (new_default) {
668 			needed_priv = PRIV_FILE_DOWNGRADE_SL;
669 			goto out_check;
670 		}
671 
672 		if (hexstr_to_label(strval, &new_sl) != 0)
673 			return (SET_ERROR(EPERM));
674 
675 		if (blstrictdom(&ds_sl, &new_sl))
676 			needed_priv = PRIV_FILE_DOWNGRADE_SL;
677 		else if (blstrictdom(&new_sl, &ds_sl))
678 			needed_priv = PRIV_FILE_UPGRADE_SL;
679 	} else {
680 		/* dataset currently has a default label */
681 		if (!new_default)
682 			needed_priv = PRIV_FILE_UPGRADE_SL;
683 	}
684 
685 out_check:
686 	if (needed_priv != -1)
687 		return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
688 	return (0);
689 #else
690 	return (SET_ERROR(ENOTSUP));
691 #endif /* HAVE_MLSLABEL */
692 }
693 
694 static int
zfs_secpolicy_setprop(const char * dsname,zfs_prop_t prop,nvpair_t * propval,cred_t * cr)695 zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
696     cred_t *cr)
697 {
698 	const char *strval;
699 	zone_admin_result_t zone_result;
700 
701 	/*
702 	 * Check zoned_uid delegation first.  However, even delegated
703 	 * namespace users must not be allowed to modify zoned_uid itself.
704 	 */
705 	zone_result = zone_dataset_admin_check(dsname, ZONE_OP_SETPROP, NULL);
706 	if (zone_result == ZONE_ADMIN_ALLOWED) {
707 		if (prop == ZFS_PROP_ZONED_UID)
708 			return (SET_ERROR(EPERM));
709 		if (prop == ZFS_PROP_FILESYSTEM_LIMIT ||
710 		    prop == ZFS_PROP_SNAPSHOT_LIMIT) {
711 			char setpoint[ZFS_MAX_DATASET_NAME_LEN];
712 			uint64_t zoned_uid_val = 0;
713 			if (dsl_prop_get(dsname, "zoned_uid", 8, 1,
714 			    &zoned_uid_val, setpoint) == 0 &&
715 			    zoned_uid_val != 0 &&
716 			    strcmp(dsname, setpoint) == 0)
717 				return (SET_ERROR(EPERM));
718 		}
719 		return (zfs_secpolicy_zoned_uid_deleg(dsname,
720 		    zfs_prop_to_name(prop), cr));
721 	}
722 	if (zone_result == ZONE_ADMIN_DENIED)
723 		return (SET_ERROR(EPERM));
724 
725 	/*
726 	 * Check permissions for special properties.
727 	 */
728 	switch (prop) {
729 	default:
730 		break;
731 	case ZFS_PROP_ZONED:
732 		/*
733 		 * Disallow setting of 'zoned' from within a local zone.
734 		 */
735 		if (!INGLOBALZONE(curproc))
736 			return (SET_ERROR(EPERM));
737 		break;
738 	case ZFS_PROP_ZONED_UID:
739 		/*
740 		 * Disallow setting of 'zoned_uid' from within a
741 		 * delegated namespace -- only global zone can manage
742 		 * delegation assignments.
743 		 */
744 		if (!INGLOBALZONE(curproc))
745 			return (SET_ERROR(EPERM));
746 		break;
747 
748 	case ZFS_PROP_QUOTA:
749 	case ZFS_PROP_FILESYSTEM_LIMIT:
750 	case ZFS_PROP_SNAPSHOT_LIMIT:
751 		if (!INGLOBALZONE(curproc)) {
752 			uint64_t zoned;
753 			char setpoint[ZFS_MAX_DATASET_NAME_LEN];
754 			/*
755 			 * Unprivileged users are allowed to modify the
756 			 * limit on things *under* (ie. contained by)
757 			 * the thing they own.
758 			 */
759 			if (dsl_prop_get_integer(dsname,
760 			    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, setpoint))
761 				return (SET_ERROR(EPERM));
762 			if (!zoned || strlen(dsname) <= strlen(setpoint))
763 				return (SET_ERROR(EPERM));
764 		}
765 		break;
766 
767 	case ZFS_PROP_MLSLABEL:
768 		if (!is_system_labeled())
769 			return (SET_ERROR(EPERM));
770 
771 		if (nvpair_value_string(propval, &strval) == 0) {
772 			int err;
773 
774 			err = zfs_set_slabel_policy(dsname, strval, CRED());
775 			if (err != 0)
776 				return (err);
777 		}
778 		break;
779 	}
780 
781 	return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
782 }
783 
784 static int
zfs_secpolicy_set_fsacl(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)785 zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
786 {
787 	/*
788 	 * permission to set permissions will be evaluated later in
789 	 * dsl_deleg_can_allow()
790 	 */
791 	(void) innvl;
792 	return (zfs_dozonecheck(zc->zc_name, cr));
793 }
794 
795 static int
zfs_secpolicy_rollback(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)796 zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
797 {
798 	(void) innvl;
799 	return (zfs_secpolicy_write_perms(zc->zc_name,
800 	    ZFS_DELEG_PERM_ROLLBACK, cr));
801 }
802 
803 static int
zfs_secpolicy_send(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)804 zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
805 {
806 	(void) innvl;
807 	dsl_pool_t *dp;
808 	dsl_dataset_t *ds;
809 	const char *cp;
810 	int error;
811 	boolean_t rawok = !!(zc->zc_flags & 0x8);
812 
813 	/*
814 	 * Generate the current snapshot name from the given objsetid, then
815 	 * use that name for the secpolicy/zone checks.
816 	 */
817 	cp = strchr(zc->zc_name, '@');
818 	if (cp == NULL)
819 		return (SET_ERROR(EINVAL));
820 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
821 	if (error != 0)
822 		return (error);
823 
824 	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
825 	if (error != 0) {
826 		dsl_pool_rele(dp, FTAG);
827 		return (error);
828 	}
829 
830 	dsl_dataset_name(ds, zc->zc_name);
831 
832 	error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
833 	    ZFS_DELEG_PERM_SEND, cr);
834 	if (error != 0 && rawok) {
835 		error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
836 		    ZFS_DELEG_PERM_SEND_RAW, cr);
837 	}
838 	dsl_dataset_rele(ds, FTAG);
839 	dsl_pool_rele(dp, FTAG);
840 
841 	return (error);
842 }
843 
844 static int
zfs_secpolicy_send_new(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)845 zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
846 {
847 	boolean_t rawok = nvlist_exists(innvl, "rawok");
848 	int error;
849 
850 	(void) innvl;
851 	error = zfs_secpolicy_write_perms(zc->zc_name,
852 	    ZFS_DELEG_PERM_SEND, cr);
853 	if (error != 0 && rawok) {
854 		error = zfs_secpolicy_write_perms(zc->zc_name,
855 		    ZFS_DELEG_PERM_SEND_RAW, cr);
856 	}
857 	return (error);
858 }
859 
860 static int
zfs_secpolicy_share(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)861 zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
862 {
863 	(void) zc, (void) innvl, (void) cr;
864 	return (SET_ERROR(ENOTSUP));
865 }
866 
867 static int
zfs_secpolicy_smb_acl(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)868 zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
869 {
870 	(void) zc, (void) innvl, (void) cr;
871 	return (SET_ERROR(ENOTSUP));
872 }
873 
/*
 * Compute the parent of 'datasetname' into 'parent' (a buffer of
 * 'parentsize' bytes).
 *
 * The name is copied and then cut at its last '@' if it has one, otherwise
 * at its last '/'.  Returns 0 on success.  Returns ENOENT when the name has
 * neither separator; in that case 'parent' is left holding the uncut copy.
 */
static int
zfs_get_parent(const char *datasetname, char *parent, int parentsize)
{
	char *sep;

	(void) strlcpy(parent, datasetname, parentsize);

	/* Remove the @bla or /bla from the end of the name. */
	sep = strrchr(parent, '@');
	if (sep == NULL)
		sep = strrchr(parent, '/');
	if (sep == NULL)
		return (SET_ERROR(ENOENT));

	*sep = '\0';
	return (0);
}
895 
896 int
zfs_secpolicy_destroy_perms(const char * name,cred_t * cr)897 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
898 {
899 	int error;
900 	zone_admin_result_t result;
901 
902 	/* Check zoned_uid delegation first */
903 	result = zone_dataset_admin_check(name, ZONE_OP_DESTROY, NULL);
904 	if (result == ZONE_ADMIN_ALLOWED) {
905 		if ((error = zfs_secpolicy_zoned_uid_deleg(name,
906 		    ZFS_DELEG_PERM_DESTROY, cr)) != 0)
907 			return (error);
908 		return (zfs_secpolicy_zoned_uid_deleg(name,
909 		    ZFS_DELEG_PERM_MOUNT, cr));
910 	}
911 	if (result == ZONE_ADMIN_DENIED)
912 		return (SET_ERROR(EPERM));
913 
914 	/* NOT_APPLICABLE: continue with existing checks */
915 	if ((error = zfs_secpolicy_write_perms(name,
916 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
917 		return (error);
918 
919 	return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
920 }
921 
922 static int
zfs_secpolicy_destroy(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)923 zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
924 {
925 	(void) innvl;
926 	return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
927 }
928 
929 /*
930  * Destroying snapshots with delegated permissions requires
931  * descendant mount and destroy permissions.
932  */
933 static int
zfs_secpolicy_destroy_snaps(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)934 zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
935 {
936 	(void) zc;
937 	nvlist_t *snaps;
938 	nvpair_t *pair, *nextpair;
939 	int error = 0;
940 
941 	snaps = fnvlist_lookup_nvlist(innvl, "snaps");
942 
943 	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
944 	    pair = nextpair) {
945 		nextpair = nvlist_next_nvpair(snaps, pair);
946 		error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr);
947 		if (error == ENOENT) {
948 			/*
949 			 * Ignore any snapshots that don't exist (we consider
950 			 * them "already destroyed").  Remove the name from the
951 			 * nvl here in case the snapshot is created between
952 			 * now and when we try to destroy it (in which case
953 			 * we don't want to destroy it since we haven't
954 			 * checked for permission).
955 			 */
956 			fnvlist_remove_nvpair(snaps, pair);
957 			error = 0;
958 		}
959 		if (error != 0)
960 			break;
961 	}
962 
963 	return (error);
964 }
965 
966 int
zfs_secpolicy_rename_perms(const char * from,const char * to,cred_t * cr)967 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
968 {
969 	char	parentname[ZFS_MAX_DATASET_NAME_LEN];
970 	int	error;
971 	zone_admin_result_t result;
972 
973 	/* Check zoned_uid delegation first */
974 	result = zone_dataset_admin_check(from, ZONE_OP_RENAME, to);
975 	if (result == ZONE_ADMIN_ALLOWED) {
976 		if ((error = zfs_secpolicy_zoned_uid_deleg(from,
977 		    ZFS_DELEG_PERM_RENAME, cr)) != 0)
978 			return (error);
979 		return (zfs_secpolicy_zoned_uid_deleg(from,
980 		    ZFS_DELEG_PERM_MOUNT, cr));
981 	}
982 	if (result == ZONE_ADMIN_DENIED)
983 		return (SET_ERROR(EPERM));
984 
985 	/* NOT_APPLICABLE: continue with existing checks */
986 	if ((error = zfs_secpolicy_write_perms(from,
987 	    ZFS_DELEG_PERM_RENAME, cr)) != 0)
988 		return (error);
989 
990 	if ((error = zfs_secpolicy_write_perms(from,
991 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
992 		return (error);
993 
994 	if ((error = zfs_get_parent(to, parentname,
995 	    sizeof (parentname))) != 0)
996 		return (error);
997 
998 	if ((error = zfs_secpolicy_write_perms(parentname,
999 	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
1000 		return (error);
1001 
1002 	if ((error = zfs_secpolicy_write_perms(parentname,
1003 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
1004 		return (error);
1005 
1006 	return (error);
1007 }
1008 
1009 static int
zfs_secpolicy_rename(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1010 zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1011 {
1012 	(void) innvl;
1013 	return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
1014 }
1015 
1016 static int
zfs_secpolicy_promote(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1017 zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1018 {
1019 	(void) innvl;
1020 	dsl_pool_t *dp;
1021 	dsl_dataset_t *clone;
1022 	int error;
1023 
1024 	error = zfs_secpolicy_write_perms(zc->zc_name,
1025 	    ZFS_DELEG_PERM_PROMOTE, cr);
1026 	if (error != 0)
1027 		return (error);
1028 
1029 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
1030 	if (error != 0)
1031 		return (error);
1032 
1033 	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone);
1034 
1035 	if (error == 0) {
1036 		char parentname[ZFS_MAX_DATASET_NAME_LEN];
1037 		dsl_dataset_t *origin = NULL;
1038 		dsl_dir_t *dd;
1039 		dd = clone->ds_dir;
1040 
1041 		error = dsl_dataset_hold_obj(dd->dd_pool,
1042 		    dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin);
1043 		if (error != 0) {
1044 			dsl_dataset_rele(clone, FTAG);
1045 			dsl_pool_rele(dp, FTAG);
1046 			return (error);
1047 		}
1048 
1049 		error = zfs_secpolicy_write_perms_ds(zc->zc_name, clone,
1050 		    ZFS_DELEG_PERM_MOUNT, cr);
1051 
1052 		dsl_dataset_name(origin, parentname);
1053 		if (error == 0) {
1054 			error = zfs_secpolicy_write_perms_ds(parentname, origin,
1055 			    ZFS_DELEG_PERM_PROMOTE, cr);
1056 		}
1057 		dsl_dataset_rele(clone, FTAG);
1058 		dsl_dataset_rele(origin, FTAG);
1059 	}
1060 	dsl_pool_rele(dp, FTAG);
1061 	return (error);
1062 }
1063 
1064 static int
zfs_secpolicy_recv(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1065 zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1066 {
1067 	(void) innvl;
1068 	int error;
1069 
1070 	/*
1071 	 * zfs receive -F requires full receive permission,
1072 	 * otherwise receive:append permission is enough
1073 	 */
1074 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1075 	    ZFS_DELEG_PERM_RECEIVE, cr)) != 0) {
1076 		if (zc->zc_guid || nvlist_exists(innvl, "force"))
1077 			return (error);
1078 		if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1079 		    ZFS_DELEG_PERM_RECEIVE_APPEND, cr)) != 0)
1080 			return (error);
1081 	}
1082 
1083 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1084 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
1085 		return (error);
1086 
1087 	return (zfs_secpolicy_write_perms(zc->zc_name,
1088 	    ZFS_DELEG_PERM_CREATE, cr));
1089 }
1090 
1091 /*
1092  * Policy for dataset set property operations.  Individual properties checked by
1093  * zfs_check_settable(), additionally require zfs_secpolicy_recv() when setting
1094  * properties as part of a receive.
1095  */
1096 static int
zfs_secpolicy_setprops(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1097 zfs_secpolicy_setprops(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1098 {
1099 	boolean_t received = zc->zc_cookie;
1100 	int error;
1101 
1102 	if (received && (error = zfs_secpolicy_recv(zc, innvl, cr)))
1103 		return (error);
1104 
1105 	return (zfs_secpolicy_read(zc, innvl, cr));
1106 }
1107 
1108 int
zfs_secpolicy_snapshot_perms(const char * name,cred_t * cr)1109 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
1110 {
1111 	zone_admin_result_t result;
1112 
1113 	/* Check zoned_uid delegation first */
1114 	result = zone_dataset_admin_check(name, ZONE_OP_SNAPSHOT, NULL);
1115 	if (result == ZONE_ADMIN_ALLOWED)
1116 		return (zfs_secpolicy_zoned_uid_deleg(name,
1117 		    ZFS_DELEG_PERM_SNAPSHOT, cr));
1118 	if (result == ZONE_ADMIN_DENIED)
1119 		return (SET_ERROR(EPERM));
1120 
1121 	/* NOT_APPLICABLE: continue with existing checks */
1122 	return (zfs_secpolicy_write_perms(name,
1123 	    ZFS_DELEG_PERM_SNAPSHOT, cr));
1124 }
1125 
1126 /*
1127  * Check for permission to create each snapshot in the nvlist.
1128  */
1129 static int
zfs_secpolicy_snapshot(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1130 zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1131 {
1132 	(void) zc;
1133 	nvlist_t *snaps;
1134 	int error = 0;
1135 	nvpair_t *pair;
1136 
1137 	snaps = fnvlist_lookup_nvlist(innvl, "snaps");
1138 
1139 	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
1140 	    pair = nvlist_next_nvpair(snaps, pair)) {
1141 		char *name = (char *)nvpair_name(pair);
1142 		char *atp = strchr(name, '@');
1143 
1144 		if (atp == NULL) {
1145 			error = SET_ERROR(EINVAL);
1146 			break;
1147 		}
1148 		*atp = '\0';
1149 		error = zfs_secpolicy_snapshot_perms(name, cr);
1150 		*atp = '@';
1151 		if (error != 0)
1152 			break;
1153 	}
1154 	return (error);
1155 }
1156 
1157 /*
1158  * Check for permission to create each bookmark in the nvlist.
1159  */
1160 static int
zfs_secpolicy_bookmark(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1161 zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1162 {
1163 	(void) zc;
1164 	int error = 0;
1165 
1166 	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
1167 	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
1168 		char *name = (char *)nvpair_name(pair);
1169 		char *hashp = strchr(name, '#');
1170 
1171 		if (hashp == NULL) {
1172 			error = SET_ERROR(EINVAL);
1173 			break;
1174 		}
1175 		*hashp = '\0';
1176 		error = zfs_secpolicy_write_perms(name,
1177 		    ZFS_DELEG_PERM_BOOKMARK, cr);
1178 		*hashp = '#';
1179 		if (error != 0)
1180 			break;
1181 	}
1182 	return (error);
1183 }
1184 
1185 static int
zfs_secpolicy_destroy_bookmarks(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1186 zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1187 {
1188 	(void) zc;
1189 	nvpair_t *pair, *nextpair;
1190 	int error = 0;
1191 
1192 	for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1193 	    pair = nextpair) {
1194 		char *name = (char *)nvpair_name(pair);
1195 		char *hashp = strchr(name, '#');
1196 		nextpair = nvlist_next_nvpair(innvl, pair);
1197 
1198 		if (hashp == NULL) {
1199 			error = SET_ERROR(EINVAL);
1200 			break;
1201 		}
1202 
1203 		*hashp = '\0';
1204 		error = zfs_secpolicy_write_perms(name,
1205 		    ZFS_DELEG_PERM_DESTROY, cr);
1206 		*hashp = '#';
1207 		if (error == ENOENT) {
1208 			/*
1209 			 * Ignore any filesystems that don't exist (we consider
1210 			 * their bookmarks "already destroyed").  Remove
1211 			 * the name from the nvl here in case the filesystem
1212 			 * is created between now and when we try to destroy
1213 			 * the bookmark (in which case we don't want to
1214 			 * destroy it since we haven't checked for permission).
1215 			 */
1216 			fnvlist_remove_nvpair(innvl, pair);
1217 			error = 0;
1218 		}
1219 		if (error != 0)
1220 			break;
1221 	}
1222 
1223 	return (error);
1224 }
1225 
1226 static int
zfs_secpolicy_log_history(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1227 zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1228 {
1229 	(void) zc, (void) innvl, (void) cr;
1230 	/*
1231 	 * Even root must have a proper TSD so that we know what pool
1232 	 * to log to.
1233 	 */
1234 	if (tsd_get(zfs_allow_log_key) == NULL)
1235 		return (SET_ERROR(EPERM));
1236 	return (0);
1237 }
1238 
1239 static int
zfs_secpolicy_create_clone(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1240 zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1241 {
1242 	char		parentname[ZFS_MAX_DATASET_NAME_LEN];
1243 	int		error;
1244 	const char	*origin = NULL;
1245 	zone_admin_result_t result;
1246 
1247 	if ((error = zfs_get_parent(zc->zc_name, parentname,
1248 	    sizeof (parentname))) != 0)
1249 		return (error);
1250 
1251 	(void) nvlist_lookup_string(innvl, "origin", &origin);
1252 
1253 	/* Check zoned_uid delegation first */
1254 	result = zone_dataset_admin_check(parentname,
1255 	    origin != NULL ? ZONE_OP_CLONE : ZONE_OP_CREATE, origin);
1256 	if (result == ZONE_ADMIN_ALLOWED) {
1257 		if (origin != NULL) {
1258 			if ((error = zfs_secpolicy_zoned_uid_deleg(origin,
1259 			    ZFS_DELEG_PERM_CLONE, cr)) != 0)
1260 				return (error);
1261 		}
1262 		if ((error = zfs_secpolicy_zoned_uid_deleg(parentname,
1263 		    ZFS_DELEG_PERM_CREATE, cr)) != 0)
1264 			return (error);
1265 		return (zfs_secpolicy_zoned_uid_deleg(parentname,
1266 		    ZFS_DELEG_PERM_MOUNT, cr));
1267 	}
1268 	if (result == ZONE_ADMIN_DENIED)
1269 		return (SET_ERROR(EPERM));
1270 
1271 	/* NOT_APPLICABLE: continue with existing checks */
1272 	if (origin != NULL &&
1273 	    (error = zfs_secpolicy_write_perms(origin,
1274 	    ZFS_DELEG_PERM_CLONE, cr)) != 0)
1275 		return (error);
1276 
1277 	if ((error = zfs_secpolicy_write_perms(parentname,
1278 	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
1279 		return (error);
1280 
1281 	return (zfs_secpolicy_write_perms(parentname,
1282 	    ZFS_DELEG_PERM_MOUNT, cr));
1283 }
1284 
1285 /*
1286  * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
1287  * SYS_CONFIG privilege, which is not available in a local zone.
1288  */
1289 int
zfs_secpolicy_config(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1290 zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1291 {
1292 	(void) zc, (void) innvl;
1293 
1294 	if (secpolicy_sys_config(cr, B_FALSE) != 0)
1295 		return (SET_ERROR(EPERM));
1296 
1297 	return (0);
1298 }
1299 
1300 /*
1301  * Policy for object to name lookups.
1302  */
1303 static int
zfs_secpolicy_diff(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1304 zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1305 {
1306 	(void) innvl;
1307 	int error;
1308 
1309 	if (secpolicy_sys_config(cr, B_FALSE) == 0)
1310 		return (0);
1311 
1312 	error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr);
1313 	return (error);
1314 }
1315 
1316 /*
1317  * Policy for fault injection.  Requires all privileges.
1318  */
1319 static int
zfs_secpolicy_inject(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1320 zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1321 {
1322 	(void) zc, (void) innvl;
1323 	return (secpolicy_zinject(cr));
1324 }
1325 
1326 static int
zfs_secpolicy_inherit_prop(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1327 zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1328 {
1329 	(void) innvl;
1330 	zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
1331 
1332 	if (prop == ZPROP_USERPROP) {
1333 		if (!zfs_prop_user(zc->zc_value))
1334 			return (SET_ERROR(EINVAL));
1335 		zone_admin_result_t zone_result;
1336 		zone_result = zone_dataset_admin_check(zc->zc_name,
1337 		    ZONE_OP_SETPROP, NULL);
1338 		if (zone_result == ZONE_ADMIN_ALLOWED)
1339 			return (zfs_secpolicy_zoned_uid_deleg(zc->zc_name,
1340 			    ZFS_DELEG_PERM_USERPROP, cr));
1341 		if (zone_result == ZONE_ADMIN_DENIED)
1342 			return (SET_ERROR(EPERM));
1343 		return (zfs_secpolicy_write_perms(zc->zc_name,
1344 		    ZFS_DELEG_PERM_USERPROP, cr));
1345 	} else {
1346 		return (zfs_secpolicy_setprop(zc->zc_name, prop,
1347 		    NULL, cr));
1348 	}
1349 }
1350 
1351 static int
zfs_secpolicy_userspace_one(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1352 zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1353 {
1354 	int err = zfs_secpolicy_read(zc, innvl, cr);
1355 	if (err)
1356 		return (err);
1357 
1358 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1359 		return (SET_ERROR(EINVAL));
1360 
1361 	if (zc->zc_value[0] == 0) {
1362 		/*
1363 		 * They are asking about a posix uid/gid.  If it's
1364 		 * themself, allow it.
1365 		 */
1366 		if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
1367 		    zc->zc_objset_type == ZFS_PROP_USERQUOTA ||
1368 		    zc->zc_objset_type == ZFS_PROP_USEROBJUSED ||
1369 		    zc->zc_objset_type == ZFS_PROP_USEROBJQUOTA) {
1370 			if (zc->zc_guid == crgetuid(cr))
1371 				return (0);
1372 		} else if (zc->zc_objset_type == ZFS_PROP_GROUPUSED ||
1373 		    zc->zc_objset_type == ZFS_PROP_GROUPQUOTA ||
1374 		    zc->zc_objset_type == ZFS_PROP_GROUPOBJUSED ||
1375 		    zc->zc_objset_type == ZFS_PROP_GROUPOBJQUOTA) {
1376 			if (groupmember(zc->zc_guid, cr))
1377 				return (0);
1378 		}
1379 		/* else is for project quota/used */
1380 	}
1381 
1382 	return (zfs_secpolicy_write_perms(zc->zc_name,
1383 	    userquota_perms[zc->zc_objset_type], cr));
1384 }
1385 
1386 static int
zfs_secpolicy_userspace_many(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1387 zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1388 {
1389 	int err = zfs_secpolicy_read(zc, innvl, cr);
1390 	if (err)
1391 		return (err);
1392 
1393 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1394 		return (SET_ERROR(EINVAL));
1395 
1396 	return (zfs_secpolicy_write_perms(zc->zc_name,
1397 	    userquota_perms[zc->zc_objset_type], cr));
1398 }
1399 
1400 static int
zfs_secpolicy_userspace_upgrade(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1401 zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1402 {
1403 	(void) innvl;
1404 	return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
1405 	    NULL, cr));
1406 }
1407 
1408 static int
zfs_secpolicy_hold(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1409 zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1410 {
1411 	(void) zc;
1412 	nvpair_t *pair;
1413 	nvlist_t *holds;
1414 	int error;
1415 
1416 	holds = fnvlist_lookup_nvlist(innvl, "holds");
1417 
1418 	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
1419 	    pair = nvlist_next_nvpair(holds, pair)) {
1420 		char fsname[ZFS_MAX_DATASET_NAME_LEN];
1421 		error = dmu_fsname(nvpair_name(pair), fsname);
1422 		if (error != 0)
1423 			return (error);
1424 		error = zfs_secpolicy_write_perms(fsname,
1425 		    ZFS_DELEG_PERM_HOLD, cr);
1426 		if (error != 0)
1427 			return (error);
1428 	}
1429 	return (0);
1430 }
1431 
1432 static int
zfs_secpolicy_release(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1433 zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1434 {
1435 	(void) zc;
1436 	nvpair_t *pair;
1437 	int error;
1438 
1439 	for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1440 	    pair = nvlist_next_nvpair(innvl, pair)) {
1441 		char fsname[ZFS_MAX_DATASET_NAME_LEN];
1442 		error = dmu_fsname(nvpair_name(pair), fsname);
1443 		if (error != 0)
1444 			return (error);
1445 		error = zfs_secpolicy_write_perms(fsname,
1446 		    ZFS_DELEG_PERM_RELEASE, cr);
1447 		if (error != 0)
1448 			return (error);
1449 	}
1450 	return (0);
1451 }
1452 
1453 /*
1454  * Policy for allowing temporary snapshots to be taken or released
1455  */
1456 static int
zfs_secpolicy_tmp_snapshot(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1457 zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1458 {
1459 	/*
1460 	 * A temporary snapshot is the same as a snapshot,
1461 	 * hold, destroy and release all rolled into one.
1462 	 * Delegated diff alone is sufficient that we allow this.
1463 	 */
1464 	int error;
1465 
1466 	if (zfs_secpolicy_write_perms(zc->zc_name,
1467 	    ZFS_DELEG_PERM_DIFF, cr) == 0)
1468 		return (0);
1469 
1470 	error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr);
1471 
1472 	if (innvl != NULL) {
1473 		if (error == 0)
1474 			error = zfs_secpolicy_hold(zc, innvl, cr);
1475 		if (error == 0)
1476 			error = zfs_secpolicy_release(zc, innvl, cr);
1477 		if (error == 0)
1478 			error = zfs_secpolicy_destroy(zc, innvl, cr);
1479 	}
1480 	return (error);
1481 }
1482 
1483 static int
zfs_secpolicy_load_key(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1484 zfs_secpolicy_load_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1485 {
1486 	return (zfs_secpolicy_write_perms(zc->zc_name,
1487 	    ZFS_DELEG_PERM_LOAD_KEY, cr));
1488 }
1489 
1490 static int
zfs_secpolicy_change_key(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1491 zfs_secpolicy_change_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1492 {
1493 	return (zfs_secpolicy_write_perms(zc->zc_name,
1494 	    ZFS_DELEG_PERM_CHANGE_KEY, cr));
1495 }
1496 
1497 /*
1498  * Returns the nvlist as specified by the user in the zfs_cmd_t.
1499  */
1500 static int
get_nvlist(uint64_t nvl,uint64_t size,int iflag,nvlist_t ** nvp)1501 get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
1502 {
1503 	char *packed;
1504 	int error;
1505 	nvlist_t *list = NULL;
1506 
1507 	/*
1508 	 * Read in and unpack the user-supplied nvlist.
1509 	 */
1510 	if (size == 0)
1511 		return (SET_ERROR(EINVAL));
1512 
1513 	packed = vmem_alloc(size, KM_SLEEP);
1514 
1515 	if (ddi_copyin((void *)(uintptr_t)nvl, packed, size, iflag) != 0) {
1516 		vmem_free(packed, size);
1517 		return (SET_ERROR(EFAULT));
1518 	}
1519 
1520 	if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
1521 		vmem_free(packed, size);
1522 		return (error);
1523 	}
1524 
1525 	vmem_free(packed, size);
1526 
1527 	*nvp = list;
1528 	return (0);
1529 }
1530 
1531 /*
1532  * Reduce the size of this nvlist until it can be serialized in 'max' bytes.
1533  * Entries will be removed from the end of the nvlist, and one int32 entry
1534  * named "N_MORE_ERRORS" will be added indicating how many entries were
1535  * removed.
1536  */
1537 static int
nvlist_smush(nvlist_t * errors,size_t max)1538 nvlist_smush(nvlist_t *errors, size_t max)
1539 {
1540 	size_t size;
1541 
1542 	size = fnvlist_size(errors);
1543 
1544 	if (size > max) {
1545 		nvpair_t *more_errors;
1546 		int n = 0;
1547 
1548 		if (max < 1024)
1549 			return (SET_ERROR(ENOMEM));
1550 
1551 		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0);
1552 		more_errors = nvlist_prev_nvpair(errors, NULL);
1553 
1554 		do {
1555 			nvpair_t *pair = nvlist_prev_nvpair(errors,
1556 			    more_errors);
1557 			fnvlist_remove_nvpair(errors, pair);
1558 			n++;
1559 			size = fnvlist_size(errors);
1560 		} while (size > max);
1561 
1562 		fnvlist_remove_nvpair(errors, more_errors);
1563 		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n);
1564 		ASSERT3U(fnvlist_size(errors), <=, max);
1565 	}
1566 
1567 	return (0);
1568 }
1569 
1570 static int
put_nvlist(zfs_cmd_t * zc,nvlist_t * nvl)1571 put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
1572 {
1573 	char *packed = NULL;
1574 	int error = 0;
1575 	size_t size;
1576 
1577 	size = fnvlist_size(nvl);
1578 
1579 	if (size > zc->zc_nvlist_dst_size) {
1580 		error = SET_ERROR(ENOMEM);
1581 	} else {
1582 		packed = fnvlist_pack(nvl, &size);
1583 		if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
1584 		    size, zc->zc_iflags) != 0)
1585 			error = SET_ERROR(EFAULT);
1586 		fnvlist_pack_free(packed, size);
1587 	}
1588 
1589 	zc->zc_nvlist_dst_size = size;
1590 	zc->zc_nvlist_dst_filled = B_TRUE;
1591 	return (error);
1592 }
1593 
1594 int
getzfsvfs_impl(objset_t * os,zfsvfs_t ** zfvp)1595 getzfsvfs_impl(objset_t *os, zfsvfs_t **zfvp)
1596 {
1597 	int error = 0;
1598 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1599 		return (SET_ERROR(EINVAL));
1600 	}
1601 
1602 	mutex_enter(&os->os_user_ptr_lock);
1603 	*zfvp = dmu_objset_get_user(os);
1604 	/* bump s_active only when non-zero to prevent umount race */
1605 	error = zfs_vfs_ref(zfvp);
1606 	mutex_exit(&os->os_user_ptr_lock);
1607 	return (error);
1608 }
1609 
1610 int
getzfsvfs(const char * dsname,zfsvfs_t ** zfvp)1611 getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
1612 {
1613 	objset_t *os;
1614 	int error;
1615 
1616 	error = dmu_objset_hold(dsname, FTAG, &os);
1617 	if (error != 0)
1618 		return (error);
1619 
1620 	error = getzfsvfs_impl(os, zfvp);
1621 	dmu_objset_rele(os, FTAG);
1622 	return (error);
1623 }
1624 
1625 /*
1626  * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
1627  * case its z_sb will be NULL, and it will be opened as the owner.
1628  * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
1629  * which prevents all inode ops from running.
1630  */
1631 static int
zfsvfs_hold(const char * name,const void * tag,zfsvfs_t ** zfvp,boolean_t writer)1632 zfsvfs_hold(const char *name, const void *tag, zfsvfs_t **zfvp,
1633     boolean_t writer)
1634 {
1635 	int error = 0;
1636 
1637 	if (getzfsvfs(name, zfvp) != 0)
1638 		error = zfsvfs_create(name, B_FALSE, zfvp);
1639 	if (error == 0) {
1640 		if (writer)
1641 			ZFS_TEARDOWN_ENTER_WRITE(*zfvp, tag);
1642 		else
1643 			ZFS_TEARDOWN_ENTER_READ(*zfvp, tag);
1644 		if ((*zfvp)->z_unmounted) {
1645 			/*
1646 			 * XXX we could probably try again, since the unmounting
1647 			 * thread should be just about to disassociate the
1648 			 * objset from the zfsvfs.
1649 			 */
1650 			ZFS_TEARDOWN_EXIT(*zfvp, tag);
1651 			zfs_vfs_rele(*zfvp);
1652 			return (SET_ERROR(EBUSY));
1653 		}
1654 	}
1655 	return (error);
1656 }
1657 
1658 static void
zfsvfs_rele(zfsvfs_t * zfsvfs,const void * tag)1659 zfsvfs_rele(zfsvfs_t *zfsvfs, const void *tag)
1660 {
1661 	ZFS_TEARDOWN_EXIT(zfsvfs, tag);
1662 
1663 	if (zfs_vfs_held(zfsvfs)) {
1664 		zfs_vfs_rele(zfsvfs);
1665 	} else {
1666 		dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
1667 		zfsvfs_free(zfsvfs);
1668 	}
1669 }
1670 
1671 static int
zfs_ioc_pool_create(zfs_cmd_t * zc)1672 zfs_ioc_pool_create(zfs_cmd_t *zc)
1673 {
1674 	int error;
1675 	nvlist_t *config, *props = NULL;
1676 	nvlist_t *rootprops = NULL;
1677 	nvlist_t *zplprops = NULL;
1678 	dsl_crypto_params_t *dcp = NULL;
1679 	const char *spa_name = zc->zc_name;
1680 	boolean_t unload_wkey = B_TRUE;
1681 	nvlist_t *errinfo = NULL;
1682 
1683 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1684 	    zc->zc_iflags, &config)))
1685 		return (error);
1686 
1687 	if (zc->zc_nvlist_src_size != 0 && (error =
1688 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1689 	    zc->zc_iflags, &props))) {
1690 		nvlist_free(config);
1691 		return (error);
1692 	}
1693 
1694 	if (props) {
1695 		nvlist_t *nvl = NULL;
1696 		nvlist_t *hidden_args = NULL;
1697 		uint64_t version = SPA_VERSION;
1698 		const char *tname;
1699 
1700 		(void) nvlist_lookup_uint64(props,
1701 		    zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
1702 		if (!SPA_VERSION_IS_SUPPORTED(version)) {
1703 			error = SET_ERROR(EINVAL);
1704 			goto pool_props_bad;
1705 		}
1706 		(void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
1707 		if (nvl) {
1708 			error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
1709 			if (error != 0)
1710 				goto pool_props_bad;
1711 			(void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
1712 		}
1713 
1714 		(void) nvlist_lookup_nvlist(props, ZPOOL_HIDDEN_ARGS,
1715 		    &hidden_args);
1716 		error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE,
1717 		    rootprops, hidden_args, &dcp);
1718 		if (error != 0)
1719 			goto pool_props_bad;
1720 		(void) nvlist_remove_all(props, ZPOOL_HIDDEN_ARGS);
1721 
1722 		VERIFY0(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP));
1723 		error = zfs_fill_zplprops_root(version, rootprops,
1724 		    zplprops, NULL);
1725 		if (error != 0)
1726 			goto pool_props_bad;
1727 
1728 		if (nvlist_lookup_string(props,
1729 		    zpool_prop_to_name(ZPOOL_PROP_TNAME), &tname) == 0)
1730 			spa_name = tname;
1731 	}
1732 
1733 	error = spa_create(zc->zc_name, config, props, zplprops, dcp,
1734 	    &errinfo);
1735 	if (errinfo != NULL) {
1736 		nvlist_t *outnv = fnvlist_alloc();
1737 		fnvlist_add_nvlist(outnv,
1738 		    ZPOOL_CONFIG_CREATE_INFO, errinfo);
1739 		(void) put_nvlist(zc, outnv);
1740 		nvlist_free(outnv);
1741 		nvlist_free(errinfo);
1742 	}
1743 
1744 	/*
1745 	 * Set the remaining root properties
1746 	 */
1747 	if (!error && (error = zfs_set_prop_nvlist(spa_name,
1748 	    ZPROP_SRC_LOCAL, rootprops, NULL)) != 0) {
1749 		(void) spa_destroy(spa_name);
1750 		unload_wkey = B_FALSE; /* spa_destroy() unloads wrapping keys */
1751 	}
1752 
1753 pool_props_bad:
1754 	nvlist_free(rootprops);
1755 	nvlist_free(zplprops);
1756 	nvlist_free(config);
1757 	nvlist_free(props);
1758 	dsl_crypto_params_free(dcp, unload_wkey && !!error);
1759 
1760 	return (error);
1761 }
1762 
1763 static int
zfs_ioc_pool_destroy(zfs_cmd_t * zc)1764 zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1765 {
1766 	int error;
1767 	zfs_log_history(zc);
1768 	error = spa_destroy(zc->zc_name);
1769 
1770 	return (error);
1771 }
1772 
1773 static int
zfs_ioc_pool_import(zfs_cmd_t * zc)1774 zfs_ioc_pool_import(zfs_cmd_t *zc)
1775 {
1776 	nvlist_t *config, *props = NULL;
1777 	uint64_t guid;
1778 	int error;
1779 
1780 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1781 	    zc->zc_iflags, &config)) != 0)
1782 		return (error);
1783 
1784 	if (zc->zc_nvlist_src_size != 0 && (error =
1785 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1786 	    zc->zc_iflags, &props))) {
1787 		nvlist_free(config);
1788 		return (error);
1789 	}
1790 
1791 	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1792 	    guid != zc->zc_guid)
1793 		error = SET_ERROR(EINVAL);
1794 	else
1795 		error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
1796 
1797 	if (zc->zc_nvlist_dst != 0) {
1798 		int err;
1799 
1800 		if ((err = put_nvlist(zc, config)) != 0)
1801 			error = err;
1802 	}
1803 
1804 	nvlist_free(config);
1805 	nvlist_free(props);
1806 
1807 	return (error);
1808 }
1809 
1810 static int
zfs_ioc_pool_export(zfs_cmd_t * zc)1811 zfs_ioc_pool_export(zfs_cmd_t *zc)
1812 {
1813 	int error;
1814 	boolean_t force = (boolean_t)zc->zc_cookie;
1815 	boolean_t hardforce = (boolean_t)zc->zc_guid;
1816 
1817 	zfs_log_history(zc);
1818 	error = spa_export(zc->zc_name, NULL, force, hardforce);
1819 
1820 	return (error);
1821 }
1822 
1823 static int
zfs_ioc_pool_configs(zfs_cmd_t * zc)1824 zfs_ioc_pool_configs(zfs_cmd_t *zc)
1825 {
1826 	nvlist_t *configs;
1827 	int error;
1828 
1829 	error = spa_all_configs(&zc->zc_cookie, &configs);
1830 	if (error)
1831 		return (error);
1832 
1833 	error = put_nvlist(zc, configs);
1834 
1835 	nvlist_free(configs);
1836 
1837 	return (error);
1838 }
1839 
1840 /*
1841  * inputs:
1842  * zc_name		name of the pool
1843  *
1844  * outputs:
1845  * zc_cookie		real errno
1846  * zc_nvlist_dst	config nvlist
1847  * zc_nvlist_dst_size	size of config nvlist
1848  */
1849 static int
zfs_ioc_pool_stats(zfs_cmd_t * zc)1850 zfs_ioc_pool_stats(zfs_cmd_t *zc)
1851 {
1852 	nvlist_t *config;
1853 	int error;
1854 	int ret = 0;
1855 
1856 	error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1857 	    sizeof (zc->zc_value));
1858 
1859 	if (config != NULL) {
1860 		ret = put_nvlist(zc, config);
1861 		nvlist_free(config);
1862 
1863 		/*
1864 		 * The config may be present even if 'error' is non-zero.
1865 		 * In this case we return success, and preserve the real errno
1866 		 * in 'zc_cookie'.
1867 		 */
1868 		zc->zc_cookie = error;
1869 	} else {
1870 		ret = error;
1871 	}
1872 
1873 	return (ret);
1874 }
1875 
1876 /*
1877  * Try to import the given pool, returning pool stats as appropriate so that
1878  * user land knows which devices are available and overall pool health.
1879  */
1880 static int
zfs_ioc_pool_tryimport(zfs_cmd_t * zc)1881 zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1882 {
1883 	nvlist_t *tryconfig, *config = NULL;
1884 	int error;
1885 
1886 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1887 	    zc->zc_iflags, &tryconfig)) != 0)
1888 		return (error);
1889 
1890 	config = spa_tryimport(tryconfig);
1891 
1892 	nvlist_free(tryconfig);
1893 
1894 	if (config == NULL)
1895 		return (SET_ERROR(EINVAL));
1896 
1897 	error = put_nvlist(zc, config);
1898 	nvlist_free(config);
1899 
1900 	return (error);
1901 }
1902 
1903 /*
1904  * inputs:
1905  * zc_name              name of the pool
1906  * zc_cookie            scan func (pool_scan_func_t)
1907  * zc_flags             scrub pause/resume flag (pool_scrub_cmd_t)
1908  */
1909 static int
zfs_ioc_pool_scan(zfs_cmd_t * zc)1910 zfs_ioc_pool_scan(zfs_cmd_t *zc)
1911 {
1912 	spa_t *spa;
1913 	int error;
1914 
1915 	if (zc->zc_flags >= POOL_SCRUB_FLAGS_END)
1916 		return (SET_ERROR(EINVAL));
1917 
1918 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1919 		return (error);
1920 
1921 	if (zc->zc_flags == POOL_SCRUB_PAUSE)
1922 		error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE);
1923 	else if (zc->zc_cookie == POOL_SCAN_NONE)
1924 		error = spa_scan_stop(spa);
1925 	else
1926 		error = spa_scan(spa, zc->zc_cookie);
1927 
1928 	spa_close(spa, FTAG);
1929 
1930 	return (error);
1931 }
1932 
1933 /*
1934  * inputs:
1935  * poolname             name of the pool
1936  * scan_type            scan func (pool_scan_func_t)
1937  * scan_command         scrub pause/resume flag (pool_scrub_cmd_t)
1938  */
1939 static const zfs_ioc_key_t zfs_keys_pool_scrub[] = {
1940 	{"scan_type",		DATA_TYPE_UINT64,	0},
1941 	{"scan_command",	DATA_TYPE_UINT64,	0},
1942 	{"scan_date_start",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
1943 	{"scan_date_end",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
1944 };
1945 
1946 static int
zfs_ioc_pool_scrub(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)1947 zfs_ioc_pool_scrub(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
1948 {
1949 	spa_t *spa;
1950 	int error;
1951 	uint64_t scan_type, scan_cmd;
1952 	uint64_t date_start, date_end;
1953 
1954 	if (nvlist_lookup_uint64(innvl, "scan_type", &scan_type) != 0)
1955 		return (SET_ERROR(EINVAL));
1956 	if (nvlist_lookup_uint64(innvl, "scan_command", &scan_cmd) != 0)
1957 		return (SET_ERROR(EINVAL));
1958 
1959 	if (scan_cmd >= POOL_SCRUB_FLAGS_END)
1960 		return (SET_ERROR(EINVAL));
1961 
1962 	if (nvlist_lookup_uint64(innvl, "scan_date_start", &date_start) != 0)
1963 		date_start = 0;
1964 	if (nvlist_lookup_uint64(innvl, "scan_date_end", &date_end) != 0)
1965 		date_end = 0;
1966 
1967 	if ((error = spa_open(poolname, &spa, FTAG)) != 0)
1968 		return (error);
1969 
1970 	if (scan_cmd == POOL_SCRUB_PAUSE) {
1971 		error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE);
1972 	} else if (scan_type == POOL_SCAN_NONE) {
1973 		error = spa_scan_stop(spa);
1974 	} else if (scan_cmd == POOL_SCRUB_FROM_LAST_TXG) {
1975 		error = spa_scan_range(spa, scan_type,
1976 		    spa_get_last_scrubbed_txg(spa), 0);
1977 	} else {
1978 		uint64_t txg_start, txg_end;
1979 
1980 		txg_start = txg_end = 0;
1981 		if (date_start != 0 || date_end != 0) {
1982 			mutex_enter(&spa->spa_txg_log_time_lock);
1983 			if (date_start != 0) {
1984 				txg_start = dbrrd_query(&spa->spa_txg_log_time,
1985 				    date_start, DBRRD_FLOOR);
1986 			}
1987 
1988 			if (date_end != 0) {
1989 				txg_end = dbrrd_query(&spa->spa_txg_log_time,
1990 				    date_end, DBRRD_CEILING);
1991 			}
1992 			mutex_exit(&spa->spa_txg_log_time_lock);
1993 		}
1994 
1995 		error = spa_scan_range(spa, scan_type, txg_start, txg_end);
1996 	}
1997 
1998 	spa_close(spa, FTAG);
1999 	return (error);
2000 }
2001 
2002 static int
zfs_ioc_pool_freeze(zfs_cmd_t * zc)2003 zfs_ioc_pool_freeze(zfs_cmd_t *zc)
2004 {
2005 	spa_t *spa;
2006 	int error;
2007 
2008 	error = spa_open(zc->zc_name, &spa, FTAG);
2009 	if (error == 0) {
2010 		spa_freeze(spa);
2011 		spa_close(spa, FTAG);
2012 	}
2013 	return (error);
2014 }
2015 
2016 static int
zfs_ioc_pool_upgrade(zfs_cmd_t * zc)2017 zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
2018 {
2019 	spa_t *spa;
2020 	int error;
2021 
2022 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2023 		return (error);
2024 
2025 	if (zc->zc_cookie < spa_version(spa) ||
2026 	    !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
2027 		spa_close(spa, FTAG);
2028 		return (SET_ERROR(EINVAL));
2029 	}
2030 
2031 	spa_upgrade(spa, zc->zc_cookie);
2032 	spa_close(spa, FTAG);
2033 
2034 	return (error);
2035 }
2036 
2037 static int
zfs_ioc_pool_get_history(zfs_cmd_t * zc)2038 zfs_ioc_pool_get_history(zfs_cmd_t *zc)
2039 {
2040 	spa_t *spa;
2041 	char *hist_buf;
2042 	uint64_t size;
2043 	int error;
2044 
2045 	if ((size = zc->zc_history_len) == 0)
2046 		return (SET_ERROR(EINVAL));
2047 
2048 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2049 		return (error);
2050 
2051 	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
2052 		spa_close(spa, FTAG);
2053 		return (SET_ERROR(ENOTSUP));
2054 	}
2055 
2056 	hist_buf = vmem_alloc(size, KM_SLEEP);
2057 	if ((error = spa_history_get(spa, &zc->zc_history_offset,
2058 	    &zc->zc_history_len, hist_buf)) == 0) {
2059 		error = ddi_copyout(hist_buf,
2060 		    (void *)(uintptr_t)zc->zc_history,
2061 		    zc->zc_history_len, zc->zc_iflags);
2062 	}
2063 
2064 	spa_close(spa, FTAG);
2065 	vmem_free(hist_buf, size);
2066 	return (error);
2067 }
2068 
2069 /*
2070  * inputs:
2071  * zc_nvlist_src	nvlist optionally containing ZPOOL_REGUID_GUID
2072  * zc_nvlist_src_size	size of the nvlist
2073  */
2074 static int
zfs_ioc_pool_reguid(zfs_cmd_t * zc)2075 zfs_ioc_pool_reguid(zfs_cmd_t *zc)
2076 {
2077 	uint64_t *guidp = NULL;
2078 	nvlist_t *props = NULL;
2079 	spa_t *spa;
2080 	uint64_t guid;
2081 	int error;
2082 
2083 	if (zc->zc_nvlist_src_size != 0) {
2084 		error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2085 		    zc->zc_iflags, &props);
2086 		if (error != 0)
2087 			return (error);
2088 
2089 		error = nvlist_lookup_uint64(props, ZPOOL_REGUID_GUID, &guid);
2090 		if (error == 0)
2091 			guidp = &guid;
2092 		else if (error == ENOENT)
2093 			guidp = NULL;
2094 		else
2095 			goto out;
2096 	}
2097 
2098 	error = spa_open(zc->zc_name, &spa, FTAG);
2099 	if (error == 0) {
2100 		error = spa_change_guid(spa, guidp);
2101 		spa_close(spa, FTAG);
2102 	}
2103 
2104 out:
2105 	if (props != NULL)
2106 		nvlist_free(props);
2107 
2108 	return (error);
2109 }
2110 
2111 static int
zfs_ioc_dsobj_to_dsname(zfs_cmd_t * zc)2112 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
2113 {
2114 	return (dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value));
2115 }
2116 
2117 /*
2118  * inputs:
2119  * zc_name		name of filesystem
2120  * zc_obj		object to find
2121  *
2122  * outputs:
2123  * zc_value		name of object
2124  */
2125 static int
zfs_ioc_obj_to_path(zfs_cmd_t * zc)2126 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
2127 {
2128 	objset_t *os;
2129 	int error;
2130 
2131 	/* XXX reading from objset not owned */
2132 	if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE,
2133 	    FTAG, &os)) != 0)
2134 		return (error);
2135 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
2136 		dmu_objset_rele_flags(os, B_TRUE, FTAG);
2137 		return (SET_ERROR(EINVAL));
2138 	}
2139 	error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
2140 	    sizeof (zc->zc_value));
2141 	dmu_objset_rele_flags(os, B_TRUE, FTAG);
2142 
2143 	return (error);
2144 }
2145 
2146 /*
2147  * inputs:
2148  * zc_name		name of filesystem
2149  * zc_obj		object to find
2150  *
2151  * outputs:
2152  * zc_stat		stats on object
2153  * zc_value		path to object
2154  */
2155 static int
zfs_ioc_obj_to_stats(zfs_cmd_t * zc)2156 zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
2157 {
2158 	objset_t *os;
2159 	int error;
2160 
2161 	/* XXX reading from objset not owned */
2162 	if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE,
2163 	    FTAG, &os)) != 0)
2164 		return (error);
2165 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
2166 		dmu_objset_rele_flags(os, B_TRUE, FTAG);
2167 		return (SET_ERROR(EINVAL));
2168 	}
2169 	error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
2170 	    sizeof (zc->zc_value));
2171 	dmu_objset_rele_flags(os, B_TRUE, FTAG);
2172 
2173 	return (error);
2174 }
2175 
2176 static int
zfs_ioc_vdev_add(zfs_cmd_t * zc)2177 zfs_ioc_vdev_add(zfs_cmd_t *zc)
2178 {
2179 	spa_t *spa;
2180 	int error;
2181 	nvlist_t *config;
2182 
2183 	error = spa_open(zc->zc_name, &spa, FTAG);
2184 	if (error != 0)
2185 		return (error);
2186 
2187 	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2188 	    zc->zc_iflags, &config);
2189 	if (error == 0) {
2190 		error = spa_vdev_add(spa, config, zc->zc_flags);
2191 		nvlist_free(config);
2192 	}
2193 	spa_close(spa, FTAG);
2194 	return (error);
2195 }
2196 
2197 /*
2198  * inputs:
2199  * zc_name		name of the pool
2200  * zc_guid		guid of vdev to remove
2201  * zc_cookie		cancel removal
2202  */
2203 static int
zfs_ioc_vdev_remove(zfs_cmd_t * zc)2204 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
2205 {
2206 	spa_t *spa;
2207 	int error;
2208 
2209 	error = spa_open(zc->zc_name, &spa, FTAG);
2210 	if (error != 0)
2211 		return (error);
2212 	if (zc->zc_cookie != 0) {
2213 		error = spa_vdev_remove_cancel(spa);
2214 	} else {
2215 		error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
2216 	}
2217 	spa_close(spa, FTAG);
2218 	return (error);
2219 }
2220 
2221 static int
zfs_ioc_vdev_set_state(zfs_cmd_t * zc)2222 zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
2223 {
2224 	spa_t *spa;
2225 	int error;
2226 	vdev_state_t newstate = VDEV_STATE_UNKNOWN;
2227 
2228 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2229 		return (error);
2230 	switch (zc->zc_cookie) {
2231 	case VDEV_STATE_ONLINE:
2232 		error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
2233 		break;
2234 
2235 	case VDEV_STATE_OFFLINE:
2236 		error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
2237 		break;
2238 
2239 	case VDEV_STATE_FAULTED:
2240 		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
2241 		    zc->zc_obj != VDEV_AUX_EXTERNAL &&
2242 		    zc->zc_obj != VDEV_AUX_EXTERNAL_PERSIST)
2243 			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
2244 
2245 		error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
2246 		break;
2247 
2248 	case VDEV_STATE_DEGRADED:
2249 		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
2250 		    zc->zc_obj != VDEV_AUX_EXTERNAL)
2251 			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
2252 
2253 		error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
2254 		break;
2255 
2256 	case VDEV_STATE_REMOVED:
2257 		error = vdev_remove_wanted(spa, zc->zc_guid);
2258 		break;
2259 
2260 	default:
2261 		error = SET_ERROR(EINVAL);
2262 	}
2263 	zc->zc_cookie = newstate;
2264 	spa_close(spa, FTAG);
2265 	return (error);
2266 }
2267 
2268 static int
zfs_ioc_vdev_attach(zfs_cmd_t * zc)2269 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
2270 {
2271 	spa_t *spa;
2272 	nvlist_t *config;
2273 	int replacing = zc->zc_cookie;
2274 	int rebuild = zc->zc_simple;
2275 	int error;
2276 
2277 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2278 		return (error);
2279 
2280 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2281 	    zc->zc_iflags, &config)) == 0) {
2282 		error = spa_vdev_attach(spa, zc->zc_guid, config, replacing,
2283 		    rebuild);
2284 		nvlist_free(config);
2285 	}
2286 
2287 	spa_close(spa, FTAG);
2288 	return (error);
2289 }
2290 
2291 static int
zfs_ioc_vdev_detach(zfs_cmd_t * zc)2292 zfs_ioc_vdev_detach(zfs_cmd_t *zc)
2293 {
2294 	spa_t *spa;
2295 	int error;
2296 
2297 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2298 		return (error);
2299 
2300 	error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
2301 
2302 	spa_close(spa, FTAG);
2303 	return (error);
2304 }
2305 
2306 static int
zfs_ioc_vdev_split(zfs_cmd_t * zc)2307 zfs_ioc_vdev_split(zfs_cmd_t *zc)
2308 {
2309 	spa_t *spa;
2310 	nvlist_t *config, *props = NULL;
2311 	int error;
2312 	boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
2313 
2314 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2315 		return (error);
2316 
2317 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2318 	    zc->zc_iflags, &config))) {
2319 		spa_close(spa, FTAG);
2320 		return (error);
2321 	}
2322 
2323 	if (zc->zc_nvlist_src_size != 0 && (error =
2324 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2325 	    zc->zc_iflags, &props))) {
2326 		spa_close(spa, FTAG);
2327 		nvlist_free(config);
2328 		return (error);
2329 	}
2330 
2331 	error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
2332 
2333 	spa_close(spa, FTAG);
2334 
2335 	nvlist_free(config);
2336 	nvlist_free(props);
2337 
2338 	return (error);
2339 }
2340 
2341 static int
zfs_ioc_vdev_setpath(zfs_cmd_t * zc)2342 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
2343 {
2344 	spa_t *spa;
2345 	const char *path = zc->zc_value;
2346 	uint64_t guid = zc->zc_guid;
2347 	int error;
2348 
2349 	error = spa_open(zc->zc_name, &spa, FTAG);
2350 	if (error != 0)
2351 		return (error);
2352 
2353 	error = spa_vdev_setpath(spa, guid, path);
2354 	spa_close(spa, FTAG);
2355 	return (error);
2356 }
2357 
2358 static int
zfs_ioc_vdev_setfru(zfs_cmd_t * zc)2359 zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
2360 {
2361 	spa_t *spa;
2362 	const char *fru = zc->zc_value;
2363 	uint64_t guid = zc->zc_guid;
2364 	int error;
2365 
2366 	error = spa_open(zc->zc_name, &spa, FTAG);
2367 	if (error != 0)
2368 		return (error);
2369 
2370 	error = spa_vdev_setfru(spa, guid, fru);
2371 	spa_close(spa, FTAG);
2372 	return (error);
2373 }
2374 
2375 static int
zfs_ioc_objset_stats_impl(zfs_cmd_t * zc,objset_t * os)2376 zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
2377 {
2378 	int error = 0;
2379 	nvlist_t *nv;
2380 
2381 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2382 
2383 	if (!zc->zc_simple && zc->zc_nvlist_dst != 0 &&
2384 	    (error = dsl_prop_get_all(os, &nv)) == 0) {
2385 		dmu_objset_stats(os, nv);
2386 		/*
2387 		 * NB: zvol_get_stats() will read the objset contents,
2388 		 * which we aren't supposed to do with a
2389 		 * DS_MODE_USER hold, because it could be
2390 		 * inconsistent.  So this is a bit of a workaround...
2391 		 * XXX reading without owning
2392 		 */
2393 		if (!zc->zc_objset_stats.dds_inconsistent &&
2394 		    dmu_objset_type(os) == DMU_OST_ZVOL) {
2395 			error = zvol_get_stats(os, nv);
2396 			if (error == EIO) {
2397 				nvlist_free(nv);
2398 				return (error);
2399 			}
2400 			VERIFY0(error);
2401 		}
2402 		if (error == 0)
2403 			error = put_nvlist(zc, nv);
2404 		nvlist_free(nv);
2405 	}
2406 
2407 	return (error);
2408 }
2409 
2410 /*
2411  * inputs:
2412  * zc_name		name of filesystem
2413  * zc_nvlist_dst_size	size of buffer for property nvlist
2414  *
2415  * outputs:
2416  * zc_objset_stats	stats
2417  * zc_nvlist_dst	property nvlist
2418  * zc_nvlist_dst_size	size of property nvlist
2419  */
2420 static int
zfs_ioc_objset_stats(zfs_cmd_t * zc)2421 zfs_ioc_objset_stats(zfs_cmd_t *zc)
2422 {
2423 	objset_t *os;
2424 	int error;
2425 
2426 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2427 	if (error == 0) {
2428 		error = zfs_ioc_objset_stats_impl(zc, os);
2429 		dmu_objset_rele(os, FTAG);
2430 	}
2431 
2432 	return (error);
2433 }
2434 
2435 /*
2436  * inputs:
2437  * zc_name		name of filesystem
2438  * zc_nvlist_dst_size	size of buffer for property nvlist
2439  *
2440  * outputs:
2441  * zc_nvlist_dst	received property nvlist
2442  * zc_nvlist_dst_size	size of received property nvlist
2443  *
2444  * Gets received properties (distinct from local properties on or after
2445  * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
2446  * local property values.
2447  */
2448 static int
zfs_ioc_objset_recvd_props(zfs_cmd_t * zc)2449 zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
2450 {
2451 	int error = 0;
2452 	nvlist_t *nv;
2453 
2454 	/*
2455 	 * Without this check, we would return local property values if the
2456 	 * caller has not already received properties on or after
2457 	 * SPA_VERSION_RECVD_PROPS.
2458 	 */
2459 	if (!dsl_prop_get_hasrecvd(zc->zc_name))
2460 		return (SET_ERROR(ENOTSUP));
2461 
2462 	if (zc->zc_nvlist_dst != 0 &&
2463 	    (error = dsl_prop_get_received(zc->zc_name, &nv)) == 0) {
2464 		error = put_nvlist(zc, nv);
2465 		nvlist_free(nv);
2466 	}
2467 
2468 	return (error);
2469 }
2470 
2471 static int
nvl_add_zplprop(objset_t * os,nvlist_t * props,zfs_prop_t prop)2472 nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
2473 {
2474 	uint64_t value;
2475 	int error;
2476 
2477 	/*
2478 	 * zfs_get_zplprop() will either find a value or give us
2479 	 * the default value (if there is one).
2480 	 */
2481 	if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
2482 		return (error);
2483 	VERIFY0(nvlist_add_uint64(props, zfs_prop_to_name(prop), value));
2484 	return (0);
2485 }
2486 
2487 /*
2488  * inputs:
2489  * zc_name		name of filesystem
2490  * zc_nvlist_dst_size	size of buffer for zpl property nvlist
2491  *
2492  * outputs:
2493  * zc_nvlist_dst	zpl property nvlist
2494  * zc_nvlist_dst_size	size of zpl property nvlist
2495  */
2496 static int
zfs_ioc_objset_zplprops(zfs_cmd_t * zc)2497 zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
2498 {
2499 	objset_t *os;
2500 	int err;
2501 
2502 	/* XXX reading without owning */
2503 	if ((err = dmu_objset_hold(zc->zc_name, FTAG, &os)))
2504 		return (err);
2505 
2506 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2507 
2508 	/*
2509 	 * NB: nvl_add_zplprop() will read the objset contents,
2510 	 * which we aren't supposed to do with a DS_MODE_USER
2511 	 * hold, because it could be inconsistent.
2512 	 */
2513 	if (zc->zc_nvlist_dst != 0 &&
2514 	    !zc->zc_objset_stats.dds_inconsistent &&
2515 	    dmu_objset_type(os) == DMU_OST_ZFS) {
2516 		nvlist_t *nv;
2517 
2518 		VERIFY0(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP));
2519 		if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
2520 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
2521 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
2522 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0 &&
2523 		    (err = nvl_add_zplprop(os, nv,
2524 		    ZFS_PROP_DEFAULTUSERQUOTA)) == 0 &&
2525 		    (err = nvl_add_zplprop(os, nv,
2526 		    ZFS_PROP_DEFAULTGROUPQUOTA)) == 0 &&
2527 		    (err = nvl_add_zplprop(os, nv,
2528 		    ZFS_PROP_DEFAULTPROJECTQUOTA)) == 0 &&
2529 		    (err = nvl_add_zplprop(os, nv,
2530 		    ZFS_PROP_DEFAULTUSEROBJQUOTA)) == 0 &&
2531 		    (err = nvl_add_zplprop(os, nv,
2532 		    ZFS_PROP_DEFAULTGROUPOBJQUOTA)) == 0 &&
2533 		    (err = nvl_add_zplprop(os, nv,
2534 		    ZFS_PROP_DEFAULTPROJECTOBJQUOTA)) == 0)
2535 			err = put_nvlist(zc, nv);
2536 		nvlist_free(nv);
2537 	} else {
2538 		err = SET_ERROR(ENOENT);
2539 	}
2540 	dmu_objset_rele(os, FTAG);
2541 	return (err);
2542 }
2543 
2544 /*
2545  * inputs:
2546  * zc_name		name of filesystem
2547  * zc_cookie		zap cursor
2548  * zc_nvlist_dst_size	size of buffer for property nvlist
2549  *
2550  * outputs:
2551  * zc_name		name of next filesystem
2552  * zc_cookie		zap cursor
2553  * zc_objset_stats	stats
2554  * zc_nvlist_dst	property nvlist
2555  * zc_nvlist_dst_size	size of property nvlist
2556  */
2557 static int
zfs_ioc_dataset_list_next(zfs_cmd_t * zc)2558 zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
2559 {
2560 	objset_t *os;
2561 	int error;
2562 	char *p;
2563 	size_t orig_len = strlen(zc->zc_name);
2564 
2565 top:
2566 	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os))) {
2567 		if (error == ENOENT)
2568 			error = SET_ERROR(ESRCH);
2569 		return (error);
2570 	}
2571 
2572 	p = strrchr(zc->zc_name, '/');
2573 	if (p == NULL || p[1] != '\0')
2574 		(void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
2575 	p = zc->zc_name + strlen(zc->zc_name);
2576 
2577 	do {
2578 		error = dmu_dir_list_next(os,
2579 		    sizeof (zc->zc_name) - (p - zc->zc_name), p,
2580 		    NULL, &zc->zc_cookie);
2581 		if (error == ENOENT)
2582 			error = SET_ERROR(ESRCH);
2583 	} while (error == 0 && zfs_dataset_name_hidden(zc->zc_name));
2584 	dmu_objset_rele(os, FTAG);
2585 
2586 	/*
2587 	 * If it's an internal dataset (ie. with a '$' in its name),
2588 	 * don't try to get stats for it, otherwise we'll return ENOENT.
2589 	 */
2590 	if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
2591 		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
2592 		if (error == ENOENT) {
2593 			/* We lost a race with destroy, get the next one. */
2594 			zc->zc_name[orig_len] = '\0';
2595 			goto top;
2596 		}
2597 	}
2598 	return (error);
2599 }
2600 
2601 /*
2602  * inputs:
2603  * zc_name		name of filesystem
2604  * zc_cookie		zap cursor
2605  * zc_nvlist_src	iteration range nvlist
2606  * zc_nvlist_src_size	size of iteration range nvlist
2607  *
2608  * outputs:
2609  * zc_name		name of next snapshot
2610  * zc_objset_stats	stats
2611  * zc_nvlist_dst	property nvlist
2612  * zc_nvlist_dst_size	size of property nvlist
2613  */
2614 static int
zfs_ioc_snapshot_list_next(zfs_cmd_t * zc)2615 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
2616 {
2617 	int error;
2618 	objset_t *os, *ossnap;
2619 	dsl_dataset_t *ds;
2620 	uint64_t min_txg = 0, max_txg = 0;
2621 
2622 	if (zc->zc_nvlist_src_size != 0) {
2623 		nvlist_t *props = NULL;
2624 		error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2625 		    zc->zc_iflags, &props);
2626 		if (error != 0)
2627 			return (error);
2628 		(void) nvlist_lookup_uint64(props, SNAP_ITER_MIN_TXG,
2629 		    &min_txg);
2630 		(void) nvlist_lookup_uint64(props, SNAP_ITER_MAX_TXG,
2631 		    &max_txg);
2632 		nvlist_free(props);
2633 	}
2634 
2635 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2636 	if (error != 0) {
2637 		return (error == ENOENT ? SET_ERROR(ESRCH) : error);
2638 	}
2639 
2640 	/*
2641 	 * A dataset name of maximum length cannot have any snapshots,
2642 	 * so exit immediately.
2643 	 */
2644 	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >=
2645 	    ZFS_MAX_DATASET_NAME_LEN) {
2646 		dmu_objset_rele(os, FTAG);
2647 		return (SET_ERROR(ESRCH));
2648 	}
2649 
2650 	while (error == 0) {
2651 		if (issig()) {
2652 			error = SET_ERROR(EINTR);
2653 			break;
2654 		}
2655 
2656 		error = dmu_snapshot_list_next(os,
2657 		    sizeof (zc->zc_name) - strlen(zc->zc_name),
2658 		    zc->zc_name + strlen(zc->zc_name), &zc->zc_obj,
2659 		    &zc->zc_cookie, NULL);
2660 		if (error == ENOENT) {
2661 			error = SET_ERROR(ESRCH);
2662 			break;
2663 		} else if (error != 0) {
2664 			break;
2665 		}
2666 
2667 		error = dsl_dataset_hold_obj(dmu_objset_pool(os), zc->zc_obj,
2668 		    FTAG, &ds);
2669 		if (error != 0)
2670 			break;
2671 
2672 		if ((min_txg != 0 && dsl_get_creationtxg(ds) < min_txg) ||
2673 		    (max_txg != 0 && dsl_get_creationtxg(ds) > max_txg)) {
2674 			dsl_dataset_rele(ds, FTAG);
2675 			/* undo snapshot name append */
2676 			*(strchr(zc->zc_name, '@') + 1) = '\0';
2677 			/* skip snapshot */
2678 			continue;
2679 		}
2680 
2681 		if (zc->zc_simple) {
2682 			dsl_dataset_fast_stat(ds, &zc->zc_objset_stats);
2683 			dsl_dataset_rele(ds, FTAG);
2684 			break;
2685 		}
2686 
2687 		if ((error = dmu_objset_from_ds(ds, &ossnap)) != 0) {
2688 			dsl_dataset_rele(ds, FTAG);
2689 			break;
2690 		}
2691 		if ((error = zfs_ioc_objset_stats_impl(zc, ossnap)) != 0) {
2692 			dsl_dataset_rele(ds, FTAG);
2693 			break;
2694 		}
2695 		dsl_dataset_rele(ds, FTAG);
2696 		break;
2697 	}
2698 
2699 	dmu_objset_rele(os, FTAG);
2700 	/* if we failed, undo the @ that we tacked on to zc_name */
2701 	if (error != 0)
2702 		*strchr(zc->zc_name, '@') = '\0';
2703 	return (error);
2704 }
2705 
2706 static int
zfs_prop_set_userquota(const char * dsname,nvpair_t * pair)2707 zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
2708 {
2709 	const char *propname = nvpair_name(pair);
2710 	uint64_t *valary;
2711 	unsigned int vallen;
2712 	const char *dash, *domain;
2713 	zfs_userquota_prop_t type;
2714 	uint64_t rid;
2715 	uint64_t quota;
2716 	zfsvfs_t *zfsvfs;
2717 	int err;
2718 
2719 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2720 		nvlist_t *attrs;
2721 		VERIFY0(nvpair_value_nvlist(pair, &attrs));
2722 		if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2723 		    &pair) != 0)
2724 			return (SET_ERROR(EINVAL));
2725 	}
2726 
2727 	/*
2728 	 * A correctly constructed propname is encoded as
2729 	 * userquota@<rid>-<domain>.
2730 	 */
2731 	if ((dash = strchr(propname, '-')) == NULL ||
2732 	    nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
2733 	    vallen != 3)
2734 		return (SET_ERROR(EINVAL));
2735 
2736 	domain = dash + 1;
2737 	type = valary[0];
2738 	rid = valary[1];
2739 	quota = valary[2];
2740 
2741 	err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
2742 	if (err == 0) {
2743 		err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
2744 		zfsvfs_rele(zfsvfs, FTAG);
2745 	}
2746 
2747 	return (err);
2748 }
2749 
2750 /*
2751  * If the named property is one that has a special function to set its value,
2752  * return 0 on success and a positive error code on failure; otherwise if it is
2753  * not one of the special properties handled by this function, return -1.
2754  *
2755  * XXX: It would be better for callers of the property interface if we handled
2756  * these special cases in dsl_prop.c (in the dsl layer).
2757  */
2758 static int
zfs_prop_set_special(const char * dsname,zprop_source_t source,nvpair_t * pair)2759 zfs_prop_set_special(const char *dsname, zprop_source_t source,
2760     nvpair_t *pair)
2761 {
2762 	const char *propname = nvpair_name(pair);
2763 	zfs_prop_t prop = zfs_name_to_prop(propname);
2764 	uint64_t intval = 0;
2765 	const char *strval = NULL;
2766 	int err = -1;
2767 
2768 	if (prop == ZPROP_USERPROP) {
2769 		if (zfs_prop_userquota(propname))
2770 			return (zfs_prop_set_userquota(dsname, pair));
2771 		return (-1);
2772 	}
2773 
2774 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2775 		nvlist_t *attrs;
2776 		VERIFY0(nvpair_value_nvlist(pair, &attrs));
2777 		VERIFY0(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &pair));
2778 	}
2779 
2780 	/* all special properties are numeric except for keylocation */
2781 	if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
2782 		strval = fnvpair_value_string(pair);
2783 	} else {
2784 		intval = fnvpair_value_uint64(pair);
2785 	}
2786 
2787 	switch (prop) {
2788 	case ZFS_PROP_QUOTA:
2789 		err = dsl_dir_set_quota(dsname, source, intval);
2790 		break;
2791 	case ZFS_PROP_REFQUOTA:
2792 		err = dsl_dataset_set_refquota(dsname, source, intval);
2793 		break;
2794 	case ZFS_PROP_FILESYSTEM_LIMIT:
2795 	case ZFS_PROP_SNAPSHOT_LIMIT:
2796 		if (intval == UINT64_MAX) {
2797 			/* clearing the limit, just do it */
2798 			err = 0;
2799 		} else {
2800 			err = dsl_dir_activate_fs_ss_limit(dsname);
2801 		}
2802 		/*
2803 		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2804 		 * default path to set the value in the nvlist.
2805 		 */
2806 		if (err == 0)
2807 			err = -1;
2808 		break;
2809 	case ZFS_PROP_KEYLOCATION:
2810 		err = dsl_crypto_can_set_keylocation(dsname, strval);
2811 
2812 		/*
2813 		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2814 		 * default path to set the value in the nvlist.
2815 		 */
2816 		if (err == 0)
2817 			err = -1;
2818 		break;
2819 	case ZFS_PROP_RESERVATION:
2820 		err = dsl_dir_set_reservation(dsname, source, intval);
2821 		break;
2822 	case ZFS_PROP_REFRESERVATION:
2823 		err = dsl_dataset_set_refreservation(dsname, source, intval);
2824 		break;
2825 	case ZFS_PROP_COMPRESSION:
2826 		err = dsl_dataset_set_compression(dsname, source, intval);
2827 		/*
2828 		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2829 		 * default path to set the value in the nvlist.
2830 		 */
2831 		if (err == 0)
2832 			err = -1;
2833 		break;
2834 	case ZFS_PROP_VOLSIZE:
2835 		err = zvol_set_volsize(dsname, intval);
2836 		break;
2837 	case ZFS_PROP_VOLTHREADING:
2838 		err = zvol_set_volthreading(dsname, intval);
2839 		/*
2840 		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2841 		 * default path to set the value in the nvlist.
2842 		 */
2843 		if (err == 0)
2844 			err = -1;
2845 		break;
2846 	case ZFS_PROP_SNAPDEV:
2847 	case ZFS_PROP_VOLMODE:
2848 		err = zvol_set_common(dsname, prop, source, intval);
2849 		break;
2850 	case ZFS_PROP_READONLY:
2851 		err = zvol_set_ro(dsname, intval);
2852 		/*
2853 		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2854 		 * default path to set the value in the nvlist.
2855 		 */
2856 		if (err == 0)
2857 			err = -1;
2858 		break;
2859 	case ZFS_PROP_VERSION:
2860 	{
2861 		zfsvfs_t *zfsvfs;
2862 
2863 		if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2864 			break;
2865 
2866 		err = zfs_set_version(zfsvfs, intval);
2867 		zfsvfs_rele(zfsvfs, FTAG);
2868 
2869 		if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
2870 			zfs_cmd_t *zc;
2871 
2872 			zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2873 			(void) strlcpy(zc->zc_name, dsname,
2874 			    sizeof (zc->zc_name));
2875 			(void) zfs_ioc_userspace_upgrade(zc);
2876 			(void) zfs_ioc_id_quota_upgrade(zc);
2877 			kmem_free(zc, sizeof (zfs_cmd_t));
2878 		}
2879 		break;
2880 	}
2881 	case ZFS_PROP_LONGNAME:
2882 	{
2883 		zfsvfs_t *zfsvfs;
2884 
2885 		/*
2886 		 * Ignore the checks if the property is being applied as part of
2887 		 * 'zfs receive'. Because, we already check if the local pool
2888 		 * has SPA_FEATURE_LONGNAME enabled in dmu_recv_begin_check().
2889 		 */
2890 		if (source == ZPROP_SRC_RECEIVED) {
2891 			cmn_err(CE_NOTE, "Skipping ZFS_PROP_LONGNAME checks "
2892 			    "for dsname=%s\n", dsname);
2893 			err = -1;
2894 			break;
2895 		}
2896 
2897 		if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE)) != 0) {
2898 			cmn_err(CE_WARN, "%s:%d Failed to hold for dsname=%s "
2899 			    "err=%d\n", __FILE__, __LINE__, dsname, err);
2900 			break;
2901 		}
2902 
2903 		if (!spa_feature_is_enabled(zfsvfs->z_os->os_spa,
2904 		    SPA_FEATURE_LONGNAME)) {
2905 			err = ENOTSUP;
2906 		} else {
2907 			/*
2908 			 * Set err to -1 to force the zfs_set_prop_nvlist code
2909 			 * down the default path to set the value in the nvlist.
2910 			 */
2911 			err = -1;
2912 		}
2913 		zfsvfs_rele(zfsvfs, FTAG);
2914 		break;
2915 	}
2916 	case ZFS_PROP_DEFAULTUSERQUOTA:
2917 	case ZFS_PROP_DEFAULTGROUPQUOTA:
2918 	case ZFS_PROP_DEFAULTPROJECTQUOTA:
2919 	case ZFS_PROP_DEFAULTUSEROBJQUOTA:
2920 	case ZFS_PROP_DEFAULTGROUPOBJQUOTA:
2921 	case ZFS_PROP_DEFAULTPROJECTOBJQUOTA:
2922 	{
2923 		zfsvfs_t *zfsvfs;
2924 		if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2925 			break;
2926 		err = zfs_set_default_quota(zfsvfs, prop, intval);
2927 		zfsvfs_rele(zfsvfs, FTAG);
2928 		break;
2929 	}
2930 	case ZFS_PROP_ZONED_UID:
2931 	{
2932 		uint64_t old_uid = 0;
2933 		(void) dsl_prop_get(dsname, "zoned_uid", 8, 1, &old_uid, NULL);
2934 		if (old_uid != 0)
2935 			(void) zone_dataset_detach_uid(CRED(), dsname,
2936 			    (uid_t)old_uid);
2937 		if (intval != 0) {
2938 			err = zone_dataset_attach_uid(CRED(), dsname,
2939 			    (uid_t)intval);
2940 			if (err == ENXIO)
2941 				err = ZFS_ERR_NO_USER_NS_SUPPORT;
2942 			if (err != 0)
2943 				break;
2944 		}
2945 		/*
2946 		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2947 		 * default path to set the value in the nvlist.
2948 		 */
2949 		err = -1;
2950 		break;
2951 	}
2952 	default:
2953 		err = -1;
2954 	}
2955 
2956 	return (err);
2957 }
2958 
2959 static boolean_t
zfs_is_namespace_prop(zfs_prop_t prop)2960 zfs_is_namespace_prop(zfs_prop_t prop)
2961 {
2962 	switch (prop) {
2963 
2964 	case ZFS_PROP_ATIME:
2965 	case ZFS_PROP_RELATIME:
2966 	case ZFS_PROP_DEVICES:
2967 	case ZFS_PROP_EXEC:
2968 	case ZFS_PROP_SETUID:
2969 	case ZFS_PROP_READONLY:
2970 	case ZFS_PROP_XATTR:
2971 	case ZFS_PROP_NBMAND:
2972 		return (B_TRUE);
2973 
2974 	default:
2975 		return (B_FALSE);
2976 	}
2977 }
2978 
2979 /*
2980  * This function is best effort. If it fails to set any of the given properties,
2981  * it continues to set as many as it can and returns the last error
2982  * encountered. If the caller provides a non-NULL errlist, it will be filled in
2983  * with the list of names of all the properties that failed along with the
2984  * corresponding error numbers.
2985  *
2986  * If every property is set successfully, zero is returned and errlist is not
2987  * modified.
2988  */
2989 int
zfs_set_prop_nvlist(const char * dsname,zprop_source_t source,nvlist_t * nvl,nvlist_t * errlist)2990 zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
2991     nvlist_t *errlist)
2992 {
2993 	nvpair_t *pair;
2994 	nvpair_t *propval;
2995 	int rv = 0;
2996 	int err;
2997 	uint64_t intval;
2998 	const char *strval;
2999 	boolean_t should_update_mount_cache = B_FALSE;
3000 
3001 	nvlist_t *genericnvl = fnvlist_alloc();
3002 	nvlist_t *retrynvl = fnvlist_alloc();
3003 retry:
3004 	pair = NULL;
3005 	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
3006 		const char *propname = nvpair_name(pair);
3007 		zfs_prop_t prop = zfs_name_to_prop(propname);
3008 		err = 0;
3009 
3010 		/* decode the property value */
3011 		propval = pair;
3012 		if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
3013 			nvlist_t *attrs;
3014 			attrs = fnvpair_value_nvlist(pair);
3015 			if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
3016 			    &propval) != 0)
3017 				err = SET_ERROR(EINVAL);
3018 		}
3019 
3020 		/* Validate value type */
3021 		if (err == 0 && source == ZPROP_SRC_INHERITED) {
3022 			/* inherited properties are expected to be booleans */
3023 			if (nvpair_type(propval) != DATA_TYPE_BOOLEAN)
3024 				err = SET_ERROR(EINVAL);
3025 		} else if (err == 0 && prop == ZPROP_USERPROP) {
3026 			if (zfs_prop_user(propname)) {
3027 				if (nvpair_type(propval) != DATA_TYPE_STRING)
3028 					err = SET_ERROR(EINVAL);
3029 			} else if (zfs_prop_userquota(propname)) {
3030 				if (nvpair_type(propval) !=
3031 				    DATA_TYPE_UINT64_ARRAY)
3032 					err = SET_ERROR(EINVAL);
3033 			} else {
3034 				err = SET_ERROR(EINVAL);
3035 			}
3036 		} else if (err == 0) {
3037 			if (nvpair_type(propval) == DATA_TYPE_STRING) {
3038 				if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
3039 					err = SET_ERROR(EINVAL);
3040 			} else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
3041 				const char *unused;
3042 
3043 				intval = fnvpair_value_uint64(propval);
3044 
3045 				switch (zfs_prop_get_type(prop)) {
3046 				case PROP_TYPE_NUMBER:
3047 					break;
3048 				case PROP_TYPE_STRING:
3049 					err = SET_ERROR(EINVAL);
3050 					break;
3051 				case PROP_TYPE_INDEX:
3052 					if (zfs_prop_index_to_string(prop,
3053 					    intval, &unused) != 0)
3054 						err =
3055 						    SET_ERROR(ZFS_ERR_BADPROP);
3056 					break;
3057 				default:
3058 					cmn_err(CE_PANIC,
3059 					    "unknown property type");
3060 				}
3061 			} else {
3062 				err = SET_ERROR(EINVAL);
3063 			}
3064 		}
3065 
3066 		/* Validate permissions */
3067 		if (err == 0)
3068 			err = zfs_check_settable(dsname, pair, CRED());
3069 
3070 		if (err == 0) {
3071 			if (source == ZPROP_SRC_INHERITED)
3072 				err = -1; /* does not need special handling */
3073 			else
3074 				err = zfs_prop_set_special(dsname, source,
3075 				    pair);
3076 			if (err == -1) {
3077 				/*
3078 				 * For better performance we build up a list of
3079 				 * properties to set in a single transaction.
3080 				 */
3081 				err = nvlist_add_nvpair(genericnvl, pair);
3082 			} else if (err != 0 && nvl != retrynvl) {
3083 				/*
3084 				 * This may be a spurious error caused by
3085 				 * receiving quota and reservation out of order.
3086 				 * Try again in a second pass.
3087 				 */
3088 				err = nvlist_add_nvpair(retrynvl, pair);
3089 			}
3090 		}
3091 
3092 		if (err != 0) {
3093 			if (errlist != NULL)
3094 				fnvlist_add_int32(errlist, propname, err);
3095 			rv = err;
3096 		}
3097 
3098 		if (zfs_is_namespace_prop(prop))
3099 			should_update_mount_cache = B_TRUE;
3100 	}
3101 
3102 	if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
3103 		nvl = retrynvl;
3104 		goto retry;
3105 	}
3106 
3107 	if (nvlist_empty(genericnvl))
3108 		goto out;
3109 
3110 	/*
3111 	 * Try to set them all in one batch.
3112 	 */
3113 	err = dsl_props_set(dsname, source, genericnvl);
3114 	if (err == 0)
3115 		goto out;
3116 
3117 	/*
3118 	 * If batching fails, we still want to set as many properties as we
3119 	 * can, so try setting them individually.
3120 	 */
3121 	pair = NULL;
3122 	while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
3123 		const char *propname = nvpair_name(pair);
3124 
3125 		propval = pair;
3126 		if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
3127 			nvlist_t *attrs;
3128 			attrs = fnvpair_value_nvlist(pair);
3129 			propval = fnvlist_lookup_nvpair(attrs, ZPROP_VALUE);
3130 		}
3131 
3132 		if (nvpair_type(propval) == DATA_TYPE_STRING) {
3133 			strval = fnvpair_value_string(propval);
3134 			err = dsl_prop_set_string(dsname, propname,
3135 			    source, strval);
3136 		} else if (nvpair_type(propval) == DATA_TYPE_BOOLEAN) {
3137 			err = dsl_prop_inherit(dsname, propname, source);
3138 		} else {
3139 			intval = fnvpair_value_uint64(propval);
3140 			err = dsl_prop_set_int(dsname, propname, source,
3141 			    intval);
3142 		}
3143 
3144 		if (err != 0) {
3145 			if (errlist != NULL) {
3146 				fnvlist_add_int32(errlist, propname, err);
3147 			}
3148 			rv = err;
3149 		}
3150 	}
3151 
3152 out:
3153 	if (should_update_mount_cache)
3154 		zfs_ioctl_update_mount_cache(dsname);
3155 
3156 	nvlist_free(genericnvl);
3157 	nvlist_free(retrynvl);
3158 
3159 	return (rv);
3160 }
3161 
3162 /*
3163  * Check that all the properties are valid user properties.
3164  */
3165 static int
zfs_check_userprops(nvlist_t * nvl)3166 zfs_check_userprops(nvlist_t *nvl)
3167 {
3168 	nvpair_t *pair = NULL;
3169 
3170 	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
3171 		const char *propname = nvpair_name(pair);
3172 
3173 		if (!zfs_prop_user(propname) ||
3174 		    nvpair_type(pair) != DATA_TYPE_STRING)
3175 			return (SET_ERROR(EINVAL));
3176 
3177 		if (strlen(propname) >= ZAP_MAXNAMELEN)
3178 			return (SET_ERROR(ENAMETOOLONG));
3179 
3180 		if (strlen(fnvpair_value_string(pair)) >= ZAP_MAXVALUELEN)
3181 			return (SET_ERROR(E2BIG));
3182 	}
3183 	return (0);
3184 }
3185 
3186 static void
props_skip(nvlist_t * props,nvlist_t * skipped,nvlist_t ** newprops)3187 props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
3188 {
3189 	nvpair_t *pair;
3190 
3191 	VERIFY0(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP));
3192 
3193 	pair = NULL;
3194 	while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
3195 		if (nvlist_exists(skipped, nvpair_name(pair)))
3196 			continue;
3197 
3198 		VERIFY0(nvlist_add_nvpair(*newprops, pair));
3199 	}
3200 }
3201 
3202 static int
clear_received_props(const char * dsname,nvlist_t * props,nvlist_t * skipped)3203 clear_received_props(const char *dsname, nvlist_t *props,
3204     nvlist_t *skipped)
3205 {
3206 	int err = 0;
3207 	nvlist_t *cleared_props = NULL;
3208 	props_skip(props, skipped, &cleared_props);
3209 	if (!nvlist_empty(cleared_props)) {
3210 		/*
3211 		 * Acts on local properties until the dataset has received
3212 		 * properties at least once on or after SPA_VERSION_RECVD_PROPS.
3213 		 */
3214 		zprop_source_t flags = (ZPROP_SRC_NONE |
3215 		    (dsl_prop_get_hasrecvd(dsname) ? ZPROP_SRC_RECEIVED : 0));
3216 		err = zfs_set_prop_nvlist(dsname, flags, cleared_props, NULL);
3217 	}
3218 	nvlist_free(cleared_props);
3219 	return (err);
3220 }
3221 
3222 /*
3223  * inputs:
3224  * zc_name		name of filesystem
3225  * zc_value		name of property to set
3226  * zc_nvlist_src{_size}	nvlist of properties to apply
3227  * zc_cookie		received properties flag
3228  *
3229  * outputs:
3230  * zc_nvlist_dst{_size} error for each unapplied received property
3231  */
3232 static int
zfs_ioc_set_prop(zfs_cmd_t * zc)3233 zfs_ioc_set_prop(zfs_cmd_t *zc)
3234 {
3235 	nvlist_t *nvl;
3236 	boolean_t received = zc->zc_cookie;
3237 	zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
3238 	    ZPROP_SRC_LOCAL);
3239 	nvlist_t *errors;
3240 	int error;
3241 
3242 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3243 	    zc->zc_iflags, &nvl)) != 0)
3244 		return (error);
3245 
3246 	if (received) {
3247 		nvlist_t *origprops;
3248 
3249 		if (dsl_prop_get_received(zc->zc_name, &origprops) == 0) {
3250 			(void) clear_received_props(zc->zc_name,
3251 			    origprops, nvl);
3252 			nvlist_free(origprops);
3253 		}
3254 
3255 		error = dsl_prop_set_hasrecvd(zc->zc_name);
3256 	}
3257 
3258 	errors = fnvlist_alloc();
3259 	if (error == 0)
3260 		error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors);
3261 
3262 	if (zc->zc_nvlist_dst != 0 && errors != NULL) {
3263 		(void) put_nvlist(zc, errors);
3264 	}
3265 
3266 	nvlist_free(errors);
3267 	nvlist_free(nvl);
3268 	return (error);
3269 }
3270 
3271 /*
3272  * inputs:
3273  * zc_name		name of filesystem
3274  * zc_value		name of property to inherit
3275  * zc_cookie		revert to received value if TRUE
3276  *
3277  * outputs:		none
3278  */
3279 static int
zfs_ioc_inherit_prop(zfs_cmd_t * zc)3280 zfs_ioc_inherit_prop(zfs_cmd_t *zc)
3281 {
3282 	const char *propname = zc->zc_value;
3283 	zfs_prop_t prop = zfs_name_to_prop(propname);
3284 	boolean_t received = zc->zc_cookie;
3285 	zprop_source_t source = (received
3286 	    ? ZPROP_SRC_NONE		/* revert to received value, if any */
3287 	    : ZPROP_SRC_INHERITED);	/* explicitly inherit */
3288 	nvlist_t *dummy;
3289 	nvpair_t *pair;
3290 	zprop_type_t type;
3291 	int err;
3292 
3293 	if (!received) {
3294 		/*
3295 		 * Only check this in the non-received case. We want to allow
3296 		 * 'inherit -S' to revert non-inheritable properties like quota
3297 		 * and reservation to the received or default values even though
3298 		 * they are not considered inheritable.
3299 		 */
3300 		if (prop != ZPROP_USERPROP && !zfs_prop_inheritable(prop))
3301 			return (SET_ERROR(EINVAL));
3302 	}
3303 
3304 	if (prop == ZPROP_USERPROP) {
3305 		if (!zfs_prop_user(propname))
3306 			return (SET_ERROR(EINVAL));
3307 
3308 		type = PROP_TYPE_STRING;
3309 	} else if (prop == ZFS_PROP_VOLSIZE || prop == ZFS_PROP_VERSION) {
3310 		return (SET_ERROR(EINVAL));
3311 	} else {
3312 		type = zfs_prop_get_type(prop);
3313 	}
3314 
3315 	/*
3316 	 * zfs_prop_set_special() expects properties in the form of an
3317 	 * nvpair with type info.
3318 	 */
3319 	dummy = fnvlist_alloc();
3320 
3321 	switch (type) {
3322 	case PROP_TYPE_STRING:
3323 		VERIFY0(nvlist_add_string(dummy, propname, ""));
3324 		break;
3325 	case PROP_TYPE_NUMBER:
3326 	case PROP_TYPE_INDEX:
3327 		VERIFY0(nvlist_add_uint64(dummy, propname, 0));
3328 		break;
3329 	default:
3330 		err = SET_ERROR(EINVAL);
3331 		goto errout;
3332 	}
3333 
3334 	pair = nvlist_next_nvpair(dummy, NULL);
3335 	if (pair == NULL) {
3336 		err = SET_ERROR(EINVAL);
3337 	} else {
3338 		err = zfs_prop_set_special(zc->zc_name, source, pair);
3339 		if (err == -1) /* property is not "special", needs handling */
3340 			err = dsl_prop_inherit(zc->zc_name, zc->zc_value,
3341 			    source);
3342 	}
3343 
3344 errout:
3345 	nvlist_free(dummy);
3346 	return (err);
3347 }
3348 
3349 static int
zfs_ioc_pool_set_props(zfs_cmd_t * zc)3350 zfs_ioc_pool_set_props(zfs_cmd_t *zc)
3351 {
3352 	nvlist_t *props;
3353 	spa_t *spa;
3354 	int error;
3355 	nvpair_t *pair;
3356 
3357 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3358 	    zc->zc_iflags, &props)))
3359 		return (error);
3360 
3361 	/*
3362 	 * If the only property is the configfile, then just do a spa_lookup()
3363 	 * to handle the faulted case.
3364 	 */
3365 	pair = nvlist_next_nvpair(props, NULL);
3366 	if (pair != NULL && strcmp(nvpair_name(pair),
3367 	    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
3368 	    nvlist_next_nvpair(props, pair) == NULL) {
3369 		spa_namespace_enter(FTAG);
3370 		if ((spa = spa_lookup(zc->zc_name)) != NULL) {
3371 			spa_configfile_set(spa, props, B_FALSE);
3372 			spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE);
3373 		}
3374 		spa_namespace_exit(FTAG);
3375 		if (spa != NULL) {
3376 			nvlist_free(props);
3377 			return (0);
3378 		}
3379 	}
3380 
3381 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
3382 		nvlist_free(props);
3383 		return (error);
3384 	}
3385 
3386 	error = spa_prop_set(spa, props);
3387 
3388 	nvlist_free(props);
3389 	spa_close(spa, FTAG);
3390 
3391 	return (error);
3392 }
3393 
3394 /*
3395  * innvl: {
3396  *	"get_props_names": [ "prop1", "prop2", ..., "propN" ]
3397  * }
3398  */
3399 
3400 static const zfs_ioc_key_t zfs_keys_get_props[] = {
3401 	{ ZPOOL_GET_PROPS_NAMES,	DATA_TYPE_STRING_ARRAY,	ZK_OPTIONAL },
3402 };
3403 
3404 static int
zfs_ioc_pool_get_props(const char * pool,nvlist_t * innvl,nvlist_t * outnvl)3405 zfs_ioc_pool_get_props(const char *pool, nvlist_t *innvl, nvlist_t *outnvl)
3406 {
3407 	spa_t *spa;
3408 	char **props = NULL;
3409 	unsigned int n_props = 0;
3410 	int error;
3411 
3412 	if (nvlist_lookup_string_array(innvl, ZPOOL_GET_PROPS_NAMES,
3413 	    &props, &n_props) != 0) {
3414 		props = NULL;
3415 	}
3416 
3417 	if ((error = spa_open(pool, &spa, FTAG)) != 0) {
3418 		/*
3419 		 * If the pool is faulted, there may be properties we can still
3420 		 * get (such as altroot and cachefile), so attempt to get them
3421 		 * anyway.
3422 		 */
3423 		spa_namespace_enter(FTAG);
3424 		if ((spa = spa_lookup(pool)) != NULL) {
3425 			error = spa_prop_get(spa, outnvl);
3426 			if (error == 0 && props != NULL)
3427 				error = spa_prop_get_nvlist(spa, props, n_props,
3428 				    outnvl);
3429 		}
3430 		spa_namespace_exit(FTAG);
3431 	} else {
3432 		error = spa_prop_get(spa, outnvl);
3433 		if (error == 0 && props != NULL)
3434 			error = spa_prop_get_nvlist(spa, props, n_props,
3435 			    outnvl);
3436 		spa_close(spa, FTAG);
3437 	}
3438 
3439 	return (error);
3440 }
3441 
3442 /*
3443  * innvl: {
3444  *     "vdevprops_set_vdev" -> guid
3445  *     "vdevprops_set_props" -> { prop -> value }
3446  * }
3447  *
3448  * outnvl: propname -> error code (int32)
3449  */
3450 static const zfs_ioc_key_t zfs_keys_vdev_set_props[] = {
3451 	{ZPOOL_VDEV_PROPS_SET_VDEV,	DATA_TYPE_UINT64,	0},
3452 	{ZPOOL_VDEV_PROPS_SET_PROPS,	DATA_TYPE_NVLIST,	0}
3453 };
3454 
3455 static int
zfs_ioc_vdev_set_props(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)3456 zfs_ioc_vdev_set_props(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3457 {
3458 	spa_t *spa;
3459 	int error;
3460 	vdev_t *vd;
3461 	uint64_t vdev_guid;
3462 
3463 	/* Early validation */
3464 	if (nvlist_lookup_uint64(innvl, ZPOOL_VDEV_PROPS_SET_VDEV,
3465 	    &vdev_guid) != 0)
3466 		return (SET_ERROR(EINVAL));
3467 
3468 	if (outnvl == NULL)
3469 		return (SET_ERROR(EINVAL));
3470 
3471 	if ((error = spa_open(poolname, &spa, FTAG)) != 0)
3472 		return (error);
3473 
3474 	ASSERT(spa_writeable(spa));
3475 
3476 	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
3477 	if ((vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE)) == NULL) {
3478 		spa_config_exit(spa, SCL_CONFIG, FTAG);
3479 		spa_close(spa, FTAG);
3480 		return (SET_ERROR(ENOENT));
3481 	}
3482 
3483 	error = vdev_prop_set(vd, innvl, outnvl);
3484 	spa_config_exit(spa, SCL_CONFIG, FTAG);
3485 
3486 	spa_close(spa, FTAG);
3487 
3488 	return (error);
3489 }
3490 
3491 /*
3492  * innvl: {
3493  *     "vdevprops_get_vdev" -> guid
3494  *     (optional) "vdevprops_get_props" -> { propname -> propid }
3495  * }
3496  *
3497  * outnvl: propname -> value
3498  */
3499 static const zfs_ioc_key_t zfs_keys_vdev_get_props[] = {
3500 	{ZPOOL_VDEV_PROPS_GET_VDEV,	DATA_TYPE_UINT64,	0},
3501 	{ZPOOL_VDEV_PROPS_GET_PROPS,	DATA_TYPE_NVLIST,	ZK_OPTIONAL}
3502 };
3503 
3504 static int
zfs_ioc_vdev_get_props(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)3505 zfs_ioc_vdev_get_props(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3506 {
3507 	spa_t *spa;
3508 	int error;
3509 	vdev_t *vd;
3510 	uint64_t vdev_guid;
3511 
3512 	/* Early validation */
3513 	if (nvlist_lookup_uint64(innvl, ZPOOL_VDEV_PROPS_GET_VDEV,
3514 	    &vdev_guid) != 0)
3515 		return (SET_ERROR(EINVAL));
3516 
3517 	if (outnvl == NULL)
3518 		return (SET_ERROR(EINVAL));
3519 
3520 	if ((error = spa_open(poolname, &spa, FTAG)) != 0)
3521 		return (error);
3522 
3523 	spa_config_enter(spa, SCL_CONFIG, FTAG, RW_READER);
3524 	if ((vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE)) == NULL) {
3525 		spa_config_exit(spa, SCL_CONFIG, FTAG);
3526 		spa_close(spa, FTAG);
3527 		return (SET_ERROR(ENOENT));
3528 	}
3529 
3530 	error = vdev_prop_get(vd, innvl, outnvl);
3531 	spa_config_exit(spa, SCL_CONFIG, FTAG);
3532 
3533 	spa_close(spa, FTAG);
3534 
3535 	return (error);
3536 }
3537 
3538 /*
3539  * inputs:
3540  * zc_name		name of filesystem
3541  * zc_nvlist_src{_size}	nvlist of delegated permissions
3542  * zc_perm_action	allow/unallow flag
3543  *
3544  * outputs:		none
3545  */
3546 static int
zfs_ioc_set_fsacl(zfs_cmd_t * zc)3547 zfs_ioc_set_fsacl(zfs_cmd_t *zc)
3548 {
3549 	int error;
3550 	nvlist_t *fsaclnv = NULL;
3551 
3552 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3553 	    zc->zc_iflags, &fsaclnv)) != 0)
3554 		return (error);
3555 
3556 	/*
3557 	 * Verify nvlist is constructed correctly
3558 	 */
3559 	if (zfs_deleg_verify_nvlist(fsaclnv) != 0) {
3560 		nvlist_free(fsaclnv);
3561 		return (SET_ERROR(EINVAL));
3562 	}
3563 
3564 	/*
3565 	 * If we don't have PRIV_SYS_MOUNT, then validate
3566 	 * that user is allowed to hand out each permission in
3567 	 * the nvlist(s)
3568 	 */
3569 
3570 	error = secpolicy_zfs(CRED());
3571 	if (error != 0) {
3572 		if (zc->zc_perm_action == B_FALSE) {
3573 			error = dsl_deleg_can_allow(zc->zc_name,
3574 			    fsaclnv, CRED());
3575 		} else {
3576 			error = dsl_deleg_can_unallow(zc->zc_name,
3577 			    fsaclnv, CRED());
3578 		}
3579 	}
3580 
3581 	if (error == 0)
3582 		error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
3583 
3584 	nvlist_free(fsaclnv);
3585 	return (error);
3586 }
3587 
3588 /*
3589  * inputs:
3590  * zc_name		name of filesystem
3591  *
3592  * outputs:
3593  * zc_nvlist_src{_size}	nvlist of delegated permissions
3594  */
3595 static int
zfs_ioc_get_fsacl(zfs_cmd_t * zc)3596 zfs_ioc_get_fsacl(zfs_cmd_t *zc)
3597 {
3598 	nvlist_t *nvp;
3599 	int error;
3600 
3601 	if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
3602 		error = put_nvlist(zc, nvp);
3603 		nvlist_free(nvp);
3604 	}
3605 
3606 	return (error);
3607 }
3608 
3609 static void
zfs_create_cb(objset_t * os,void * arg,cred_t * cr,dmu_tx_t * tx)3610 zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
3611 {
3612 	zfs_creat_t *zct = arg;
3613 
3614 	zfs_create_fs(os, cr, zct->zct_zplprops, tx);
3615 }
3616 
3617 #define	ZFS_PROP_UNDEFINED	((uint64_t)-1)
3618 
3619 /*
3620  * inputs:
3621  * os			parent objset pointer (NULL if root fs)
3622  * fuids_ok		fuids allowed in this version of the spa?
3623  * sa_ok		SAs allowed in this version of the spa?
3624  * createprops		list of properties requested by creator
3625  *
3626  * outputs:
3627  * zplprops	values for the zplprops we attach to the master node object
3628  * is_ci	true if requested file system will be purely case-insensitive
3629  *
3630  * Determine the settings for utf8only, normalization and
3631  * casesensitivity.  Specific values may have been requested by the
3632  * creator and/or we can inherit values from the parent dataset.  If
3633  * the file system is of too early a vintage, a creator can not
3634  * request settings for these properties, even if the requested
3635  * setting is the default value.  We don't actually want to create dsl
3636  * properties for these, so remove them from the source nvlist after
3637  * processing.
3638  */
3639 static int
zfs_fill_zplprops_impl(objset_t * os,uint64_t zplver,boolean_t fuids_ok,boolean_t sa_ok,nvlist_t * createprops,nvlist_t * zplprops,boolean_t * is_ci)3640 zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
3641     boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
3642     nvlist_t *zplprops, boolean_t *is_ci)
3643 {
3644 	uint64_t sense = ZFS_PROP_UNDEFINED;
3645 	uint64_t norm = ZFS_PROP_UNDEFINED;
3646 	uint64_t u8 = ZFS_PROP_UNDEFINED;
3647 	uint64_t duq = ZFS_PROP_UNDEFINED, duoq = ZFS_PROP_UNDEFINED;
3648 	uint64_t dgq = ZFS_PROP_UNDEFINED, dgoq = ZFS_PROP_UNDEFINED;
3649 	uint64_t dpq = ZFS_PROP_UNDEFINED, dpoq = ZFS_PROP_UNDEFINED;
3650 	int error;
3651 
3652 	ASSERT(zplprops != NULL);
3653 
3654 	/* parent dataset must be a filesystem */
3655 	if (os != NULL && os->os_phys->os_type != DMU_OST_ZFS)
3656 		return (SET_ERROR(ZFS_ERR_WRONG_PARENT));
3657 
3658 	/*
3659 	 * Pull out creator prop choices, if any.
3660 	 */
3661 	if (createprops) {
3662 		(void) nvlist_lookup_uint64(createprops,
3663 		    zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
3664 		(void) nvlist_lookup_uint64(createprops,
3665 		    zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
3666 		(void) nvlist_remove_all(createprops,
3667 		    zfs_prop_to_name(ZFS_PROP_NORMALIZE));
3668 		(void) nvlist_lookup_uint64(createprops,
3669 		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
3670 		(void) nvlist_remove_all(createprops,
3671 		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
3672 		(void) nvlist_lookup_uint64(createprops,
3673 		    zfs_prop_to_name(ZFS_PROP_CASE), &sense);
3674 		(void) nvlist_remove_all(createprops,
3675 		    zfs_prop_to_name(ZFS_PROP_CASE));
3676 		(void) nvlist_lookup_uint64(createprops,
3677 		    zfs_prop_to_name(ZFS_PROP_DEFAULTUSERQUOTA), &duq);
3678 		(void) nvlist_remove_all(createprops,
3679 		    zfs_prop_to_name(ZFS_PROP_DEFAULTUSERQUOTA));
3680 		(void) nvlist_lookup_uint64(createprops,
3681 		    zfs_prop_to_name(ZFS_PROP_DEFAULTGROUPQUOTA), &dgq);
3682 		(void) nvlist_remove_all(createprops,
3683 		    zfs_prop_to_name(ZFS_PROP_DEFAULTGROUPQUOTA));
3684 		(void) nvlist_lookup_uint64(createprops,
3685 		    zfs_prop_to_name(ZFS_PROP_DEFAULTPROJECTQUOTA), &dpq);
3686 		(void) nvlist_remove_all(createprops,
3687 		    zfs_prop_to_name(ZFS_PROP_DEFAULTPROJECTQUOTA));
3688 		(void) nvlist_lookup_uint64(createprops,
3689 		    zfs_prop_to_name(ZFS_PROP_DEFAULTUSEROBJQUOTA), &duoq);
3690 		(void) nvlist_remove_all(createprops,
3691 		    zfs_prop_to_name(ZFS_PROP_DEFAULTUSEROBJQUOTA));
3692 		(void) nvlist_lookup_uint64(createprops,
3693 		    zfs_prop_to_name(ZFS_PROP_DEFAULTGROUPOBJQUOTA), &dgoq);
3694 		(void) nvlist_remove_all(createprops,
3695 		    zfs_prop_to_name(ZFS_PROP_DEFAULTGROUPOBJQUOTA));
3696 		(void) nvlist_lookup_uint64(createprops,
3697 		    zfs_prop_to_name(ZFS_PROP_DEFAULTPROJECTOBJQUOTA), &dpoq);
3698 		(void) nvlist_remove_all(createprops,
3699 		    zfs_prop_to_name(ZFS_PROP_DEFAULTPROJECTOBJQUOTA));
3700 	}
3701 
3702 	/*
3703 	 * If the zpl version requested is whacky or the file system
3704 	 * or pool is version is too "young" to support normalization
3705 	 * and the creator tried to set a value for one of the props,
3706 	 * error out.
3707 	 */
3708 	if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
3709 	    (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
3710 	    (zplver >= ZPL_VERSION_SA && !sa_ok) ||
3711 	    (zplver < ZPL_VERSION_NORMALIZATION &&
3712 	    (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
3713 	    sense != ZFS_PROP_UNDEFINED)))
3714 		return (SET_ERROR(ENOTSUP));
3715 
3716 	/*
3717 	 * Put the version in the zplprops
3718 	 */
3719 	VERIFY0(nvlist_add_uint64(zplprops,
3720 	    zfs_prop_to_name(ZFS_PROP_VERSION), zplver));
3721 
3722 	if (norm == ZFS_PROP_UNDEFINED &&
3723 	    (error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm)) != 0)
3724 		return (error);
3725 	VERIFY0(nvlist_add_uint64(zplprops,
3726 	    zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm));
3727 
3728 	/*
3729 	 * If we're normalizing, names must always be valid UTF-8 strings.
3730 	 */
3731 	if (norm)
3732 		u8 = 1;
3733 	if (u8 == ZFS_PROP_UNDEFINED &&
3734 	    (error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8)) != 0)
3735 		return (error);
3736 	VERIFY0(nvlist_add_uint64(zplprops,
3737 	    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8));
3738 
3739 	if (sense == ZFS_PROP_UNDEFINED &&
3740 	    (error = zfs_get_zplprop(os, ZFS_PROP_CASE, &sense)) != 0)
3741 		return (error);
3742 	VERIFY0(nvlist_add_uint64(zplprops,
3743 	    zfs_prop_to_name(ZFS_PROP_CASE), sense));
3744 
3745 	if (duq == ZFS_PROP_UNDEFINED &&
3746 	    (error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTUSERQUOTA, &duq)) != 0)
3747 		return (error);
3748 	VERIFY0(nvlist_add_uint64(zplprops,
3749 	    zfs_prop_to_name(ZFS_PROP_DEFAULTUSERQUOTA), duq));
3750 
3751 	if (dgq == ZFS_PROP_UNDEFINED &&
3752 	    (error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTGROUPQUOTA,
3753 	    &dgq)) != 0)
3754 		return (error);
3755 	VERIFY0(nvlist_add_uint64(zplprops,
3756 	    zfs_prop_to_name(ZFS_PROP_DEFAULTGROUPQUOTA), dgq));
3757 
3758 	if (dpq == ZFS_PROP_UNDEFINED &&
3759 	    (error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTPROJECTQUOTA,
3760 	    &dpq)) != 0)
3761 		return (error);
3762 	VERIFY0(nvlist_add_uint64(zplprops,
3763 	    zfs_prop_to_name(ZFS_PROP_DEFAULTPROJECTQUOTA), dpq));
3764 
3765 	if (duoq == ZFS_PROP_UNDEFINED &&
3766 	    (error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTUSEROBJQUOTA,
3767 	    &duoq)) != 0)
3768 		return (error);
3769 	VERIFY0(nvlist_add_uint64(zplprops,
3770 	    zfs_prop_to_name(ZFS_PROP_DEFAULTUSEROBJQUOTA), duoq));
3771 
3772 	if (dgoq == ZFS_PROP_UNDEFINED &&
3773 	    (error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTGROUPOBJQUOTA,
3774 	    &dgoq)) != 0)
3775 		return (error);
3776 	VERIFY0(nvlist_add_uint64(zplprops,
3777 	    zfs_prop_to_name(ZFS_PROP_DEFAULTGROUPOBJQUOTA), dgoq));
3778 
3779 	if (dpoq == ZFS_PROP_UNDEFINED &&
3780 	    (error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTPROJECTOBJQUOTA,
3781 	    &dpoq)) != 0)
3782 		return (error);
3783 	VERIFY0(nvlist_add_uint64(zplprops,
3784 	    zfs_prop_to_name(ZFS_PROP_DEFAULTPROJECTOBJQUOTA), dpoq));
3785 
3786 	if (is_ci)
3787 		*is_ci = (sense == ZFS_CASE_INSENSITIVE);
3788 
3789 	return (0);
3790 }
3791 
3792 static int
zfs_fill_zplprops(const char * dataset,nvlist_t * createprops,nvlist_t * zplprops,boolean_t * is_ci)3793 zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
3794     nvlist_t *zplprops, boolean_t *is_ci)
3795 {
3796 	boolean_t fuids_ok, sa_ok;
3797 	uint64_t zplver = ZPL_VERSION;
3798 	objset_t *os = NULL;
3799 	char parentname[ZFS_MAX_DATASET_NAME_LEN];
3800 	spa_t *spa;
3801 	uint64_t spa_vers;
3802 	int error;
3803 
3804 	zfs_get_parent(dataset, parentname, sizeof (parentname));
3805 
3806 	if ((error = spa_open(dataset, &spa, FTAG)) != 0)
3807 		return (error);
3808 
3809 	spa_vers = spa_version(spa);
3810 	spa_close(spa, FTAG);
3811 
3812 	zplver = zfs_zpl_version_map(spa_vers);
3813 	fuids_ok = (zplver >= ZPL_VERSION_FUID);
3814 	sa_ok = (zplver >= ZPL_VERSION_SA);
3815 
3816 	/*
3817 	 * Open parent object set so we can inherit zplprop values.
3818 	 */
3819 	if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
3820 		return (error);
3821 
3822 	error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
3823 	    zplprops, is_ci);
3824 	dmu_objset_rele(os, FTAG);
3825 	return (error);
3826 }
3827 
3828 static int
zfs_fill_zplprops_root(uint64_t spa_vers,nvlist_t * createprops,nvlist_t * zplprops,boolean_t * is_ci)3829 zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
3830     nvlist_t *zplprops, boolean_t *is_ci)
3831 {
3832 	boolean_t fuids_ok;
3833 	boolean_t sa_ok;
3834 	uint64_t zplver = ZPL_VERSION;
3835 	int error;
3836 
3837 	zplver = zfs_zpl_version_map(spa_vers);
3838 	fuids_ok = (zplver >= ZPL_VERSION_FUID);
3839 	sa_ok = (zplver >= ZPL_VERSION_SA);
3840 
3841 	error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
3842 	    createprops, zplprops, is_ci);
3843 	return (error);
3844 }
3845 
3846 /*
3847  * innvl: {
3848  *     "type" -> dmu_objset_type_t (int32)
3849  *     (optional) "props" -> { prop -> value }
3850  *     (optional) "hidden_args" -> { "wkeydata" -> value }
3851  *         raw uint8_t array of encryption wrapping key data (32 bytes)
3852  * }
3853  *
3854  * outnvl: propname -> error code (int32)
3855  */
3856 
3857 static const zfs_ioc_key_t zfs_keys_create[] = {
3858 	{"type",	DATA_TYPE_INT32,	0},
3859 	{"props",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
3860 	{"hidden_args",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
3861 };
3862 
3863 static int
zfs_ioc_create(const char * fsname,nvlist_t * innvl,nvlist_t * outnvl)3864 zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3865 {
3866 	int error = 0;
3867 	zfs_creat_t zct = { 0 };
3868 	nvlist_t *nvprops = NULL;
3869 	nvlist_t *hidden_args = NULL;
3870 	void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
3871 	dmu_objset_type_t type;
3872 	boolean_t is_insensitive = B_FALSE;
3873 	dsl_crypto_params_t *dcp = NULL;
3874 
3875 	type = (dmu_objset_type_t)fnvlist_lookup_int32(innvl, "type");
3876 	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3877 	(void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
3878 
3879 	switch (type) {
3880 	case DMU_OST_ZFS:
3881 		cbfunc = zfs_create_cb;
3882 		break;
3883 
3884 	case DMU_OST_ZVOL:
3885 		cbfunc = zvol_create_cb;
3886 		break;
3887 
3888 	default:
3889 		cbfunc = NULL;
3890 		break;
3891 	}
3892 	if (strchr(fsname, '@') ||
3893 	    strchr(fsname, '%'))
3894 		return (SET_ERROR(EINVAL));
3895 
3896 	zct.zct_props = nvprops;
3897 
3898 	if (cbfunc == NULL)
3899 		return (SET_ERROR(EINVAL));
3900 
3901 	if (type == DMU_OST_ZVOL) {
3902 		uint64_t volsize, volblocksize;
3903 
3904 		if (nvprops == NULL)
3905 			return (SET_ERROR(EINVAL));
3906 		if (nvlist_lookup_uint64(nvprops,
3907 		    zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0)
3908 			return (SET_ERROR(EINVAL));
3909 
3910 		if ((error = nvlist_lookup_uint64(nvprops,
3911 		    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
3912 		    &volblocksize)) != 0 && error != ENOENT)
3913 			return (SET_ERROR(EINVAL));
3914 
3915 		if (error != 0)
3916 			volblocksize = zfs_prop_default_numeric(
3917 			    ZFS_PROP_VOLBLOCKSIZE);
3918 
3919 		if ((error = zvol_check_volblocksize(fsname,
3920 		    volblocksize)) != 0 ||
3921 		    (error = zvol_check_volsize(volsize,
3922 		    volblocksize)) != 0)
3923 			return (error);
3924 	} else if (type == DMU_OST_ZFS) {
3925 		int error;
3926 
3927 		/*
3928 		 * We have to have normalization and
3929 		 * case-folding flags correct when we do the
3930 		 * file system creation, so go figure them out
3931 		 * now.
3932 		 */
3933 		VERIFY0(nvlist_alloc(&zct.zct_zplprops,
3934 		    NV_UNIQUE_NAME, KM_SLEEP));
3935 		error = zfs_fill_zplprops(fsname, nvprops,
3936 		    zct.zct_zplprops, &is_insensitive);
3937 		if (error != 0) {
3938 			nvlist_free(zct.zct_zplprops);
3939 			return (error);
3940 		}
3941 	}
3942 
3943 	error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, nvprops,
3944 	    hidden_args, &dcp);
3945 	if (error != 0) {
3946 		nvlist_free(zct.zct_zplprops);
3947 		return (error);
3948 	}
3949 
3950 	error = dmu_objset_create(fsname, type,
3951 	    is_insensitive ? DS_FLAG_CI_DATASET : 0, dcp, cbfunc, &zct);
3952 
3953 	nvlist_free(zct.zct_zplprops);
3954 	dsl_crypto_params_free(dcp, !!error);
3955 
3956 	/*
3957 	 * It would be nice to do this atomically.
3958 	 */
3959 	if (error == 0) {
3960 		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3961 		    nvprops, outnvl);
3962 		if (error != 0) {
3963 			spa_t *spa;
3964 			int error2;
3965 
3966 			/*
3967 			 * Volumes will return EBUSY and cannot be destroyed
3968 			 * until all asynchronous minor handling (e.g. from
3969 			 * setting the volmode property) has completed. Wait for
3970 			 * the spa_zvol_taskq to drain then retry.
3971 			 */
3972 			error2 = dsl_destroy_head(fsname);
3973 			while ((error2 == EBUSY) && (type == DMU_OST_ZVOL)) {
3974 				error2 = spa_open(fsname, &spa, FTAG);
3975 				if (error2 == 0) {
3976 					taskq_wait(spa->spa_zvol_taskq);
3977 					spa_close(spa, FTAG);
3978 				}
3979 				error2 = dsl_destroy_head(fsname);
3980 			}
3981 		}
3982 	}
3983 	return (error);
3984 }
3985 
3986 /*
3987  * innvl: {
3988  *     "origin" -> name of origin snapshot
3989  *     (optional) "props" -> { prop -> value }
3990  *     (optional) "hidden_args" -> { "wkeydata" -> value }
3991  *         raw uint8_t array of encryption wrapping key data (32 bytes)
3992  * }
3993  *
3994  * outputs:
3995  * outnvl: propname -> error code (int32)
3996  */
3997 static const zfs_ioc_key_t zfs_keys_clone[] = {
3998 	{"origin",	DATA_TYPE_STRING,	0},
3999 	{"props",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
4000 	{"hidden_args",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
4001 };
4002 
4003 static int
zfs_ioc_clone(const char * fsname,nvlist_t * innvl,nvlist_t * outnvl)4004 zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
4005 {
4006 	int error = 0;
4007 	nvlist_t *nvprops = NULL;
4008 	const char *origin_name;
4009 
4010 	origin_name = fnvlist_lookup_string(innvl, "origin");
4011 	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
4012 
4013 	if (strchr(fsname, '@') ||
4014 	    strchr(fsname, '%'))
4015 		return (SET_ERROR(EINVAL));
4016 
4017 	if (dataset_namecheck(origin_name, NULL, NULL) != 0)
4018 		return (SET_ERROR(EINVAL));
4019 
4020 	error = dsl_dataset_clone(fsname, origin_name);
4021 
4022 	/*
4023 	 * It would be nice to do this atomically.
4024 	 */
4025 	if (error == 0) {
4026 		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
4027 		    nvprops, outnvl);
4028 		if (error != 0)
4029 			(void) dsl_destroy_head(fsname);
4030 	}
4031 	return (error);
4032 }
4033 
4034 static const zfs_ioc_key_t zfs_keys_remap[] = {
4035 	/* no nvl keys */
4036 };
4037 
4038 static int
zfs_ioc_remap(const char * fsname,nvlist_t * innvl,nvlist_t * outnvl)4039 zfs_ioc_remap(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
4040 {
4041 	/* This IOCTL is no longer supported. */
4042 	(void) fsname, (void) innvl, (void) outnvl;
4043 	return (0);
4044 }
4045 
4046 /*
4047  * innvl: {
4048  *     "snaps" -> { snapshot1, snapshot2 }
4049  *     (optional) "props" -> { prop -> value (string) }
4050  * }
4051  *
4052  * outnvl: snapshot -> error code (int32)
4053  */
4054 static const zfs_ioc_key_t zfs_keys_snapshot[] = {
4055 	{"snaps",	DATA_TYPE_NVLIST,	0},
4056 	{"props",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
4057 };
4058 
4059 static int
zfs_ioc_snapshot(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)4060 zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4061 {
4062 	nvlist_t *snaps;
4063 	nvlist_t *props = NULL;
4064 	int error, poollen;
4065 	nvpair_t *pair;
4066 
4067 	(void) nvlist_lookup_nvlist(innvl, "props", &props);
4068 	if (!nvlist_empty(props) &&
4069 	    zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS))
4070 		return (SET_ERROR(ENOTSUP));
4071 	if ((error = zfs_check_userprops(props)) != 0)
4072 		return (error);
4073 
4074 	snaps = fnvlist_lookup_nvlist(innvl, "snaps");
4075 	poollen = strlen(poolname);
4076 	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
4077 	    pair = nvlist_next_nvpair(snaps, pair)) {
4078 		const char *name = nvpair_name(pair);
4079 		char *cp = strchr(name, '@');
4080 
4081 		/*
4082 		 * The snap name must contain an @, and the part after it must
4083 		 * contain only valid characters.
4084 		 */
4085 		if (cp == NULL ||
4086 		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
4087 			return (SET_ERROR(EINVAL));
4088 
4089 		/*
4090 		 * The snap must be in the specified pool.
4091 		 */
4092 		if (strncmp(name, poolname, poollen) != 0 ||
4093 		    (name[poollen] != '/' && name[poollen] != '@'))
4094 			return (SET_ERROR(EXDEV));
4095 
4096 		/*
4097 		 * Check for permission to set the properties on the fs.
4098 		 */
4099 		if (!nvlist_empty(props)) {
4100 			*cp = '\0';
4101 			zone_admin_result_t zone_result;
4102 			zone_result = zone_dataset_admin_check(name,
4103 			    ZONE_OP_SETPROP, NULL);
4104 			if (zone_result == ZONE_ADMIN_DENIED) {
4105 				*cp = '@';
4106 				return (SET_ERROR(EPERM));
4107 			}
4108 			if (zone_result == ZONE_ADMIN_ALLOWED) {
4109 				error = zfs_secpolicy_zoned_uid_deleg(name,
4110 				    ZFS_DELEG_PERM_USERPROP, CRED());
4111 			} else {
4112 				error = zfs_secpolicy_write_perms(name,
4113 				    ZFS_DELEG_PERM_USERPROP, CRED());
4114 			}
4115 			*cp = '@';
4116 			if (error != 0)
4117 				return (error);
4118 		}
4119 
4120 		/* This must be the only snap of this fs. */
4121 		for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair);
4122 		    pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) {
4123 			if (strncmp(name, nvpair_name(pair2), cp - name + 1)
4124 			    == 0) {
4125 				return (SET_ERROR(EXDEV));
4126 			}
4127 		}
4128 	}
4129 
4130 	error = dsl_dataset_snapshot(snaps, props, outnvl);
4131 
4132 	return (error);
4133 }
4134 
4135 /*
4136  * innvl: "message" -> string
4137  */
4138 static const zfs_ioc_key_t zfs_keys_log_history[] = {
4139 	{"message",	DATA_TYPE_STRING,	0},
4140 };
4141 
4142 static int
zfs_ioc_log_history(const char * unused,nvlist_t * innvl,nvlist_t * outnvl)4143 zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
4144 {
4145 	(void) unused, (void) outnvl;
4146 	char *poolname;
4147 	spa_t *spa;
4148 	int error;
4149 
4150 	/*
4151 	 * The poolname in the ioctl is not set, we get it from the TSD,
4152 	 * which was set at the end of the last successful ioctl that allows
4153 	 * logging.  The secpolicy func already checked that it is set.
4154 	 * Only one log ioctl is allowed after each successful ioctl, so
4155 	 * we clear the TSD here.
4156 	 */
4157 	poolname = tsd_get(zfs_allow_log_key);
4158 	if (poolname == NULL)
4159 		return (SET_ERROR(EINVAL));
4160 	(void) tsd_set(zfs_allow_log_key, NULL);
4161 	error = spa_open(poolname, &spa, FTAG);
4162 	kmem_strfree(poolname);
4163 	if (error != 0)
4164 		return (error);
4165 
4166 	const char *message = fnvlist_lookup_string(innvl, "message");
4167 
4168 	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
4169 		spa_close(spa, FTAG);
4170 		return (SET_ERROR(ENOTSUP));
4171 	}
4172 
4173 	error = spa_history_log(spa, message);
4174 	spa_close(spa, FTAG);
4175 	return (error);
4176 }
4177 
4178 /*
4179  * This ioctl is used to set the bootenv configuration on the current
4180  * pool. This configuration is stored in the second padding area of the label,
4181  * and it is used by the bootloader(s) to store the bootloader and/or system
4182  * specific data.
4183  * The data is stored as nvlist data stream, and is protected by
4184  * an embedded checksum.
4185  * The version can have two possible values:
4186  * VB_RAW: nvlist should have key GRUB_ENVMAP, value DATA_TYPE_STRING.
4187  * VB_NVLIST: nvlist with arbitrary <key, value> pairs.
4188  */
4189 static const zfs_ioc_key_t zfs_keys_set_bootenv[] = {
4190 	{"version",	DATA_TYPE_UINT64,	0},
4191 	{"<keys>",	DATA_TYPE_ANY, ZK_OPTIONAL | ZK_WILDCARDLIST},
4192 };
4193 
4194 static int
zfs_ioc_set_bootenv(const char * name,nvlist_t * innvl,nvlist_t * outnvl)4195 zfs_ioc_set_bootenv(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
4196 {
4197 	int error;
4198 	spa_t *spa;
4199 
4200 	if ((error = spa_open(name, &spa, FTAG)) != 0)
4201 		return (error);
4202 	spa_vdev_state_enter(spa, SCL_ALL);
4203 	error = vdev_label_write_bootenv(spa->spa_root_vdev, innvl);
4204 	(void) spa_vdev_state_exit(spa, NULL, 0);
4205 	spa_close(spa, FTAG);
4206 	return (error);
4207 }
4208 
4209 static const zfs_ioc_key_t zfs_keys_get_bootenv[] = {
4210 	/* no nvl keys */
4211 };
4212 
4213 static int
zfs_ioc_get_bootenv(const char * name,nvlist_t * innvl,nvlist_t * outnvl)4214 zfs_ioc_get_bootenv(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
4215 {
4216 	spa_t *spa;
4217 	int error;
4218 
4219 	if ((error = spa_open(name, &spa, FTAG)) != 0)
4220 		return (error);
4221 	spa_vdev_state_enter(spa, SCL_ALL);
4222 	error = vdev_label_read_bootenv(spa->spa_root_vdev, outnvl);
4223 	(void) spa_vdev_state_exit(spa, NULL, 0);
4224 	spa_close(spa, FTAG);
4225 	return (error);
4226 }
4227 
4228 /*
4229  * The dp_config_rwlock must not be held when calling this, because the
4230  * unmount may need to write out data.
4231  *
4232  * This function is best-effort.  Callers must deal gracefully if it
4233  * remains mounted (or is remounted after this call).
4234  *
4235  * Returns 0 if the argument is not a snapshot, or it is not currently a
4236  * filesystem, or we were able to unmount it.  Returns error code otherwise.
4237  */
4238 void
zfs_unmount_snap(const char * snapname)4239 zfs_unmount_snap(const char *snapname)
4240 {
4241 	if (strchr(snapname, '@') == NULL)
4242 		return;
4243 
4244 	(void) zfsctl_snapshot_unmount(snapname, MNT_FORCE);
4245 }
4246 
/*
 * Callback-shaped wrapper around zfs_unmount_snap(); arg is ignored and the
 * result is always 0.
 */
static int
zfs_unmount_snap_cb(const char *snapname, void *arg)
{
	(void) arg;

	zfs_unmount_snap(snapname);

	return (0);
}
4254 
4255 /*
4256  * When a clone is destroyed, its origin may also need to be destroyed,
4257  * in which case it must be unmounted.  This routine will do that unmount
4258  * if necessary.
4259  */
4260 void
zfs_destroy_unmount_origin(const char * fsname)4261 zfs_destroy_unmount_origin(const char *fsname)
4262 {
4263 	int error;
4264 	objset_t *os;
4265 	dsl_dataset_t *ds;
4266 
4267 	error = dmu_objset_hold(fsname, FTAG, &os);
4268 	if (error != 0)
4269 		return;
4270 	ds = dmu_objset_ds(os);
4271 	if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) {
4272 		char originname[ZFS_MAX_DATASET_NAME_LEN];
4273 		dsl_dataset_name(ds->ds_prev, originname);
4274 		dmu_objset_rele(os, FTAG);
4275 		zfs_unmount_snap(originname);
4276 	} else {
4277 		dmu_objset_rele(os, FTAG);
4278 	}
4279 }
4280 
4281 /*
4282  * innvl: {
4283  *     "snaps" -> { snapshot1, snapshot2 }
4284  *     (optional boolean) "defer"
4285  * }
4286  *
4287  * outnvl: snapshot -> error code (int32)
4288  */
4289 static const zfs_ioc_key_t zfs_keys_destroy_snaps[] = {
4290 	{"snaps",	DATA_TYPE_NVLIST,	0},
4291 	{"defer",	DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
4292 };
4293 
4294 static int
zfs_ioc_destroy_snaps(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)4295 zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4296 {
4297 	int poollen;
4298 	nvlist_t *snaps;
4299 	nvpair_t *pair;
4300 	boolean_t defer;
4301 	spa_t *spa;
4302 
4303 	snaps = fnvlist_lookup_nvlist(innvl, "snaps");
4304 	defer = nvlist_exists(innvl, "defer");
4305 
4306 	poollen = strlen(poolname);
4307 	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
4308 	    pair = nvlist_next_nvpair(snaps, pair)) {
4309 		const char *name = nvpair_name(pair);
4310 
4311 		/*
4312 		 * The snap must be in the specified pool to prevent the
4313 		 * invalid removal of zvol minors below.
4314 		 */
4315 		if (strncmp(name, poolname, poollen) != 0 ||
4316 		    (name[poollen] != '/' && name[poollen] != '@'))
4317 			return (SET_ERROR(EXDEV));
4318 
4319 		zfs_unmount_snap(nvpair_name(pair));
4320 		if (spa_open(name, &spa, FTAG) == 0) {
4321 			zvol_remove_minors(spa, name, B_TRUE);
4322 			spa_close(spa, FTAG);
4323 		}
4324 	}
4325 
4326 	return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
4327 }
4328 
4329 /*
4330  * Create bookmarks. The bookmark names are of the form <fs>#<bmark>.
4331  * All bookmarks and snapshots must be in the same pool.
4332  * dsl_bookmark_create_nvl_validate describes the nvlist schema in more detail.
4333  *
4334  * innvl: {
4335  *     new_bookmark1 -> existing_snapshot,
4336  *     new_bookmark2 -> existing_bookmark,
4337  * }
4338  *
4339  * outnvl: bookmark -> error code (int32)
4340  *
4341  */
4342 static const zfs_ioc_key_t zfs_keys_bookmark[] = {
4343 	{"<bookmark>...",	DATA_TYPE_STRING,	ZK_WILDCARDLIST},
4344 };
4345 
4346 static int
zfs_ioc_bookmark(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)4347 zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4348 {
4349 	(void) poolname;
4350 	return (dsl_bookmark_create(innvl, outnvl));
4351 }
4352 
4353 /*
4354  * innvl: {
4355  *     property 1, property 2, ...
4356  * }
4357  *
4358  * outnvl: {
4359  *     bookmark name 1 -> { property 1, property 2, ... },
4360  *     bookmark name 2 -> { property 1, property 2, ... }
4361  * }
4362  *
4363  */
4364 static const zfs_ioc_key_t zfs_keys_get_bookmarks[] = {
4365 	{"<property>...", DATA_TYPE_BOOLEAN, ZK_WILDCARDLIST | ZK_OPTIONAL},
4366 };
4367 
4368 static int
zfs_ioc_get_bookmarks(const char * fsname,nvlist_t * innvl,nvlist_t * outnvl)4369 zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
4370 {
4371 	return (dsl_get_bookmarks(fsname, innvl, outnvl));
4372 }
4373 
4374 /*
4375  * innvl is not used.
4376  *
4377  * outnvl: {
4378  *     property 1, property 2, ...
4379  * }
4380  *
4381  */
4382 static const zfs_ioc_key_t zfs_keys_get_bookmark_props[] = {
4383 	/* no nvl keys */
4384 };
4385 
4386 static int
zfs_ioc_get_bookmark_props(const char * bookmark,nvlist_t * innvl,nvlist_t * outnvl)4387 zfs_ioc_get_bookmark_props(const char *bookmark, nvlist_t *innvl,
4388     nvlist_t *outnvl)
4389 {
4390 	(void) innvl;
4391 	char fsname[ZFS_MAX_DATASET_NAME_LEN];
4392 	char *bmname;
4393 
4394 	bmname = strchr(bookmark, '#');
4395 	if (bmname == NULL)
4396 		return (SET_ERROR(EINVAL));
4397 	bmname++;
4398 
4399 	(void) strlcpy(fsname, bookmark, sizeof (fsname));
4400 	*(strchr(fsname, '#')) = '\0';
4401 
4402 	return (dsl_get_bookmark_props(fsname, bmname, outnvl));
4403 }
4404 
4405 /*
4406  * innvl: {
4407  *     bookmark name 1, bookmark name 2
4408  * }
4409  *
4410  * outnvl: bookmark -> error code (int32)
4411  *
4412  */
4413 static const zfs_ioc_key_t zfs_keys_destroy_bookmarks[] = {
4414 	{"<bookmark>...",	DATA_TYPE_BOOLEAN,	ZK_WILDCARDLIST},
4415 };
4416 
4417 static int
zfs_ioc_destroy_bookmarks(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)4418 zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl,
4419     nvlist_t *outnvl)
4420 {
4421 	int error, poollen;
4422 
4423 	poollen = strlen(poolname);
4424 	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
4425 	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
4426 		const char *name = nvpair_name(pair);
4427 		const char *cp = strchr(name, '#');
4428 
4429 		/*
4430 		 * The bookmark name must contain an #, and the part after it
4431 		 * must contain only valid characters.
4432 		 */
4433 		if (cp == NULL ||
4434 		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
4435 			return (SET_ERROR(EINVAL));
4436 
4437 		/*
4438 		 * The bookmark must be in the specified pool.
4439 		 */
4440 		if (strncmp(name, poolname, poollen) != 0 ||
4441 		    (name[poollen] != '/' && name[poollen] != '#'))
4442 			return (SET_ERROR(EXDEV));
4443 	}
4444 
4445 	error = dsl_bookmark_destroy(innvl, outnvl);
4446 	return (error);
4447 }
4448 
4449 static const zfs_ioc_key_t zfs_keys_channel_program[] = {
4450 	{"program",	DATA_TYPE_STRING,		0},
4451 	{"arg",		DATA_TYPE_ANY,			0},
4452 	{"sync",	DATA_TYPE_BOOLEAN_VALUE,	ZK_OPTIONAL},
4453 	{"instrlimit",	DATA_TYPE_UINT64,		ZK_OPTIONAL},
4454 	{"memlimit",	DATA_TYPE_UINT64,		ZK_OPTIONAL},
4455 };
4456 
4457 static int
zfs_ioc_channel_program(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)4458 zfs_ioc_channel_program(const char *poolname, nvlist_t *innvl,
4459     nvlist_t *outnvl)
4460 {
4461 	const char *program;
4462 	uint64_t instrlimit, memlimit;
4463 	boolean_t sync_flag;
4464 	nvpair_t *nvarg = NULL;
4465 
4466 	program = fnvlist_lookup_string(innvl, ZCP_ARG_PROGRAM);
4467 	if (0 != nvlist_lookup_boolean_value(innvl, ZCP_ARG_SYNC, &sync_flag)) {
4468 		sync_flag = B_TRUE;
4469 	}
4470 	if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_INSTRLIMIT, &instrlimit)) {
4471 		instrlimit = ZCP_DEFAULT_INSTRLIMIT;
4472 	}
4473 	if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_MEMLIMIT, &memlimit)) {
4474 		memlimit = ZCP_DEFAULT_MEMLIMIT;
4475 	}
4476 	nvarg = fnvlist_lookup_nvpair(innvl, ZCP_ARG_ARGLIST);
4477 
4478 	if (instrlimit == 0 || instrlimit > zfs_lua_max_instrlimit)
4479 		return (SET_ERROR(EINVAL));
4480 	if (memlimit == 0 || memlimit > zfs_lua_max_memlimit)
4481 		return (SET_ERROR(EINVAL));
4482 
4483 	return (zcp_eval(poolname, program, sync_flag, instrlimit, memlimit,
4484 	    nvarg, outnvl));
4485 }
4486 
4487 /*
4488  * innvl: unused
4489  * outnvl: empty
4490  */
4491 static const zfs_ioc_key_t zfs_keys_pool_checkpoint[] = {
4492 	/* no nvl keys */
4493 };
4494 
4495 static int
zfs_ioc_pool_checkpoint(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)4496 zfs_ioc_pool_checkpoint(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4497 {
4498 	(void) innvl, (void) outnvl;
4499 	return (spa_checkpoint(poolname));
4500 }
4501 
4502 /*
4503  * innvl: unused
4504  * outnvl: empty
4505  */
4506 static const zfs_ioc_key_t zfs_keys_pool_discard_checkpoint[] = {
4507 	/* no nvl keys */
4508 };
4509 
4510 static int
zfs_ioc_pool_discard_checkpoint(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)4511 zfs_ioc_pool_discard_checkpoint(const char *poolname, nvlist_t *innvl,
4512     nvlist_t *outnvl)
4513 {
4514 	(void) innvl, (void) outnvl;
4515 	return (spa_checkpoint_discard(poolname));
4516 }
4517 
4518 /*
4519  * Loads specific types of data for the given pool
4520  *
4521  * innvl: {
4522  *     "prefetch_type" -> int32_t
4523  * }
4524  *
4525  * outnvl: empty
4526  */
4527 static const zfs_ioc_key_t zfs_keys_pool_prefetch[] = {
4528 	{ZPOOL_PREFETCH_TYPE,	DATA_TYPE_INT32,	0},
4529 };
4530 
4531 static int
zfs_ioc_pool_prefetch(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)4532 zfs_ioc_pool_prefetch(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4533 {
4534 	(void) outnvl;
4535 
4536 	int error;
4537 	spa_t *spa;
4538 	int32_t type;
4539 
4540 	if (nvlist_lookup_int32(innvl, ZPOOL_PREFETCH_TYPE, &type) != 0)
4541 		return (EINVAL);
4542 
4543 	if (type != ZPOOL_PREFETCH_DDT && type != ZPOOL_PREFETCH_BRT)
4544 		return (EINVAL);
4545 
4546 	error = spa_open(poolname, &spa, FTAG);
4547 	if (error != 0)
4548 		return (error);
4549 
4550 	hrtime_t start_time = gethrtime();
4551 
4552 	if (type == ZPOOL_PREFETCH_DDT) {
4553 		ddt_prefetch_all(spa);
4554 		zfs_dbgmsg("pool '%s': loaded ddt into ARC in %llu ms",
4555 		    spa->spa_name,
4556 		    (u_longlong_t)NSEC2MSEC(gethrtime() - start_time));
4557 	} else {
4558 		brt_prefetch_all(spa);
4559 		zfs_dbgmsg("pool '%s': loaded brt into ARC in %llu ms",
4560 		    spa->spa_name,
4561 		    (u_longlong_t)NSEC2MSEC(gethrtime() - start_time));
4562 	}
4563 
4564 	spa_close(spa, FTAG);
4565 
4566 	return (error);
4567 }
4568 
4569 /*
4570  * inputs:
4571  * zc_name		name of dataset to destroy
4572  * zc_defer_destroy	mark for deferred destroy
4573  *
4574  * outputs:		none
4575  */
4576 static int
zfs_ioc_destroy(zfs_cmd_t * zc)4577 zfs_ioc_destroy(zfs_cmd_t *zc)
4578 {
4579 	objset_t *os;
4580 	dmu_objset_type_t ost;
4581 	int err;
4582 
4583 	err = dmu_objset_hold(zc->zc_name, FTAG, &os);
4584 	if (err != 0)
4585 		return (err);
4586 	ost = dmu_objset_type(os);
4587 	dmu_objset_rele(os, FTAG);
4588 
4589 	if (ost == DMU_OST_ZFS)
4590 		zfs_unmount_snap(zc->zc_name);
4591 
4592 	if (strchr(zc->zc_name, '@')) {
4593 		err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
4594 	} else {
4595 		/*
4596 		 * Save zoned_uid before destroying so we can clean up
4597 		 * kernel-side zone tracking after a successful destroy.
4598 		 */
4599 		uint64_t zoned_uid = 0;
4600 		(void) dsl_prop_get(zc->zc_name, "zoned_uid",
4601 		    8, 1, &zoned_uid, NULL);
4602 
4603 		err = dsl_destroy_head(zc->zc_name);
4604 		if (err == EEXIST) {
4605 			/*
4606 			 * It is possible that the given DS may have
4607 			 * hidden child (%recv) datasets - "leftovers"
4608 			 * resulting from the previously interrupted
4609 			 * 'zfs receive'.
4610 			 *
4611 			 * 6 extra bytes for /%recv
4612 			 */
4613 			char namebuf[ZFS_MAX_DATASET_NAME_LEN + 6];
4614 
4615 			if (snprintf(namebuf, sizeof (namebuf), "%s/%s",
4616 			    zc->zc_name, recv_clone_name) >=
4617 			    sizeof (namebuf))
4618 				return (SET_ERROR(EINVAL));
4619 
4620 			/*
4621 			 * Try to remove the hidden child (%recv) and after
4622 			 * that try to remove the target dataset.
4623 			 * If the hidden child (%recv) does not exist
4624 			 * the original error (EEXIST) will be returned
4625 			 */
4626 			err = dsl_destroy_head(namebuf);
4627 			if (err == 0)
4628 				err = dsl_destroy_head(zc->zc_name);
4629 			else if (err == ENOENT)
4630 				err = SET_ERROR(EEXIST);
4631 		}
4632 
4633 		if (err == 0 && zoned_uid != 0) {
4634 			(void) zone_dataset_detach_uid(kcred,
4635 			    zc->zc_name, (uid_t)zoned_uid);
4636 		}
4637 	}
4638 
4639 	return (err);
4640 }
4641 
4642 /*
4643  * innvl: {
4644  *     "initialize_command" -> POOL_INITIALIZE_{CANCEL|START|SUSPEND} (uint64)
4645  *     "initialize_vdevs": { -> guids to initialize (nvlist)
4646  *         "vdev_path_1": vdev_guid_1, (uint64),
4647  *         "vdev_path_2": vdev_guid_2, (uint64),
4648  *         ...
4649  *     },
4650  * }
4651  *
4652  * outnvl: {
4653  *     "initialize_vdevs": { -> initialization errors (nvlist)
4654  *         "vdev_path_1": errno, see function body for possible errnos (uint64)
4655  *         "vdev_path_2": errno, ... (uint64)
4656  *         ...
4657  *     }
4658  * }
4659  *
4660  * EINVAL is returned for an unknown commands or if any of the provided vdev
4661  * guids have be specified with a type other than uint64.
4662  */
4663 static const zfs_ioc_key_t zfs_keys_pool_initialize[] = {
4664 	{ZPOOL_INITIALIZE_COMMAND,	DATA_TYPE_UINT64,	0},
4665 	{ZPOOL_INITIALIZE_VDEVS,	DATA_TYPE_NVLIST,	0}
4666 };
4667 
4668 static int
zfs_ioc_pool_initialize(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)4669 zfs_ioc_pool_initialize(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4670 {
4671 	uint64_t cmd_type;
4672 	if (nvlist_lookup_uint64(innvl, ZPOOL_INITIALIZE_COMMAND,
4673 	    &cmd_type) != 0) {
4674 		return (SET_ERROR(EINVAL));
4675 	}
4676 
4677 	if (!(cmd_type == POOL_INITIALIZE_CANCEL ||
4678 	    cmd_type == POOL_INITIALIZE_START ||
4679 	    cmd_type == POOL_INITIALIZE_SUSPEND ||
4680 	    cmd_type == POOL_INITIALIZE_UNINIT)) {
4681 		return (SET_ERROR(EINVAL));
4682 	}
4683 
4684 	nvlist_t *vdev_guids;
4685 	if (nvlist_lookup_nvlist(innvl, ZPOOL_INITIALIZE_VDEVS,
4686 	    &vdev_guids) != 0) {
4687 		return (SET_ERROR(EINVAL));
4688 	}
4689 
4690 	for (nvpair_t *pair = nvlist_next_nvpair(vdev_guids, NULL);
4691 	    pair != NULL; pair = nvlist_next_nvpair(vdev_guids, pair)) {
4692 		uint64_t vdev_guid;
4693 		if (nvpair_value_uint64(pair, &vdev_guid) != 0) {
4694 			return (SET_ERROR(EINVAL));
4695 		}
4696 	}
4697 
4698 	spa_t *spa;
4699 	int error = spa_open(poolname, &spa, FTAG);
4700 	if (error != 0)
4701 		return (error);
4702 
4703 	nvlist_t *vdev_errlist = fnvlist_alloc();
4704 	int total_errors = spa_vdev_initialize(spa, vdev_guids, cmd_type,
4705 	    vdev_errlist);
4706 
4707 	if (fnvlist_size(vdev_errlist) > 0) {
4708 		fnvlist_add_nvlist(outnvl, ZPOOL_INITIALIZE_VDEVS,
4709 		    vdev_errlist);
4710 	}
4711 	fnvlist_free(vdev_errlist);
4712 
4713 	spa_close(spa, FTAG);
4714 	return (total_errors > 0 ? SET_ERROR(EINVAL) : 0);
4715 }
4716 
4717 /*
4718  * innvl: {
4719  *     "trim_command" -> POOL_TRIM_{CANCEL|START|SUSPEND} (uint64)
4720  *     "trim_vdevs": { -> guids to TRIM (nvlist)
4721  *         "vdev_path_1": vdev_guid_1, (uint64),
4722  *         "vdev_path_2": vdev_guid_2, (uint64),
4723  *         ...
4724  *     },
4725  *     "trim_rate" -> Target TRIM rate in bytes/sec.
4726  *     "trim_secure" -> Set to request a secure TRIM.
4727  * }
4728  *
4729  * outnvl: {
4730  *     "trim_vdevs": { -> TRIM errors (nvlist)
4731  *         "vdev_path_1": errno, see function body for possible errnos (uint64)
4732  *         "vdev_path_2": errno, ... (uint64)
4733  *         ...
4734  *     }
4735  * }
4736  *
4737  * EINVAL is returned for an unknown commands or if any of the provided vdev
4738  * guids have be specified with a type other than uint64.
4739  */
4740 static const zfs_ioc_key_t zfs_keys_pool_trim[] = {
4741 	{ZPOOL_TRIM_COMMAND,	DATA_TYPE_UINT64,		0},
4742 	{ZPOOL_TRIM_VDEVS,	DATA_TYPE_NVLIST,		0},
4743 	{ZPOOL_TRIM_RATE,	DATA_TYPE_UINT64,		ZK_OPTIONAL},
4744 	{ZPOOL_TRIM_SECURE,	DATA_TYPE_BOOLEAN_VALUE,	ZK_OPTIONAL},
4745 };
4746 
4747 static int
zfs_ioc_pool_trim(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)4748 zfs_ioc_pool_trim(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4749 {
4750 	uint64_t cmd_type;
4751 	if (nvlist_lookup_uint64(innvl, ZPOOL_TRIM_COMMAND, &cmd_type) != 0)
4752 		return (SET_ERROR(EINVAL));
4753 
4754 	if (!(cmd_type == POOL_TRIM_CANCEL ||
4755 	    cmd_type == POOL_TRIM_START ||
4756 	    cmd_type == POOL_TRIM_SUSPEND)) {
4757 		return (SET_ERROR(EINVAL));
4758 	}
4759 
4760 	nvlist_t *vdev_guids;
4761 	if (nvlist_lookup_nvlist(innvl, ZPOOL_TRIM_VDEVS, &vdev_guids) != 0)
4762 		return (SET_ERROR(EINVAL));
4763 
4764 	for (nvpair_t *pair = nvlist_next_nvpair(vdev_guids, NULL);
4765 	    pair != NULL; pair = nvlist_next_nvpair(vdev_guids, pair)) {
4766 		uint64_t vdev_guid;
4767 		if (nvpair_value_uint64(pair, &vdev_guid) != 0) {
4768 			return (SET_ERROR(EINVAL));
4769 		}
4770 	}
4771 
4772 	/* Optional, defaults to maximum rate when not provided */
4773 	uint64_t rate;
4774 	if (nvlist_lookup_uint64(innvl, ZPOOL_TRIM_RATE, &rate) != 0)
4775 		rate = 0;
4776 
4777 	/* Optional, defaults to standard TRIM when not provided */
4778 	boolean_t secure;
4779 	if (nvlist_lookup_boolean_value(innvl, ZPOOL_TRIM_SECURE,
4780 	    &secure) != 0) {
4781 		secure = B_FALSE;
4782 	}
4783 
4784 	spa_t *spa;
4785 	int error = spa_open(poolname, &spa, FTAG);
4786 	if (error != 0)
4787 		return (error);
4788 
4789 	nvlist_t *vdev_errlist = fnvlist_alloc();
4790 	int total_errors = spa_vdev_trim(spa, vdev_guids, cmd_type,
4791 	    rate, !!zfs_trim_metaslab_skip, secure, vdev_errlist);
4792 
4793 	if (fnvlist_size(vdev_errlist) > 0)
4794 		fnvlist_add_nvlist(outnvl, ZPOOL_TRIM_VDEVS, vdev_errlist);
4795 
4796 	fnvlist_free(vdev_errlist);
4797 
4798 	spa_close(spa, FTAG);
4799 	return (total_errors > 0 ? SET_ERROR(EINVAL) : 0);
4800 }
4801 
4802 #define	DDT_PRUNE_UNIT		"ddt_prune_unit"
4803 #define	DDT_PRUNE_AMOUNT	"ddt_prune_amount"
4804 
4805 /*
4806  * innvl: {
4807  *     "ddt_prune_unit" -> uint32_t
4808  *     "ddt_prune_amount" -> uint64_t
4809  * }
4810  *
4811  * outnvl: "waited" -> boolean_t
4812  */
4813 static const zfs_ioc_key_t zfs_keys_ddt_prune[] = {
4814 	{DDT_PRUNE_UNIT,	DATA_TYPE_INT32,	0},
4815 	{DDT_PRUNE_AMOUNT,	DATA_TYPE_UINT64,	0},
4816 };
4817 
4818 static int
zfs_ioc_ddt_prune(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)4819 zfs_ioc_ddt_prune(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4820 {
4821 	int32_t unit;
4822 	uint64_t amount;
4823 
4824 	if (nvlist_lookup_int32(innvl, DDT_PRUNE_UNIT, &unit) != 0 ||
4825 	    nvlist_lookup_uint64(innvl, DDT_PRUNE_AMOUNT, &amount) != 0) {
4826 		return (EINVAL);
4827 	}
4828 
4829 	spa_t *spa;
4830 	int error = spa_open(poolname, &spa, FTAG);
4831 	if (error != 0)
4832 		return (error);
4833 
4834 	if (!spa_feature_is_enabled(spa, SPA_FEATURE_FAST_DEDUP)) {
4835 		spa_close(spa, FTAG);
4836 		return (SET_ERROR(ENOTSUP));
4837 	}
4838 
4839 	error = ddt_prune_unique_entries(spa, (zpool_ddt_prune_unit_t)unit,
4840 	    amount);
4841 
4842 	spa_close(spa, FTAG);
4843 
4844 	return (error);
4845 }
4846 
4847 /*
4848  * This ioctl waits for activity of a particular type to complete. If there is
4849  * no activity of that type in progress, it returns immediately, and the
4850  * returned value "waited" is false. If there is activity in progress, and no
4851  * tag is passed in, the ioctl blocks until all activity of that type is
4852  * complete, and then returns with "waited" set to true.
4853  *
4854  * If a tag is provided, it identifies a particular instance of an activity to
4855  * wait for. Currently, this is only valid for use with 'initialize', because
4856  * that is the only activity for which there can be multiple instances running
4857  * concurrently. In the case of 'initialize', the tag corresponds to the guid of
4858  * the vdev on which to wait.
4859  *
4860  * If a thread waiting in the ioctl receives a signal, the call will return
4861  * immediately, and the return value will be EINTR.
4862  *
4863  * innvl: {
4864  *     "wait_activity" -> int32_t
4865  *     (optional) "wait_tag" -> uint64_t
4866  * }
4867  *
4868  * outnvl: "waited" -> boolean_t
4869  */
4870 static const zfs_ioc_key_t zfs_keys_pool_wait[] = {
4871 	{ZPOOL_WAIT_ACTIVITY,	DATA_TYPE_INT32,		0},
4872 	{ZPOOL_WAIT_TAG,	DATA_TYPE_UINT64,		ZK_OPTIONAL},
4873 };
4874 
4875 static int
zfs_ioc_wait(const char * name,nvlist_t * innvl,nvlist_t * outnvl)4876 zfs_ioc_wait(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
4877 {
4878 	int32_t activity;
4879 	uint64_t tag;
4880 	boolean_t waited;
4881 	int error;
4882 
4883 	if (nvlist_lookup_int32(innvl, ZPOOL_WAIT_ACTIVITY, &activity) != 0)
4884 		return (EINVAL);
4885 
4886 	if (nvlist_lookup_uint64(innvl, ZPOOL_WAIT_TAG, &tag) == 0)
4887 		error = spa_wait_tag(name, activity, tag, &waited);
4888 	else
4889 		error = spa_wait(name, activity, &waited);
4890 
4891 	if (error == 0)
4892 		fnvlist_add_boolean_value(outnvl, ZPOOL_WAIT_WAITED, waited);
4893 
4894 	return (error);
4895 }
4896 
4897 /*
4898  * This ioctl waits for activity of a particular type to complete. If there is
4899  * no activity of that type in progress, it returns immediately, and the
4900  * returned value "waited" is false. If there is activity in progress, and no
4901  * tag is passed in, the ioctl blocks until all activity of that type is
4902  * complete, and then returns with "waited" set to true.
4903  *
4904  * If a thread waiting in the ioctl receives a signal, the call will return
4905  * immediately, and the return value will be EINTR.
4906  *
4907  * innvl: {
4908  *     "wait_activity" -> int32_t
4909  * }
4910  *
4911  * outnvl: "waited" -> boolean_t
4912  */
4913 static const zfs_ioc_key_t zfs_keys_fs_wait[] = {
4914 	{ZFS_WAIT_ACTIVITY,	DATA_TYPE_INT32,		0},
4915 };
4916 
4917 static int
zfs_ioc_wait_fs(const char * name,nvlist_t * innvl,nvlist_t * outnvl)4918 zfs_ioc_wait_fs(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
4919 {
4920 	int32_t activity;
4921 	boolean_t waited = B_FALSE;
4922 	int error;
4923 	dsl_pool_t *dp;
4924 	dsl_dir_t *dd;
4925 	dsl_dataset_t *ds;
4926 
4927 	if (nvlist_lookup_int32(innvl, ZFS_WAIT_ACTIVITY, &activity) != 0)
4928 		return (SET_ERROR(EINVAL));
4929 
4930 	if (activity >= ZFS_WAIT_NUM_ACTIVITIES || activity < 0)
4931 		return (SET_ERROR(EINVAL));
4932 
4933 	if ((error = dsl_pool_hold(name, FTAG, &dp)) != 0)
4934 		return (error);
4935 
4936 	if ((error = dsl_dataset_hold(dp, name, FTAG, &ds)) != 0) {
4937 		dsl_pool_rele(dp, FTAG);
4938 		return (error);
4939 	}
4940 
4941 	dd = ds->ds_dir;
4942 	mutex_enter(&dd->dd_activity_lock);
4943 	dd->dd_activity_waiters++;
4944 
4945 	/*
4946 	 * We get a long-hold here so that the dsl_dataset_t and dsl_dir_t
4947 	 * aren't evicted while we're waiting. Normally this is prevented by
4948 	 * holding the pool, but we can't do that while we're waiting since
4949 	 * that would prevent TXGs from syncing out. Some of the functionality
4950 	 * of long-holds (e.g. preventing deletion) is unnecessary for this
4951 	 * case, since we would cancel the waiters before proceeding with a
4952 	 * deletion. An alternative mechanism for keeping the dataset around
4953 	 * could be developed but this is simpler.
4954 	 */
4955 	dsl_dataset_long_hold(ds, FTAG);
4956 	dsl_pool_rele(dp, FTAG);
4957 
4958 	error = dsl_dir_wait(dd, ds, activity, &waited);
4959 
4960 	dsl_dataset_long_rele(ds, FTAG);
4961 	dd->dd_activity_waiters--;
4962 	if (dd->dd_activity_waiters == 0)
4963 		cv_signal(&dd->dd_activity_cv);
4964 	mutex_exit(&dd->dd_activity_lock);
4965 
4966 	dsl_dataset_rele(ds, FTAG);
4967 
4968 	if (error == 0)
4969 		fnvlist_add_boolean_value(outnvl, ZFS_WAIT_WAITED, waited);
4970 
4971 	return (error);
4972 }
4973 
4974 /*
4975  * fsname is name of dataset to rollback (to most recent snapshot)
4976  *
4977  * innvl may contain name of expected target snapshot
4978  *
4979  * outnvl: "target" -> name of most recent snapshot
4980  * }
4981  */
4982 static const zfs_ioc_key_t zfs_keys_rollback[] = {
4983 	{"target",	DATA_TYPE_STRING,	ZK_OPTIONAL},
4984 };
4985 
4986 static int
zfs_ioc_rollback(const char * fsname,nvlist_t * innvl,nvlist_t * outnvl)4987 zfs_ioc_rollback(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
4988 {
4989 	zfsvfs_t *zfsvfs;
4990 	zvol_state_handle_t *zv;
4991 	const char *target = NULL;
4992 	int error;
4993 
4994 	(void) nvlist_lookup_string(innvl, "target", &target);
4995 	if (target != NULL) {
4996 		const char *cp = strchr(target, '@');
4997 
4998 		/*
4999 		 * The snap name must contain an @, and the part after it must
5000 		 * contain only valid characters.
5001 		 */
5002 		if (cp == NULL ||
5003 		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
5004 			return (SET_ERROR(EINVAL));
5005 	}
5006 
5007 	if (getzfsvfs(fsname, &zfsvfs) == 0) {
5008 		dsl_dataset_t *ds;
5009 
5010 		ds = dmu_objset_ds(zfsvfs->z_os);
5011 		error = zfs_suspend_fs(zfsvfs);
5012 		if (error == 0) {
5013 			int resume_err;
5014 
5015 			error = dsl_dataset_rollback(fsname, target, zfsvfs,
5016 			    outnvl);
5017 			resume_err = zfs_resume_fs(zfsvfs, ds);
5018 			error = error ? error : resume_err;
5019 		}
5020 		zfs_vfs_rele(zfsvfs);
5021 	} else if (zvol_suspend(fsname, &zv) == 0) {
5022 		error = dsl_dataset_rollback(fsname, target, zvol_tag(zv),
5023 		    outnvl);
5024 		zvol_resume(zv);
5025 	} else {
5026 		error = dsl_dataset_rollback(fsname, target, NULL, outnvl);
5027 	}
5028 	return (error);
5029 }
5030 
/*
 * Callback that unmounts the snapshot "<fsname>@<arg>", where arg is the
 * short snapshot name passed as a string.  Always returns 0.
 */
static int
recursive_unmount(const char *fsname, void *arg)
{
	char *snap = kmem_asprintf("%s@%s", fsname, (const char *)arg);

	zfs_unmount_snap(snap);
	kmem_strfree(snap);

	return (0);
}
5043 
5044 /*
5045  *
5046  * snapname is the snapshot to redact.
5047  * innvl: {
5048  *     "bookname" -> (string)
5049  *         shortname of the redaction bookmark to generate
5050  *     "snapnv" -> (nvlist, values ignored)
5051  *         snapshots to redact snapname with respect to
5052  * }
5053  *
5054  * outnvl is unused
5055  */
5056 
5057 static const zfs_ioc_key_t zfs_keys_redact[] = {
5058 	{"bookname",		DATA_TYPE_STRING,	0},
5059 	{"snapnv",		DATA_TYPE_NVLIST,	0},
5060 };
5061 
5062 static int
zfs_ioc_redact(const char * snapname,nvlist_t * innvl,nvlist_t * outnvl)5063 zfs_ioc_redact(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
5064 {
5065 	(void) outnvl;
5066 	nvlist_t *redactnvl = NULL;
5067 	const char *redactbook = NULL;
5068 
5069 	if (nvlist_lookup_nvlist(innvl, "snapnv", &redactnvl) != 0)
5070 		return (SET_ERROR(EINVAL));
5071 	if (fnvlist_num_pairs(redactnvl) == 0)
5072 		return (SET_ERROR(ENXIO));
5073 	if (nvlist_lookup_string(innvl, "bookname", &redactbook) != 0)
5074 		return (SET_ERROR(EINVAL));
5075 
5076 	return (dmu_redact_snap(snapname, redactnvl, redactbook));
5077 }
5078 
5079 /*
5080  * inputs:
5081  * zc_name	old name of dataset
5082  * zc_value	new name of dataset
5083  * zc_cookie	recursive flag (only valid for snapshots)
5084  *
5085  * outputs:	none
5086  */
5087 static int
zfs_ioc_rename(zfs_cmd_t * zc)5088 zfs_ioc_rename(zfs_cmd_t *zc)
5089 {
5090 	objset_t *os;
5091 	dmu_objset_type_t ost;
5092 	boolean_t recursive = zc->zc_cookie & 1;
5093 	boolean_t nounmount = !!(zc->zc_cookie & 2);
5094 	char *at;
5095 	int err;
5096 
5097 	/* "zfs rename" from and to ...%recv datasets should both fail */
5098 	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
5099 	zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
5100 	if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
5101 	    dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
5102 	    strchr(zc->zc_name, '%') || strchr(zc->zc_value, '%'))
5103 		return (SET_ERROR(EINVAL));
5104 
5105 	err = dmu_objset_hold(zc->zc_name, FTAG, &os);
5106 	if (err != 0)
5107 		return (err);
5108 	ost = dmu_objset_type(os);
5109 	dmu_objset_rele(os, FTAG);
5110 
5111 	at = strchr(zc->zc_name, '@');
5112 	if (at != NULL) {
5113 		/* snaps must be in same fs */
5114 		int error;
5115 
5116 		if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1))
5117 			return (SET_ERROR(EXDEV));
5118 		*at = '\0';
5119 		if (ost == DMU_OST_ZFS && !nounmount) {
5120 			error = dmu_objset_find(zc->zc_name,
5121 			    recursive_unmount, at + 1,
5122 			    recursive ? DS_FIND_CHILDREN : 0);
5123 			if (error != 0) {
5124 				*at = '@';
5125 				return (error);
5126 			}
5127 		}
5128 		error = dsl_dataset_rename_snapshot(zc->zc_name,
5129 		    at + 1, strchr(zc->zc_value, '@') + 1, recursive);
5130 		*at = '@';
5131 
5132 		return (error);
5133 	} else {
5134 		/*
5135 		 * For dataset renames, update kernel-side zone tracking
5136 		 * if the dataset has a zoned_uid delegation.  Read the
5137 		 * property before rename, then detach old / attach new.
5138 		 */
5139 		uint64_t zoned_uid = 0;
5140 		(void) dsl_prop_get(zc->zc_name, "zoned_uid",
5141 		    8, 1, &zoned_uid, NULL);
5142 
5143 		err = dsl_dir_rename(zc->zc_name, zc->zc_value);
5144 
5145 		if (err == 0 && zoned_uid != 0) {
5146 			(void) zone_dataset_detach_uid(kcred,
5147 			    zc->zc_name, (uid_t)zoned_uid);
5148 			(void) zone_dataset_attach_uid(kcred,
5149 			    zc->zc_value, (uid_t)zoned_uid);
5150 		}
5151 		return (err);
5152 	}
5153 }
5154 
5155 static int
zfs_check_settable(const char * dsname,nvpair_t * pair,cred_t * cr)5156 zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
5157 {
5158 	const char *propname = nvpair_name(pair);
5159 	boolean_t issnap = (strchr(dsname, '@') != NULL);
5160 	zfs_prop_t prop = zfs_name_to_prop(propname);
5161 	uint64_t intval, compval;
5162 	int err;
5163 
5164 	if (prop == ZPROP_USERPROP) {
5165 		if (zfs_prop_user(propname)) {
5166 			zone_admin_result_t zone_result;
5167 			zone_result = zone_dataset_admin_check(dsname,
5168 			    ZONE_OP_SETPROP, NULL);
5169 			if (zone_result == ZONE_ADMIN_ALLOWED)
5170 				return (zfs_secpolicy_zoned_uid_deleg(dsname,
5171 				    ZFS_DELEG_PERM_USERPROP, cr));
5172 			if (zone_result == ZONE_ADMIN_DENIED)
5173 				return (SET_ERROR(EPERM));
5174 			if ((err = zfs_secpolicy_write_perms(dsname,
5175 			    ZFS_DELEG_PERM_USERPROP, cr)))
5176 				return (err);
5177 			return (0);
5178 		}
5179 
5180 		if (!issnap && zfs_prop_userquota(propname)) {
5181 			const char *perm = NULL;
5182 			const char *uq_prefix =
5183 			    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
5184 			const char *gq_prefix =
5185 			    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
5186 			const char *uiq_prefix =
5187 			    zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA];
5188 			const char *giq_prefix =
5189 			    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA];
5190 			const char *pq_prefix =
5191 			    zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA];
5192 			const char *piq_prefix = zfs_userquota_prop_prefixes[\
5193 			    ZFS_PROP_PROJECTOBJQUOTA];
5194 
5195 			if (strncmp(propname, uq_prefix,
5196 			    strlen(uq_prefix)) == 0) {
5197 				perm = ZFS_DELEG_PERM_USERQUOTA;
5198 			} else if (strncmp(propname, uiq_prefix,
5199 			    strlen(uiq_prefix)) == 0) {
5200 				perm = ZFS_DELEG_PERM_USEROBJQUOTA;
5201 			} else if (strncmp(propname, gq_prefix,
5202 			    strlen(gq_prefix)) == 0) {
5203 				perm = ZFS_DELEG_PERM_GROUPQUOTA;
5204 			} else if (strncmp(propname, giq_prefix,
5205 			    strlen(giq_prefix)) == 0) {
5206 				perm = ZFS_DELEG_PERM_GROUPOBJQUOTA;
5207 			} else if (strncmp(propname, pq_prefix,
5208 			    strlen(pq_prefix)) == 0) {
5209 				perm = ZFS_DELEG_PERM_PROJECTQUOTA;
5210 			} else if (strncmp(propname, piq_prefix,
5211 			    strlen(piq_prefix)) == 0) {
5212 				perm = ZFS_DELEG_PERM_PROJECTOBJQUOTA;
5213 			} else {
5214 				/* {USER|GROUP|PROJECT}USED are read-only */
5215 				return (SET_ERROR(EINVAL));
5216 			}
5217 
5218 			zone_admin_result_t zone_result;
5219 			zone_result = zone_dataset_admin_check(dsname,
5220 			    ZONE_OP_SETPROP, NULL);
5221 			if (zone_result == ZONE_ADMIN_ALLOWED)
5222 				return (zfs_secpolicy_zoned_uid_deleg(dsname,
5223 				    perm, cr));
5224 			if (zone_result == ZONE_ADMIN_DENIED)
5225 				return (SET_ERROR(EPERM));
5226 			if ((err = zfs_secpolicy_write_perms(dsname, perm, cr)))
5227 				return (err);
5228 			return (0);
5229 		}
5230 
5231 		return (SET_ERROR(EINVAL));
5232 	}
5233 
5234 	if (issnap)
5235 		return (SET_ERROR(EINVAL));
5236 
5237 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
5238 		/*
5239 		 * dsl_prop_get_all_impl() returns properties in this
5240 		 * format.
5241 		 */
5242 		nvlist_t *attrs;
5243 		VERIFY0(nvpair_value_nvlist(pair, &attrs));
5244 		VERIFY0(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &pair));
5245 	}
5246 
5247 	/*
5248 	 * Check that this value is valid for this pool version
5249 	 */
5250 	switch (prop) {
5251 	case ZFS_PROP_COMPRESSION:
5252 		/*
5253 		 * If the user specified gzip compression, make sure
5254 		 * the SPA supports it. We ignore any errors here since
5255 		 * we'll catch them later.
5256 		 */
5257 		if (nvpair_value_uint64(pair, &intval) == 0) {
5258 			compval = ZIO_COMPRESS_ALGO(intval);
5259 			if (compval >= ZIO_COMPRESS_GZIP_1 &&
5260 			    compval <= ZIO_COMPRESS_GZIP_9 &&
5261 			    zfs_earlier_version(dsname,
5262 			    SPA_VERSION_GZIP_COMPRESSION)) {
5263 				return (SET_ERROR(ENOTSUP));
5264 			}
5265 
5266 			if (compval == ZIO_COMPRESS_ZLE &&
5267 			    zfs_earlier_version(dsname,
5268 			    SPA_VERSION_ZLE_COMPRESSION))
5269 				return (SET_ERROR(ENOTSUP));
5270 
5271 			if (compval == ZIO_COMPRESS_LZ4) {
5272 				spa_t *spa;
5273 
5274 				if ((err = spa_open(dsname, &spa, FTAG)) != 0)
5275 					return (err);
5276 
5277 				if (!spa_feature_is_enabled(spa,
5278 				    SPA_FEATURE_LZ4_COMPRESS)) {
5279 					spa_close(spa, FTAG);
5280 					return (SET_ERROR(ENOTSUP));
5281 				}
5282 				spa_close(spa, FTAG);
5283 			}
5284 
5285 			if (compval == ZIO_COMPRESS_ZSTD) {
5286 				spa_t *spa;
5287 
5288 				if ((err = spa_open(dsname, &spa, FTAG)) != 0)
5289 					return (err);
5290 
5291 				if (!spa_feature_is_enabled(spa,
5292 				    SPA_FEATURE_ZSTD_COMPRESS)) {
5293 					spa_close(spa, FTAG);
5294 					return (SET_ERROR(ENOTSUP));
5295 				}
5296 				spa_close(spa, FTAG);
5297 			}
5298 		}
5299 		break;
5300 
5301 	case ZFS_PROP_COPIES:
5302 		if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
5303 			return (SET_ERROR(ENOTSUP));
5304 		break;
5305 
5306 	case ZFS_PROP_VOLBLOCKSIZE:
5307 	case ZFS_PROP_RECORDSIZE:
5308 		/* Record sizes above 128k need the feature to be enabled */
5309 		if (nvpair_value_uint64(pair, &intval) == 0 &&
5310 		    intval > SPA_OLD_MAXBLOCKSIZE) {
5311 			spa_t *spa;
5312 
5313 			/*
5314 			 * We don't allow setting the property above 1MB,
5315 			 * unless the tunable has been changed.
5316 			 */
5317 			if (intval > zfs_max_recordsize ||
5318 			    intval > SPA_MAXBLOCKSIZE)
5319 				return (SET_ERROR(ERANGE));
5320 
5321 			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
5322 				return (err);
5323 
5324 			if (!spa_feature_is_enabled(spa,
5325 			    SPA_FEATURE_LARGE_BLOCKS)) {
5326 				spa_close(spa, FTAG);
5327 				return (SET_ERROR(ENOTSUP));
5328 			}
5329 			spa_close(spa, FTAG);
5330 		}
5331 		break;
5332 
5333 	case ZFS_PROP_DNODESIZE:
5334 		/* Dnode sizes above 512 need the feature to be enabled */
5335 		if (nvpair_value_uint64(pair, &intval) == 0 &&
5336 		    intval != ZFS_DNSIZE_LEGACY) {
5337 			spa_t *spa;
5338 
5339 			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
5340 				return (err);
5341 
5342 			if (!spa_feature_is_enabled(spa,
5343 			    SPA_FEATURE_LARGE_DNODE)) {
5344 				spa_close(spa, FTAG);
5345 				return (SET_ERROR(ENOTSUP));
5346 			}
5347 			spa_close(spa, FTAG);
5348 		}
5349 		break;
5350 
5351 	case ZFS_PROP_SHARESMB:
5352 		if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
5353 			return (SET_ERROR(ENOTSUP));
5354 		break;
5355 
5356 	case ZFS_PROP_ACLINHERIT:
5357 		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
5358 		    nvpair_value_uint64(pair, &intval) == 0) {
5359 			if (intval == ZFS_ACL_PASSTHROUGH_X &&
5360 			    zfs_earlier_version(dsname,
5361 			    SPA_VERSION_PASSTHROUGH_X))
5362 				return (SET_ERROR(ENOTSUP));
5363 		}
5364 		break;
5365 	case ZFS_PROP_CHECKSUM:
5366 	case ZFS_PROP_DEDUP:
5367 	{
5368 		spa_feature_t feature;
5369 		spa_t *spa;
5370 		int err;
5371 
5372 		/* dedup feature version checks */
5373 		if (prop == ZFS_PROP_DEDUP &&
5374 		    zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
5375 			return (SET_ERROR(ENOTSUP));
5376 
5377 		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
5378 		    nvpair_value_uint64(pair, &intval) == 0) {
5379 			/* check prop value is enabled in features */
5380 			feature = zio_checksum_to_feature(
5381 			    intval & ZIO_CHECKSUM_MASK);
5382 			if (feature == SPA_FEATURE_NONE)
5383 				break;
5384 
5385 			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
5386 				return (err);
5387 
5388 			if (!spa_feature_is_enabled(spa, feature)) {
5389 				spa_close(spa, FTAG);
5390 				return (SET_ERROR(ENOTSUP));
5391 			}
5392 			spa_close(spa, FTAG);
5393 		}
5394 		break;
5395 	}
5396 
5397 	default:
5398 		break;
5399 	}
5400 
5401 	return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
5402 }
5403 
5404 /*
5405  * Removes properties from the given props list that fail permission checks
5406  * needed to clear them and to restore them in case of a receive error. For each
5407  * property, make sure we have both set and inherit permissions.
5408  *
5409  * Returns the first error encountered if any permission checks fail. If the
5410  * caller provides a non-NULL errlist, it also gives the complete list of names
5411  * of all the properties that failed a permission check along with the
5412  * corresponding error numbers. The caller is responsible for freeing the
5413  * returned errlist.
5414  *
5415  * If every property checks out successfully, zero is returned and the list
5416  * pointed at by errlist is NULL.
5417  */
5418 static int
zfs_check_clearable(const char * dataset,nvlist_t * props,nvlist_t ** errlist)5419 zfs_check_clearable(const char *dataset, nvlist_t *props, nvlist_t **errlist)
5420 {
5421 	zfs_cmd_t *zc;
5422 	nvpair_t *pair, *next_pair;
5423 	nvlist_t *errors;
5424 	int err, rv = 0;
5425 
5426 	if (props == NULL)
5427 		return (0);
5428 
5429 	VERIFY0(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP));
5430 
5431 	zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
5432 	(void) strlcpy(zc->zc_name, dataset, sizeof (zc->zc_name));
5433 	pair = nvlist_next_nvpair(props, NULL);
5434 	while (pair != NULL) {
5435 		next_pair = nvlist_next_nvpair(props, pair);
5436 
5437 		(void) strlcpy(zc->zc_value, nvpair_name(pair),
5438 		    sizeof (zc->zc_value));
5439 		if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
5440 		    (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
5441 			VERIFY0(nvlist_remove_nvpair(props, pair));
5442 			VERIFY0(nvlist_add_int32(errors, zc->zc_value, err));
5443 		}
5444 		pair = next_pair;
5445 	}
5446 	kmem_free(zc, sizeof (zfs_cmd_t));
5447 
5448 	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
5449 		nvlist_free(errors);
5450 		errors = NULL;
5451 	} else {
5452 		VERIFY0(nvpair_value_int32(pair, &rv));
5453 	}
5454 
5455 	if (errlist == NULL)
5456 		nvlist_free(errors);
5457 	else
5458 		*errlist = errors;
5459 
5460 	return (rv);
5461 }
5462 
5463 static boolean_t
propval_equals(nvpair_t * p1,nvpair_t * p2)5464 propval_equals(nvpair_t *p1, nvpair_t *p2)
5465 {
5466 	if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
5467 		/* dsl_prop_get_all_impl() format */
5468 		nvlist_t *attrs;
5469 		VERIFY0(nvpair_value_nvlist(p1, &attrs));
5470 		VERIFY0(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &p1));
5471 	}
5472 
5473 	if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
5474 		nvlist_t *attrs;
5475 		VERIFY0(nvpair_value_nvlist(p2, &attrs));
5476 		VERIFY0(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &p2));
5477 	}
5478 
5479 	if (nvpair_type(p1) != nvpair_type(p2))
5480 		return (B_FALSE);
5481 
5482 	if (nvpair_type(p1) == DATA_TYPE_STRING) {
5483 		const char *valstr1, *valstr2;
5484 
5485 		VERIFY0(nvpair_value_string(p1, &valstr1));
5486 		VERIFY0(nvpair_value_string(p2, &valstr2));
5487 		return (strcmp(valstr1, valstr2) == 0);
5488 	} else {
5489 		uint64_t intval1, intval2;
5490 
5491 		VERIFY0(nvpair_value_uint64(p1, &intval1));
5492 		VERIFY0(nvpair_value_uint64(p2, &intval2));
5493 		return (intval1 == intval2);
5494 	}
5495 }
5496 
5497 /*
5498  * Remove properties from props if they are not going to change (as determined
5499  * by comparison with origprops). Remove them from origprops as well, since we
5500  * do not need to clear or restore properties that won't change.
5501  */
5502 static void
props_reduce(nvlist_t * props,nvlist_t * origprops)5503 props_reduce(nvlist_t *props, nvlist_t *origprops)
5504 {
5505 	nvpair_t *pair, *next_pair;
5506 
5507 	if (origprops == NULL)
5508 		return; /* all props need to be received */
5509 
5510 	pair = nvlist_next_nvpair(props, NULL);
5511 	while (pair != NULL) {
5512 		const char *propname = nvpair_name(pair);
5513 		nvpair_t *match;
5514 
5515 		next_pair = nvlist_next_nvpair(props, pair);
5516 
5517 		if ((nvlist_lookup_nvpair(origprops, propname,
5518 		    &match) != 0) || !propval_equals(pair, match))
5519 			goto next; /* need to set received value */
5520 
5521 		/* don't clear the existing received value */
5522 		(void) nvlist_remove_nvpair(origprops, match);
5523 		/* don't bother receiving the property */
5524 		(void) nvlist_remove_nvpair(props, pair);
5525 next:
5526 		pair = next_pair;
5527 	}
5528 }
5529 
5530 /*
5531  * Extract properties that cannot be set PRIOR to the receipt of a dataset.
5532  * For example, refquota cannot be set until after the receipt of a dataset,
5533  * because in replication streams, an older/earlier snapshot may exceed the
5534  * refquota.  We want to receive the older/earlier snapshot, but setting
5535  * refquota pre-receipt will set the dsl's ACTUAL quota, which will prevent
5536  * the older/earlier snapshot from being received (with EDQUOT).
5537  *
5538  * The ZFS test "zfs_receive_011_pos" demonstrates such a scenario.
5539  *
5540  * libzfs will need to be judicious handling errors encountered by props
5541  * extracted by this function.
5542  */
5543 static nvlist_t *
extract_delay_props(nvlist_t * props)5544 extract_delay_props(nvlist_t *props)
5545 {
5546 	nvlist_t *delayprops;
5547 	nvpair_t *nvp, *tmp;
5548 	static const zfs_prop_t delayable[] = {
5549 		ZFS_PROP_REFQUOTA,
5550 		ZFS_PROP_KEYLOCATION,
5551 		/*
5552 		 * Setting ZFS_PROP_SHARESMB requires the objset type to be
5553 		 * known, which is not possible prior to receipt of raw sends.
5554 		 */
5555 		ZFS_PROP_SHARESMB,
5556 		0
5557 	};
5558 	int i;
5559 
5560 	VERIFY0(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP));
5561 
5562 	for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL;
5563 	    nvp = nvlist_next_nvpair(props, nvp)) {
5564 		/*
5565 		 * strcmp() is safe because zfs_prop_to_name() always returns
5566 		 * a bounded string.
5567 		 */
5568 		for (i = 0; delayable[i] != 0; i++) {
5569 			if (strcmp(zfs_prop_to_name(delayable[i]),
5570 			    nvpair_name(nvp)) == 0) {
5571 				break;
5572 			}
5573 		}
5574 		if (delayable[i] != 0) {
5575 			tmp = nvlist_prev_nvpair(props, nvp);
5576 			VERIFY0(nvlist_add_nvpair(delayprops, nvp));
5577 			VERIFY0(nvlist_remove_nvpair(props, nvp));
5578 			nvp = tmp;
5579 		}
5580 	}
5581 
5582 	if (nvlist_empty(delayprops)) {
5583 		nvlist_free(delayprops);
5584 		delayprops = NULL;
5585 	}
5586 	return (delayprops);
5587 }
5588 
5589 static void
zfs_allow_log_destroy(void * arg)5590 zfs_allow_log_destroy(void *arg)
5591 {
5592 	char *poolname = arg;
5593 
5594 	if (poolname != NULL)
5595 		kmem_strfree(poolname);
5596 }
5597 
5598 #ifdef	ZFS_DEBUG
5599 static boolean_t zfs_ioc_recv_inject_err;
5600 #endif
5601 
5602 /*
5603  * nvlist 'errors' is always allocated. It will contain descriptions of
5604  * encountered errors, if any. It's the callers responsibility to free.
5605  */
5606 static int
zfs_ioc_recv_impl(char * tofs,char * tosnap,const char * origin,nvlist_t * recvprops,nvlist_t * localprops,nvlist_t * hidden_args,boolean_t force,boolean_t heal,boolean_t resumable,int input_fd,dmu_replay_record_t * begin_record,uint64_t * read_bytes,uint64_t * errflags,nvlist_t ** errors)5607 zfs_ioc_recv_impl(char *tofs, char *tosnap, const char *origin,
5608     nvlist_t *recvprops, nvlist_t *localprops, nvlist_t *hidden_args,
5609     boolean_t force, boolean_t heal, boolean_t resumable, int input_fd,
5610     dmu_replay_record_t *begin_record, uint64_t *read_bytes,
5611     uint64_t *errflags, nvlist_t **errors)
5612 {
5613 	dmu_recv_cookie_t drc;
5614 	int error = 0;
5615 	int props_error = 0;
5616 	offset_t off, noff;
5617 	nvlist_t *local_delayprops = NULL;
5618 	nvlist_t *recv_delayprops = NULL;
5619 	nvlist_t *inherited_delayprops = NULL;
5620 	nvlist_t *origprops = NULL; /* existing properties */
5621 	nvlist_t *origrecvd = NULL; /* existing received properties */
5622 	boolean_t first_recvd_props = B_FALSE;
5623 	boolean_t tofs_was_redacted;
5624 	zfs_file_t *input_fp;
5625 
5626 	*read_bytes = 0;
5627 	*errflags = 0;
5628 	*errors = fnvlist_alloc();
5629 	off = 0;
5630 
5631 	if ((input_fp = zfs_file_get(input_fd)) == NULL)
5632 		return (SET_ERROR(EBADF));
5633 
5634 	noff = off = zfs_file_off(input_fp);
5635 	error = dmu_recv_begin(tofs, tosnap, begin_record, force, heal,
5636 	    resumable, localprops, hidden_args, origin, &drc, input_fp,
5637 	    &off);
5638 	if (error != 0)
5639 		goto out;
5640 	tofs_was_redacted = dsl_get_redacted(drc.drc_ds);
5641 
5642 	/*
5643 	 * Set properties before we receive the stream so that they are applied
5644 	 * to the new data. Note that we must call dmu_recv_stream() if
5645 	 * dmu_recv_begin() succeeds.
5646 	 */
5647 	if (recvprops != NULL && !drc.drc_newfs) {
5648 		if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >=
5649 		    SPA_VERSION_RECVD_PROPS &&
5650 		    !dsl_prop_get_hasrecvd(tofs))
5651 			first_recvd_props = B_TRUE;
5652 
5653 		/*
5654 		 * If new received properties are supplied, they are to
5655 		 * completely replace the existing received properties,
5656 		 * so stash away the existing ones.
5657 		 */
5658 		if (dsl_prop_get_received(tofs, &origrecvd) == 0) {
5659 			nvlist_t *errlist = NULL;
5660 			/*
5661 			 * Don't bother writing a property if its value won't
5662 			 * change (and avoid the unnecessary security checks).
5663 			 *
5664 			 * The first receive after SPA_VERSION_RECVD_PROPS is a
5665 			 * special case where we blow away all local properties
5666 			 * regardless.
5667 			 */
5668 			if (!first_recvd_props)
5669 				props_reduce(recvprops, origrecvd);
5670 			if (zfs_check_clearable(tofs, origrecvd, &errlist) != 0)
5671 				(void) nvlist_merge(*errors, errlist, 0);
5672 			nvlist_free(errlist);
5673 
5674 			if (clear_received_props(tofs, origrecvd,
5675 			    first_recvd_props ? NULL : recvprops) != 0)
5676 				*errflags |= ZPROP_ERR_NOCLEAR;
5677 		} else {
5678 			*errflags |= ZPROP_ERR_NOCLEAR;
5679 		}
5680 	}
5681 
5682 	/*
5683 	 * Stash away existing properties so we can restore them on error unless
5684 	 * we're doing the first receive after SPA_VERSION_RECVD_PROPS, in which
5685 	 * case "origrecvd" will take care of that.
5686 	 */
5687 	if (localprops != NULL && !drc.drc_newfs && !first_recvd_props) {
5688 		objset_t *os;
5689 		if (dmu_objset_hold(tofs, FTAG, &os) == 0) {
5690 			if (dsl_prop_get_all(os, &origprops) != 0) {
5691 				*errflags |= ZPROP_ERR_NOCLEAR;
5692 			}
5693 			dmu_objset_rele(os, FTAG);
5694 		} else {
5695 			*errflags |= ZPROP_ERR_NOCLEAR;
5696 		}
5697 	}
5698 
5699 	if (recvprops != NULL) {
5700 		props_error = dsl_prop_set_hasrecvd(tofs);
5701 
5702 		if (props_error == 0) {
5703 			recv_delayprops = extract_delay_props(recvprops);
5704 			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
5705 			    recvprops, *errors);
5706 		}
5707 	}
5708 
5709 	if (localprops != NULL) {
5710 		nvlist_t *oprops = fnvlist_alloc();
5711 		nvlist_t *xprops = fnvlist_alloc();
5712 		nvpair_t *nvp = NULL;
5713 
5714 		while ((nvp = nvlist_next_nvpair(localprops, nvp)) != NULL) {
5715 			if (nvpair_type(nvp) == DATA_TYPE_BOOLEAN) {
5716 				/* -x property */
5717 				const char *name = nvpair_name(nvp);
5718 				zfs_prop_t prop = zfs_name_to_prop(name);
5719 				if (prop != ZPROP_USERPROP) {
5720 					if (!zfs_prop_inheritable(prop))
5721 						continue;
5722 				} else if (!zfs_prop_user(name))
5723 					continue;
5724 				fnvlist_add_boolean(xprops, name);
5725 			} else {
5726 				/* -o property=value */
5727 				fnvlist_add_nvpair(oprops, nvp);
5728 			}
5729 		}
5730 
5731 		local_delayprops = extract_delay_props(oprops);
5732 		(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
5733 		    oprops, *errors);
5734 		inherited_delayprops = extract_delay_props(xprops);
5735 		(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED,
5736 		    xprops, *errors);
5737 
5738 		nvlist_free(oprops);
5739 		nvlist_free(xprops);
5740 	}
5741 
5742 	error = dmu_recv_stream(&drc, &off);
5743 
5744 	if (error == 0) {
5745 		zfsvfs_t *zfsvfs = NULL;
5746 		zvol_state_handle_t *zv = NULL;
5747 
5748 		if (getzfsvfs(tofs, &zfsvfs) == 0) {
5749 			/* online recv */
5750 			dsl_dataset_t *ds;
5751 			int end_err;
5752 			boolean_t stream_is_redacted = DMU_GET_FEATUREFLAGS(
5753 			    begin_record->drr_u.drr_begin.
5754 			    drr_versioninfo) & DMU_BACKUP_FEATURE_REDACTED;
5755 
5756 			ds = dmu_objset_ds(zfsvfs->z_os);
5757 			error = zfs_suspend_fs(zfsvfs);
5758 			/*
5759 			 * If the suspend fails, then the recv_end will
5760 			 * likely also fail, and clean up after itself.
5761 			 */
5762 			end_err = dmu_recv_end(&drc, zfsvfs);
5763 			/*
5764 			 * If the dataset was not redacted, but we received a
5765 			 * redacted stream onto it, we need to unmount the
5766 			 * dataset.  Otherwise, resume the filesystem.
5767 			 */
5768 			if (error == 0 && !drc.drc_newfs &&
5769 			    stream_is_redacted && !tofs_was_redacted) {
5770 				error = zfs_end_fs(zfsvfs, ds);
5771 			} else if (error == 0) {
5772 				error = zfs_resume_fs(zfsvfs, ds);
5773 			}
5774 			error = error ? error : end_err;
5775 			zfs_vfs_rele(zfsvfs);
5776 		} else if (zvol_suspend(tofs, &zv) == 0) {
5777 			error = dmu_recv_end(&drc, zvol_tag(zv));
5778 			zvol_resume(zv);
5779 		} else {
5780 			error = dmu_recv_end(&drc, NULL);
5781 		}
5782 
5783 		/* Set delayed properties now, after we're done receiving. */
5784 		if (recv_delayprops != NULL && error == 0) {
5785 			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
5786 			    recv_delayprops, *errors);
5787 		}
5788 		if (local_delayprops != NULL && error == 0) {
5789 			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
5790 			    local_delayprops, *errors);
5791 		}
5792 		if (inherited_delayprops != NULL && error == 0) {
5793 			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED,
5794 			    inherited_delayprops, *errors);
5795 		}
5796 	}
5797 
5798 	/*
5799 	 * Merge delayed props back in with initial props, in case
5800 	 * we're DEBUG and zfs_ioc_recv_inject_err is set (which means
5801 	 * we have to make sure clear_received_props() includes
5802 	 * the delayed properties).
5803 	 *
5804 	 * Since zfs_ioc_recv_inject_err is only in DEBUG kernels,
5805 	 * using ASSERT() will be just like a VERIFY.
5806 	 */
5807 	if (recv_delayprops != NULL) {
5808 		ASSERT0(nvlist_merge(recvprops, recv_delayprops, 0));
5809 		nvlist_free(recv_delayprops);
5810 	}
5811 	if (local_delayprops != NULL) {
5812 		ASSERT0(nvlist_merge(localprops, local_delayprops, 0));
5813 		nvlist_free(local_delayprops);
5814 	}
5815 	if (inherited_delayprops != NULL) {
5816 		ASSERT0(nvlist_merge(localprops, inherited_delayprops, 0));
5817 		nvlist_free(inherited_delayprops);
5818 	}
5819 	*read_bytes = off - noff;
5820 
5821 #ifdef	ZFS_DEBUG
5822 	if (zfs_ioc_recv_inject_err) {
5823 		zfs_ioc_recv_inject_err = B_FALSE;
5824 		error = 1;
5825 	}
5826 #endif
5827 
5828 	/*
5829 	 * On error, restore the original props.
5830 	 */
5831 	if (error != 0 && recvprops != NULL && !drc.drc_newfs) {
5832 		if (clear_received_props(tofs, recvprops, NULL) != 0) {
5833 			/*
5834 			 * We failed to clear the received properties.
5835 			 * Since we may have left a $recvd value on the
5836 			 * system, we can't clear the $hasrecvd flag.
5837 			 */
5838 			*errflags |= ZPROP_ERR_NORESTORE;
5839 		} else if (first_recvd_props) {
5840 			dsl_prop_unset_hasrecvd(tofs);
5841 		}
5842 
5843 		if (origrecvd == NULL && !drc.drc_newfs) {
5844 			/* We failed to stash the original properties. */
5845 			*errflags |= ZPROP_ERR_NORESTORE;
5846 		}
5847 
5848 		/*
5849 		 * dsl_props_set() will not convert RECEIVED to LOCAL on or
5850 		 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
5851 		 * explicitly if we're restoring local properties cleared in the
5852 		 * first new-style receive.
5853 		 */
5854 		if (origrecvd != NULL &&
5855 		    zfs_set_prop_nvlist(tofs, (first_recvd_props ?
5856 		    ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
5857 		    origrecvd, NULL) != 0) {
5858 			/*
5859 			 * We stashed the original properties but failed to
5860 			 * restore them.
5861 			 */
5862 			*errflags |= ZPROP_ERR_NORESTORE;
5863 		}
5864 	}
5865 	if (error != 0 && localprops != NULL && !drc.drc_newfs &&
5866 	    !first_recvd_props) {
5867 		nvlist_t *setprops;
5868 		nvlist_t *inheritprops;
5869 		nvpair_t *nvp;
5870 
5871 		if (origprops == NULL) {
5872 			/* We failed to stash the original properties. */
5873 			*errflags |= ZPROP_ERR_NORESTORE;
5874 			goto out;
5875 		}
5876 
5877 		/* Restore original props */
5878 		setprops = fnvlist_alloc();
5879 		inheritprops = fnvlist_alloc();
5880 		nvp = NULL;
5881 		while ((nvp = nvlist_next_nvpair(localprops, nvp)) != NULL) {
5882 			const char *name = nvpair_name(nvp);
5883 			const char *source;
5884 			nvlist_t *attrs;
5885 
5886 			if (!nvlist_exists(origprops, name)) {
5887 				/*
5888 				 * Property was not present or was explicitly
5889 				 * inherited before the receive, restore this.
5890 				 */
5891 				fnvlist_add_boolean(inheritprops, name);
5892 				continue;
5893 			}
5894 			attrs = fnvlist_lookup_nvlist(origprops, name);
5895 			source = fnvlist_lookup_string(attrs, ZPROP_SOURCE);
5896 
5897 			/* Skip received properties */
5898 			if (strcmp(source, ZPROP_SOURCE_VAL_RECVD) == 0)
5899 				continue;
5900 
5901 			if (strcmp(source, tofs) == 0) {
5902 				/* Property was locally set */
5903 				fnvlist_add_nvlist(setprops, name, attrs);
5904 			} else {
5905 				/* Property was implicitly inherited */
5906 				fnvlist_add_boolean(inheritprops, name);
5907 			}
5908 		}
5909 
5910 		if (zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL, setprops,
5911 		    NULL) != 0)
5912 			*errflags |= ZPROP_ERR_NORESTORE;
5913 		if (zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED, inheritprops,
5914 		    NULL) != 0)
5915 			*errflags |= ZPROP_ERR_NORESTORE;
5916 
5917 		nvlist_free(setprops);
5918 		nvlist_free(inheritprops);
5919 	}
5920 out:
5921 	zfs_file_put(input_fp);
5922 	nvlist_free(origrecvd);
5923 	nvlist_free(origprops);
5924 
5925 	if (error == 0)
5926 		error = props_error;
5927 
5928 	return (error);
5929 }
5930 
5931 /*
5932  * inputs:
5933  * zc_name		name of containing filesystem (unused)
5934  * zc_nvlist_src{_size}	nvlist of properties to apply
5935  * zc_nvlist_conf{_size}	nvlist of properties to exclude
5936  *			(DATA_TYPE_BOOLEAN) and override (everything else)
5937  * zc_value		name of snapshot to create
5938  * zc_string		name of clone origin (if DRR_FLAG_CLONE)
5939  * zc_cookie		file descriptor to recv from
5940  * zc_begin_record	the BEGIN record of the stream (not byteswapped)
5941  * zc_guid		force flag
5942  *
5943  * outputs:
5944  * zc_cookie		number of bytes read
5945  * zc_obj		zprop_errflags_t
5946  * zc_nvlist_dst{_size} error for each unapplied received property
5947  */
5948 static int
zfs_ioc_recv(zfs_cmd_t * zc)5949 zfs_ioc_recv(zfs_cmd_t *zc)
5950 {
5951 	dmu_replay_record_t begin_record;
5952 	nvlist_t *errors = NULL;
5953 	nvlist_t *recvdprops = NULL;
5954 	nvlist_t *localprops = NULL;
5955 	const char *origin = NULL;
5956 	char *tosnap;
5957 	char tofs[ZFS_MAX_DATASET_NAME_LEN];
5958 	int error = 0;
5959 
5960 	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
5961 	    strchr(zc->zc_value, '@') == NULL ||
5962 	    strchr(zc->zc_value, '%') != NULL) {
5963 		return (SET_ERROR(EINVAL));
5964 	}
5965 
5966 	(void) strlcpy(tofs, zc->zc_value, sizeof (tofs));
5967 	tosnap = strchr(tofs, '@');
5968 	*tosnap++ = '\0';
5969 
5970 	if (zc->zc_nvlist_src != 0 &&
5971 	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
5972 	    zc->zc_iflags, &recvdprops)) != 0) {
5973 		goto out;
5974 	}
5975 
5976 	if (zc->zc_nvlist_conf != 0 &&
5977 	    (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
5978 	    zc->zc_iflags, &localprops)) != 0) {
5979 		goto out;
5980 	}
5981 
5982 	if (zc->zc_string[0])
5983 		origin = zc->zc_string;
5984 
5985 	begin_record.drr_type = DRR_BEGIN;
5986 	begin_record.drr_payloadlen = 0;
5987 	begin_record.drr_u.drr_begin = zc->zc_begin_record;
5988 
5989 	error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvdprops, localprops,
5990 	    NULL, zc->zc_guid, B_FALSE, B_FALSE, zc->zc_cookie, &begin_record,
5991 	    &zc->zc_cookie, &zc->zc_obj, &errors);
5992 
5993 	/*
5994 	 * Now that all props, initial and delayed, are set, report the prop
5995 	 * errors to the caller.
5996 	 */
5997 	if (zc->zc_nvlist_dst_size != 0 && errors != NULL &&
5998 	    (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
5999 	    put_nvlist(zc, errors) != 0)) {
6000 		/*
6001 		 * Caller made zc->zc_nvlist_dst less than the minimum expected
6002 		 * size or supplied an invalid address.
6003 		 */
6004 		error = SET_ERROR(EINVAL);
6005 	}
6006 
6007 out:
6008 	nvlist_free(errors);
6009 	nvlist_free(recvdprops);
6010 	nvlist_free(localprops);
6011 
6012 	return (error);
6013 }
6014 
6015 /*
6016  * innvl: {
6017  *     "snapname" -> full name of the snapshot to create
6018  *     (optional) "props" -> received properties to set (nvlist)
6019  *     (optional) "localprops" -> override and exclude properties (nvlist)
6020  *     (optional) "origin" -> name of clone origin (DRR_FLAG_CLONE)
6021  *     "begin_record" -> non-byteswapped dmu_replay_record_t
6022  *     "input_fd" -> file descriptor to read stream from (int32)
6023  *     (optional) "force" -> force flag (value ignored)
6024  *     (optional) "heal" -> use send stream to heal data corruption
6025  *     (optional) "resumable" -> resumable flag (value ignored)
6026  *     (optional) "cleanup_fd" -> unused
6027  *     (optional) "action_handle" -> unused
6028  *     (optional) "hidden_args" -> { "wkeydata" -> value }
6029  * }
6030  *
6031  * outnvl: {
6032  *     "read_bytes" -> number of bytes read
6033  *     "error_flags" -> zprop_errflags_t
6034  *     "errors" -> error for each unapplied received property (nvlist)
6035  * }
6036  */
6037 static const zfs_ioc_key_t zfs_keys_recv_new[] = {
6038 	{"snapname",		DATA_TYPE_STRING,	0},
6039 	{"props",		DATA_TYPE_NVLIST,	ZK_OPTIONAL},
6040 	{"localprops",		DATA_TYPE_NVLIST,	ZK_OPTIONAL},
6041 	{"origin",		DATA_TYPE_STRING,	ZK_OPTIONAL},
6042 	{"begin_record",	DATA_TYPE_BYTE_ARRAY,	0},
6043 	{"input_fd",		DATA_TYPE_INT32,	0},
6044 	{"force",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
6045 	{"heal",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
6046 	{"resumable",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
6047 	{"cleanup_fd",		DATA_TYPE_INT32,	ZK_OPTIONAL},
6048 	{"action_handle",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
6049 	{"hidden_args",		DATA_TYPE_NVLIST,	ZK_OPTIONAL},
6050 };
6051 
6052 static int
zfs_ioc_recv_new(const char * fsname,nvlist_t * innvl,nvlist_t * outnvl)6053 zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
6054 {
6055 	dmu_replay_record_t *begin_record;
6056 	uint_t begin_record_size;
6057 	nvlist_t *errors = NULL;
6058 	nvlist_t *recvprops = NULL;
6059 	nvlist_t *localprops = NULL;
6060 	nvlist_t *hidden_args = NULL;
6061 	const char *snapname;
6062 	const char *origin = NULL;
6063 	char *tosnap;
6064 	char tofs[ZFS_MAX_DATASET_NAME_LEN];
6065 	boolean_t force;
6066 	boolean_t heal;
6067 	boolean_t resumable;
6068 	uint64_t read_bytes = 0;
6069 	uint64_t errflags = 0;
6070 	int input_fd = -1;
6071 	int error;
6072 
6073 	snapname = fnvlist_lookup_string(innvl, "snapname");
6074 
6075 	if (dataset_namecheck(snapname, NULL, NULL) != 0 ||
6076 	    strchr(snapname, '@') == NULL ||
6077 	    strchr(snapname, '%') != NULL) {
6078 		return (SET_ERROR(EINVAL));
6079 	}
6080 
6081 	(void) strlcpy(tofs, snapname, sizeof (tofs));
6082 	tosnap = strchr(tofs, '@');
6083 	*tosnap++ = '\0';
6084 
6085 	error = nvlist_lookup_string(innvl, "origin", &origin);
6086 	if (error && error != ENOENT)
6087 		return (error);
6088 
6089 	error = nvlist_lookup_byte_array(innvl, "begin_record",
6090 	    (uchar_t **)&begin_record, &begin_record_size);
6091 	if (error != 0 || begin_record_size != sizeof (*begin_record))
6092 		return (SET_ERROR(EINVAL));
6093 
6094 	input_fd = fnvlist_lookup_int32(innvl, "input_fd");
6095 
6096 	force = nvlist_exists(innvl, "force");
6097 	heal = nvlist_exists(innvl, "heal");
6098 	resumable = nvlist_exists(innvl, "resumable");
6099 
6100 	/* we still use "props" here for backwards compatibility */
6101 	error = nvlist_lookup_nvlist(innvl, "props", &recvprops);
6102 	if (error && error != ENOENT)
6103 		goto out;
6104 
6105 	error = nvlist_lookup_nvlist(innvl, "localprops", &localprops);
6106 	if (error && error != ENOENT)
6107 		goto out;
6108 
6109 	error = nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
6110 	if (error && error != ENOENT)
6111 		goto out;
6112 
6113 	error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvprops, localprops,
6114 	    hidden_args, force, heal, resumable, input_fd, begin_record,
6115 	    &read_bytes, &errflags, &errors);
6116 
6117 	fnvlist_add_uint64(outnvl, "read_bytes", read_bytes);
6118 	fnvlist_add_uint64(outnvl, "error_flags", errflags);
6119 	fnvlist_add_nvlist(outnvl, "errors", errors);
6120 
6121 out:
6122 	nvlist_free(errors);
6123 	nvlist_free(recvprops);
6124 	nvlist_free(localprops);
6125 	nvlist_free(hidden_args);
6126 
6127 	return (error);
6128 }
6129 
/*
 * When stack space is limited, replication stream data is written to the
 * target from a separate taskq thread, to make sure there is enough stack
 * space for the write.
 */
#if !defined(HAVE_LARGE_STACKS)
#define	USE_SEND_TASKQ	1
#endif
6137 
6138 typedef struct dump_bytes_io {
6139 	zfs_file_t	*dbi_fp;
6140 	caddr_t		dbi_buf;
6141 	int		dbi_len;
6142 	int		dbi_err;
6143 } dump_bytes_io_t;
6144 
6145 static void
dump_bytes_cb(void * arg)6146 dump_bytes_cb(void *arg)
6147 {
6148 	dump_bytes_io_t *dbi = (dump_bytes_io_t *)arg;
6149 	zfs_file_t *fp;
6150 	caddr_t buf;
6151 
6152 	fp = dbi->dbi_fp;
6153 	buf = dbi->dbi_buf;
6154 
6155 	dbi->dbi_err = zfs_file_write(fp, buf, dbi->dbi_len, NULL);
6156 }
6157 
6158 typedef struct dump_bytes_arg {
6159 	zfs_file_t	*dba_fp;
6160 #ifdef USE_SEND_TASKQ
6161 	taskq_t		*dba_tq;
6162 	taskq_ent_t	dba_tqent;
6163 #endif
6164 } dump_bytes_arg_t;
6165 
6166 static int
dump_bytes(objset_t * os,void * buf,int len,void * arg)6167 dump_bytes(objset_t *os, void *buf, int len, void *arg)
6168 {
6169 	dump_bytes_arg_t *dba = (dump_bytes_arg_t *)arg;
6170 	dump_bytes_io_t dbi;
6171 
6172 	dbi.dbi_fp = dba->dba_fp;
6173 	dbi.dbi_buf = buf;
6174 	dbi.dbi_len = len;
6175 
6176 #ifdef USE_SEND_TASKQ
6177 	taskq_dispatch_ent(dba->dba_tq, dump_bytes_cb, &dbi, TQ_SLEEP,
6178 	    &dba->dba_tqent);
6179 	taskq_wait(dba->dba_tq);
6180 #else
6181 	dump_bytes_cb(&dbi);
6182 #endif
6183 
6184 	return (dbi.dbi_err);
6185 }
6186 
6187 static int
dump_bytes_init(dump_bytes_arg_t * dba,int fd,dmu_send_outparams_t * out)6188 dump_bytes_init(dump_bytes_arg_t *dba, int fd, dmu_send_outparams_t *out)
6189 {
6190 	zfs_file_t *fp = zfs_file_get(fd);
6191 	if (fp == NULL)
6192 		return (SET_ERROR(EBADF));
6193 
6194 	dba->dba_fp = fp;
6195 #ifdef USE_SEND_TASKQ
6196 	dba->dba_tq = taskq_create("z_send", 1, defclsyspri, 0, 0, 0);
6197 	taskq_init_ent(&dba->dba_tqent);
6198 #endif
6199 
6200 	memset(out, 0, sizeof (dmu_send_outparams_t));
6201 	out->dso_outfunc = dump_bytes;
6202 	out->dso_arg = dba;
6203 	out->dso_dryrun = B_FALSE;
6204 
6205 	return (0);
6206 }
6207 
6208 static void
dump_bytes_fini(dump_bytes_arg_t * dba)6209 dump_bytes_fini(dump_bytes_arg_t *dba)
6210 {
6211 	zfs_file_put(dba->dba_fp);
6212 #ifdef USE_SEND_TASKQ
6213 	taskq_destroy(dba->dba_tq);
6214 #endif
6215 }
6216 
6217 /*
6218  * inputs:
6219  * zc_name	name of snapshot to send
6220  * zc_cookie	file descriptor to send stream to
6221  * zc_obj	fromorigin flag (mutually exclusive with zc_fromobj)
6222  * zc_sendobj	objsetid of snapshot to send
6223  * zc_fromobj	objsetid of incremental fromsnap (may be zero)
6224  * zc_guid	if set, estimate size of stream only.  zc_cookie is ignored.
6225  *		output size in zc_objset_type.
6226  * zc_flags	lzc_send_flags
6227  *
6228  * outputs:
6229  * zc_objset_type	estimated size, if zc_guid is set
6230  *
6231  * NOTE: This is no longer the preferred interface, any new functionality
6232  *	  should be added to zfs_ioc_send_new() instead.
6233  */
6234 static int
zfs_ioc_send(zfs_cmd_t * zc)6235 zfs_ioc_send(zfs_cmd_t *zc)
6236 {
6237 	int error;
6238 	offset_t off;
6239 	boolean_t estimate = (zc->zc_guid != 0);
6240 	boolean_t embedok = (zc->zc_flags & 0x1);
6241 	boolean_t large_block_ok = (zc->zc_flags & 0x2);
6242 	boolean_t compressok = (zc->zc_flags & 0x4);
6243 	boolean_t rawok = (zc->zc_flags & 0x8);
6244 	boolean_t savedok = (zc->zc_flags & 0x10);
6245 
6246 	if (zc->zc_obj != 0) {
6247 		dsl_pool_t *dp;
6248 		dsl_dataset_t *tosnap;
6249 
6250 		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
6251 		if (error != 0)
6252 			return (error);
6253 
6254 		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
6255 		if (error != 0) {
6256 			dsl_pool_rele(dp, FTAG);
6257 			return (error);
6258 		}
6259 
6260 		if (dsl_dir_is_clone(tosnap->ds_dir))
6261 			zc->zc_fromobj =
6262 			    dsl_dir_phys(tosnap->ds_dir)->dd_origin_obj;
6263 		dsl_dataset_rele(tosnap, FTAG);
6264 		dsl_pool_rele(dp, FTAG);
6265 	}
6266 
6267 	if (estimate) {
6268 		dsl_pool_t *dp;
6269 		dsl_dataset_t *tosnap;
6270 		dsl_dataset_t *fromsnap = NULL;
6271 
6272 		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
6273 		if (error != 0)
6274 			return (error);
6275 
6276 		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj,
6277 		    FTAG, &tosnap);
6278 		if (error != 0) {
6279 			dsl_pool_rele(dp, FTAG);
6280 			return (error);
6281 		}
6282 
6283 		if (zc->zc_fromobj != 0) {
6284 			error = dsl_dataset_hold_obj(dp, zc->zc_fromobj,
6285 			    FTAG, &fromsnap);
6286 			if (error != 0) {
6287 				dsl_dataset_rele(tosnap, FTAG);
6288 				dsl_pool_rele(dp, FTAG);
6289 				return (error);
6290 			}
6291 		}
6292 
6293 		error = dmu_send_estimate_fast(tosnap, fromsnap, NULL,
6294 		    compressok || rawok, savedok, &zc->zc_objset_type);
6295 
6296 		if (fromsnap != NULL)
6297 			dsl_dataset_rele(fromsnap, FTAG);
6298 		dsl_dataset_rele(tosnap, FTAG);
6299 		dsl_pool_rele(dp, FTAG);
6300 	} else {
6301 		dump_bytes_arg_t dba;
6302 		dmu_send_outparams_t out;
6303 		error = dump_bytes_init(&dba, zc->zc_cookie, &out);
6304 		if (error)
6305 			return (error);
6306 
6307 		off = zfs_file_off(dba.dba_fp);
6308 		error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
6309 		    zc->zc_fromobj, embedok, large_block_ok, compressok,
6310 		    rawok, savedok, zc->zc_cookie, &off, &out);
6311 
6312 		dump_bytes_fini(&dba);
6313 	}
6314 	return (error);
6315 }
6316 
6317 /*
6318  * inputs:
6319  * zc_name		name of snapshot on which to report progress
6320  * zc_cookie		file descriptor of send stream
6321  *
6322  * outputs:
6323  * zc_cookie		number of bytes written in send stream thus far
6324  * zc_objset_type	logical size of data traversed by send thus far
6325  */
6326 static int
zfs_ioc_send_progress(zfs_cmd_t * zc)6327 zfs_ioc_send_progress(zfs_cmd_t *zc)
6328 {
6329 	dsl_pool_t *dp;
6330 	dsl_dataset_t *ds;
6331 	dmu_sendstatus_t *dsp = NULL;
6332 	int error;
6333 
6334 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
6335 	if (error != 0)
6336 		return (error);
6337 
6338 	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
6339 	if (error != 0) {
6340 		dsl_pool_rele(dp, FTAG);
6341 		return (error);
6342 	}
6343 
6344 	mutex_enter(&ds->ds_sendstream_lock);
6345 
6346 	/*
6347 	 * Iterate over all the send streams currently active on this dataset.
6348 	 * If there's one which matches the specified file descriptor _and_ the
6349 	 * stream was started by the current process, return the progress of
6350 	 * that stream.
6351 	 */
6352 
6353 	for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
6354 	    dsp = list_next(&ds->ds_sendstreams, dsp)) {
6355 		if (dsp->dss_outfd == zc->zc_cookie &&
6356 		    zfs_proc_is_caller(dsp->dss_proc))
6357 			break;
6358 	}
6359 
6360 	if (dsp != NULL) {
6361 		zc->zc_cookie = atomic_cas_64((volatile uint64_t *)dsp->dss_off,
6362 		    0, 0);
6363 		/* This is the closest thing we have to atomic_read_64. */
6364 		zc->zc_objset_type = atomic_cas_64(&dsp->dss_blocks, 0, 0);
6365 	} else {
6366 		error = SET_ERROR(ENOENT);
6367 	}
6368 
6369 	mutex_exit(&ds->ds_sendstream_lock);
6370 	dsl_dataset_rele(ds, FTAG);
6371 	dsl_pool_rele(dp, FTAG);
6372 	return (error);
6373 }
6374 
6375 static int
zfs_ioc_inject_fault(zfs_cmd_t * zc)6376 zfs_ioc_inject_fault(zfs_cmd_t *zc)
6377 {
6378 	int id, error;
6379 
6380 	error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
6381 	    &zc->zc_inject_record);
6382 
6383 	if (error == 0)
6384 		zc->zc_guid = (uint64_t)id;
6385 
6386 	return (error);
6387 }
6388 
6389 static int
zfs_ioc_clear_fault(zfs_cmd_t * zc)6390 zfs_ioc_clear_fault(zfs_cmd_t *zc)
6391 {
6392 	return (zio_clear_fault((int)zc->zc_guid));
6393 }
6394 
6395 static int
zfs_ioc_inject_list_next(zfs_cmd_t * zc)6396 zfs_ioc_inject_list_next(zfs_cmd_t *zc)
6397 {
6398 	int id = (int)zc->zc_guid;
6399 	int error;
6400 
6401 	error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
6402 	    &zc->zc_inject_record);
6403 
6404 	zc->zc_guid = id;
6405 
6406 	return (error);
6407 }
6408 
6409 static int
zfs_ioc_error_log(zfs_cmd_t * zc)6410 zfs_ioc_error_log(zfs_cmd_t *zc)
6411 {
6412 	spa_t *spa;
6413 	int error;
6414 
6415 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
6416 		return (error);
6417 
6418 	error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
6419 	    &zc->zc_nvlist_dst_size);
6420 
6421 	spa_close(spa, FTAG);
6422 
6423 	return (error);
6424 }
6425 
6426 static int
zfs_ioc_clear(zfs_cmd_t * zc)6427 zfs_ioc_clear(zfs_cmd_t *zc)
6428 {
6429 	spa_t *spa;
6430 	vdev_t *vd;
6431 	int error;
6432 
6433 	/*
6434 	 * On zpool clear we also fix up missing slogs
6435 	 */
6436 	spa_namespace_enter(FTAG);
6437 	spa = spa_lookup(zc->zc_name);
6438 	if (spa == NULL) {
6439 		spa_namespace_exit(FTAG);
6440 		return (SET_ERROR(EIO));
6441 	}
6442 	if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
6443 		/* we need to let spa_open/spa_load clear the chains */
6444 		spa_set_log_state(spa, SPA_LOG_CLEAR);
6445 	}
6446 	spa->spa_last_open_failed = 0;
6447 	spa_namespace_exit(FTAG);
6448 
6449 	if (zc->zc_cookie & ZPOOL_NO_REWIND) {
6450 		error = spa_open(zc->zc_name, &spa, FTAG);
6451 	} else {
6452 		nvlist_t *policy;
6453 		nvlist_t *config = NULL;
6454 
6455 		if (zc->zc_nvlist_src == 0)
6456 			return (SET_ERROR(EINVAL));
6457 
6458 		if ((error = get_nvlist(zc->zc_nvlist_src,
6459 		    zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
6460 			error = spa_open_rewind(zc->zc_name, &spa, FTAG,
6461 			    policy, &config);
6462 			if (config != NULL) {
6463 				int err;
6464 
6465 				if ((err = put_nvlist(zc, config)) != 0)
6466 					error = err;
6467 				nvlist_free(config);
6468 			}
6469 			nvlist_free(policy);
6470 		}
6471 	}
6472 
6473 	if (error != 0)
6474 		return (error);
6475 
6476 	/*
6477 	 * If multihost is enabled, resuming I/O is unsafe as another
6478 	 * host may have imported the pool. Check for remote activity.
6479 	 */
6480 	if (spa_multihost(spa) && spa_suspended(spa) &&
6481 	    spa_mmp_remote_host_activity(spa)) {
6482 		spa_close(spa, FTAG);
6483 		return (SET_ERROR(EREMOTEIO));
6484 	}
6485 
6486 	spa_vdev_state_enter(spa, SCL_NONE);
6487 
6488 	if (zc->zc_guid == 0) {
6489 		vd = NULL;
6490 	} else {
6491 		vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
6492 		if (vd == NULL) {
6493 			error = SET_ERROR(ENODEV);
6494 			(void) spa_vdev_state_exit(spa, NULL, error);
6495 			spa_close(spa, FTAG);
6496 			return (error);
6497 		}
6498 	}
6499 
6500 	vdev_clear(spa, vd);
6501 
6502 	(void) spa_vdev_state_exit(spa, spa_suspended(spa) ?
6503 	    NULL : spa->spa_root_vdev, 0);
6504 
6505 	/*
6506 	 * Resume any suspended I/Os.
6507 	 */
6508 	if (zio_resume(spa) != 0)
6509 		error = SET_ERROR(EIO);
6510 
6511 	spa_close(spa, FTAG);
6512 
6513 	return (error);
6514 }
6515 
6516 /*
6517  * Reopen all the vdevs associated with the pool.
6518  *
6519  * innvl: {
6520  *  "scrub_restart" -> when true and scrub is running, allow to restart
6521  *              scrub as the side effect of the reopen (boolean).
6522  * }
6523  *
6524  * outnvl is unused
6525  */
6526 static const zfs_ioc_key_t zfs_keys_pool_reopen[] = {
6527 	{"scrub_restart",	DATA_TYPE_BOOLEAN_VALUE,	ZK_OPTIONAL},
6528 };
6529 
6530 static int
zfs_ioc_pool_reopen(const char * pool,nvlist_t * innvl,nvlist_t * outnvl)6531 zfs_ioc_pool_reopen(const char *pool, nvlist_t *innvl, nvlist_t *outnvl)
6532 {
6533 	(void) outnvl;
6534 	spa_t *spa;
6535 	int error;
6536 	boolean_t rc, scrub_restart = B_TRUE;
6537 
6538 	if (innvl) {
6539 		error = nvlist_lookup_boolean_value(innvl,
6540 		    "scrub_restart", &rc);
6541 		if (error == 0)
6542 			scrub_restart = rc;
6543 	}
6544 
6545 	error = spa_open(pool, &spa, FTAG);
6546 	if (error != 0)
6547 		return (error);
6548 
6549 	spa_vdev_state_enter(spa, SCL_NONE);
6550 
6551 	/*
6552 	 * If the scrub_restart flag is B_FALSE and a scrub is already
6553 	 * in progress then set spa_scrub_reopen flag to B_TRUE so that
6554 	 * we don't restart the scrub as a side effect of the reopen.
6555 	 * Otherwise, let vdev_open() decided if a resilver is required.
6556 	 */
6557 
6558 	spa->spa_scrub_reopen = (!scrub_restart &&
6559 	    dsl_scan_scrubbing(spa->spa_dsl_pool));
6560 	vdev_reopen(spa->spa_root_vdev);
6561 	spa->spa_scrub_reopen = B_FALSE;
6562 
6563 	(void) spa_vdev_state_exit(spa, NULL, 0);
6564 	spa_close(spa, FTAG);
6565 	return (0);
6566 }
6567 
6568 /*
6569  * inputs:
6570  * zc_name	name of filesystem
6571  *
6572  * outputs:
6573  * zc_string	name of conflicting snapshot, if there is one
6574  */
6575 static int
zfs_ioc_promote(zfs_cmd_t * zc)6576 zfs_ioc_promote(zfs_cmd_t *zc)
6577 {
6578 	dsl_pool_t *dp;
6579 	dsl_dataset_t *ds, *ods;
6580 	char origin[ZFS_MAX_DATASET_NAME_LEN];
6581 	char *cp;
6582 	int error;
6583 
6584 	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
6585 	if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
6586 	    strchr(zc->zc_name, '%'))
6587 		return (SET_ERROR(EINVAL));
6588 
6589 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
6590 	if (error != 0)
6591 		return (error);
6592 
6593 	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
6594 	if (error != 0) {
6595 		dsl_pool_rele(dp, FTAG);
6596 		return (error);
6597 	}
6598 
6599 	if (!dsl_dir_is_clone(ds->ds_dir)) {
6600 		dsl_dataset_rele(ds, FTAG);
6601 		dsl_pool_rele(dp, FTAG);
6602 		return (SET_ERROR(EINVAL));
6603 	}
6604 
6605 	error = dsl_dataset_hold_obj(dp,
6606 	    dsl_dir_phys(ds->ds_dir)->dd_origin_obj, FTAG, &ods);
6607 	if (error != 0) {
6608 		dsl_dataset_rele(ds, FTAG);
6609 		dsl_pool_rele(dp, FTAG);
6610 		return (error);
6611 	}
6612 
6613 	dsl_dataset_name(ods, origin);
6614 	dsl_dataset_rele(ods, FTAG);
6615 	dsl_dataset_rele(ds, FTAG);
6616 	dsl_pool_rele(dp, FTAG);
6617 
6618 	/*
6619 	 * We don't need to unmount *all* the origin fs's snapshots, but
6620 	 * it's easier.
6621 	 */
6622 	cp = strchr(origin, '@');
6623 	if (cp)
6624 		*cp = '\0';
6625 	(void) dmu_objset_find(origin,
6626 	    zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS);
6627 	return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
6628 }
6629 
6630 /*
6631  * Retrieve a single {user|group|project}{used|quota}@... property.
6632  *
6633  * inputs:
6634  * zc_name	name of filesystem
6635  * zc_objset_type zfs_userquota_prop_t
6636  * zc_value	domain name (eg. "S-1-234-567-89")
6637  * zc_guid	RID/UID/GID
6638  *
6639  * outputs:
6640  * zc_cookie	property value
6641  */
6642 static int
zfs_ioc_userspace_one(zfs_cmd_t * zc)6643 zfs_ioc_userspace_one(zfs_cmd_t *zc)
6644 {
6645 	zfsvfs_t *zfsvfs;
6646 	int error;
6647 
6648 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
6649 		return (SET_ERROR(EINVAL));
6650 
6651 	error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
6652 	if (error != 0)
6653 		return (error);
6654 
6655 	error = zfs_userspace_one(zfsvfs,
6656 	    zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
6657 	zfsvfs_rele(zfsvfs, FTAG);
6658 
6659 	return (error);
6660 }
6661 
6662 /*
6663  * inputs:
6664  * zc_name		name of filesystem
6665  * zc_cookie		zap cursor
6666  * zc_objset_type	zfs_userquota_prop_t
6667  * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
6668  *
6669  * outputs:
6670  * zc_nvlist_dst[_size]	data buffer (array of zfs_useracct_t)
6671  * zc_cookie	zap cursor
6672  *
6673  * The zc_nvlist_dst output array is limited to 1000 entries.
6674  */
6675 static int
zfs_ioc_userspace_many(zfs_cmd_t * zc)6676 zfs_ioc_userspace_many(zfs_cmd_t *zc)
6677 {
6678 	const size_t batch_limit = 1000 * sizeof (zfs_useracct_t);
6679 	uint64_t bufsize = MIN(zc->zc_nvlist_dst_size, batch_limit);
6680 	zfsvfs_t *zfsvfs;
6681 
6682 	if (bufsize < sizeof (zfs_useracct_t)) {
6683 		zc->zc_nvlist_dst_size = sizeof (zfs_useracct_t);
6684 		return (SET_ERROR(ENOMEM));
6685 	}
6686 
6687 	int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
6688 	if (error != 0)
6689 		return (error);
6690 
6691 	void *buf = vmem_alloc(bufsize, KM_SLEEP);
6692 	zc->zc_nvlist_dst_size = bufsize;
6693 
6694 	error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
6695 	    buf, &zc->zc_nvlist_dst_size, &zc->zc_guid);
6696 
6697 	if (error == 0) {
6698 		error = xcopyout(buf,
6699 		    (void *)(uintptr_t)zc->zc_nvlist_dst,
6700 		    zc->zc_nvlist_dst_size);
6701 	}
6702 	vmem_free(buf, bufsize);
6703 	zfsvfs_rele(zfsvfs, FTAG);
6704 
6705 	return (error);
6706 }
6707 
6708 /*
6709  * inputs:
6710  * zc_name		name of filesystem
6711  *
6712  * outputs:
6713  * none
6714  */
6715 static int
zfs_ioc_userspace_upgrade(zfs_cmd_t * zc)6716 zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
6717 {
6718 	int error = 0;
6719 	zfsvfs_t *zfsvfs;
6720 
6721 	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
6722 		if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
6723 			/*
6724 			 * If userused is not enabled, it may be because the
6725 			 * objset needs to be closed & reopened (to grow the
6726 			 * objset_phys_t).  Suspend/resume the fs will do that.
6727 			 */
6728 			dsl_dataset_t *ds, *newds;
6729 
6730 			ds = dmu_objset_ds(zfsvfs->z_os);
6731 			error = zfs_suspend_fs(zfsvfs);
6732 			if (error == 0) {
6733 				dmu_objset_refresh_ownership(ds, &newds,
6734 				    B_TRUE, zfsvfs);
6735 				error = zfs_resume_fs(zfsvfs, newds);
6736 			}
6737 		}
6738 		if (error == 0) {
6739 			mutex_enter(&zfsvfs->z_os->os_upgrade_lock);
6740 			if (zfsvfs->z_os->os_upgrade_id == 0) {
6741 				/* clear potential error code and retry */
6742 				zfsvfs->z_os->os_upgrade_status = 0;
6743 				mutex_exit(&zfsvfs->z_os->os_upgrade_lock);
6744 
6745 				dsl_pool_config_enter(
6746 				    dmu_objset_pool(zfsvfs->z_os), FTAG);
6747 				dmu_objset_userspace_upgrade(zfsvfs->z_os);
6748 				dsl_pool_config_exit(
6749 				    dmu_objset_pool(zfsvfs->z_os), FTAG);
6750 			} else {
6751 				mutex_exit(&zfsvfs->z_os->os_upgrade_lock);
6752 			}
6753 
6754 			taskq_wait_id(zfsvfs->z_os->os_spa->spa_upgrade_taskq,
6755 			    zfsvfs->z_os->os_upgrade_id);
6756 			error = zfsvfs->z_os->os_upgrade_status;
6757 		}
6758 		zfs_vfs_rele(zfsvfs);
6759 	} else {
6760 		objset_t *os;
6761 
6762 		/* XXX kind of reading contents without owning */
6763 		error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
6764 		if (error != 0)
6765 			return (error);
6766 
6767 		mutex_enter(&os->os_upgrade_lock);
6768 		if (os->os_upgrade_id == 0) {
6769 			/* clear potential error code and retry */
6770 			os->os_upgrade_status = 0;
6771 			mutex_exit(&os->os_upgrade_lock);
6772 
6773 			dmu_objset_userspace_upgrade(os);
6774 		} else {
6775 			mutex_exit(&os->os_upgrade_lock);
6776 		}
6777 
6778 		dsl_pool_rele(dmu_objset_pool(os), FTAG);
6779 
6780 		taskq_wait_id(os->os_spa->spa_upgrade_taskq, os->os_upgrade_id);
6781 		error = os->os_upgrade_status;
6782 
6783 		dsl_dataset_rele_flags(dmu_objset_ds(os), DS_HOLD_FLAG_DECRYPT,
6784 		    FTAG);
6785 	}
6786 	return (error);
6787 }
6788 
6789 /*
6790  * inputs:
6791  * zc_name		name of filesystem
6792  *
6793  * outputs:
6794  * none
6795  */
6796 static int
zfs_ioc_id_quota_upgrade(zfs_cmd_t * zc)6797 zfs_ioc_id_quota_upgrade(zfs_cmd_t *zc)
6798 {
6799 	objset_t *os;
6800 	int error;
6801 
6802 	error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
6803 	if (error != 0)
6804 		return (error);
6805 
6806 	if (dmu_objset_userobjspace_upgradable(os) ||
6807 	    dmu_objset_projectquota_upgradable(os)) {
6808 		mutex_enter(&os->os_upgrade_lock);
6809 		if (os->os_upgrade_id == 0) {
6810 			/* clear potential error code and retry */
6811 			os->os_upgrade_status = 0;
6812 			mutex_exit(&os->os_upgrade_lock);
6813 
6814 			dmu_objset_id_quota_upgrade(os);
6815 		} else {
6816 			mutex_exit(&os->os_upgrade_lock);
6817 		}
6818 
6819 		dsl_pool_rele(dmu_objset_pool(os), FTAG);
6820 
6821 		taskq_wait_id(os->os_spa->spa_upgrade_taskq, os->os_upgrade_id);
6822 		error = os->os_upgrade_status;
6823 	} else {
6824 		dsl_pool_rele(dmu_objset_pool(os), FTAG);
6825 	}
6826 
6827 	dsl_dataset_rele_flags(dmu_objset_ds(os), DS_HOLD_FLAG_DECRYPT, FTAG);
6828 
6829 	return (error);
6830 }
6831 
6832 static int
zfs_ioc_share(zfs_cmd_t * zc)6833 zfs_ioc_share(zfs_cmd_t *zc)
6834 {
6835 	return (SET_ERROR(ENOSYS));
6836 }
6837 
6838 /*
6839  * inputs:
6840  * zc_name		name of containing filesystem
6841  * zc_obj		object # beyond which we want next in-use object #
6842  *
6843  * outputs:
6844  * zc_obj		next in-use object #
6845  */
6846 static int
zfs_ioc_next_obj(zfs_cmd_t * zc)6847 zfs_ioc_next_obj(zfs_cmd_t *zc)
6848 {
6849 	objset_t *os = NULL;
6850 	int error;
6851 
6852 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
6853 	if (error != 0)
6854 		return (error);
6855 
6856 	error = dmu_object_next(os, &zc->zc_obj, B_FALSE, 0);
6857 
6858 	dmu_objset_rele(os, FTAG);
6859 	return (error);
6860 }
6861 
6862 /*
6863  * inputs:
6864  * zc_name		name of filesystem
6865  * zc_value		prefix name for snapshot
6866  * zc_cleanup_fd	cleanup-on-exit file descriptor for calling process
6867  *
6868  * outputs:
6869  * zc_value		short name of new snapshot
6870  */
6871 static int
zfs_ioc_tmp_snapshot(zfs_cmd_t * zc)6872 zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
6873 {
6874 	char *snap_name;
6875 	char *hold_name;
6876 	minor_t minor;
6877 
6878 	zfs_file_t *fp = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
6879 	if (fp == NULL)
6880 		return (SET_ERROR(EBADF));
6881 
6882 	snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
6883 	    (u_longlong_t)ddi_get_lbolt64());
6884 	hold_name = kmem_asprintf("%%%s", zc->zc_value);
6885 
6886 	int error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
6887 	    hold_name);
6888 	if (error == 0)
6889 		(void) strlcpy(zc->zc_value, snap_name,
6890 		    sizeof (zc->zc_value));
6891 	kmem_strfree(snap_name);
6892 	kmem_strfree(hold_name);
6893 	zfs_onexit_fd_rele(fp);
6894 	return (error);
6895 }
6896 
6897 /*
6898  * inputs:
6899  * zc_name		name of "to" snapshot
6900  * zc_value		name of "from" snapshot
6901  * zc_cookie		file descriptor to write diff data on
6902  *
6903  * outputs:
6904  * dmu_diff_record_t's to the file descriptor
6905  */
6906 static int
zfs_ioc_diff(zfs_cmd_t * zc)6907 zfs_ioc_diff(zfs_cmd_t *zc)
6908 {
6909 	zfs_file_t *fp;
6910 	offset_t off;
6911 	int error;
6912 
6913 	if ((fp = zfs_file_get(zc->zc_cookie)) == NULL)
6914 		return (SET_ERROR(EBADF));
6915 
6916 	off = zfs_file_off(fp);
6917 	error = dmu_diff(zc->zc_name, zc->zc_value, fp, &off);
6918 
6919 	zfs_file_put(fp);
6920 
6921 	return (error);
6922 }
6923 
6924 static int
zfs_ioc_smb_acl(zfs_cmd_t * zc)6925 zfs_ioc_smb_acl(zfs_cmd_t *zc)
6926 {
6927 	return (SET_ERROR(ENOTSUP));
6928 }
6929 
6930 /*
6931  * innvl: {
6932  *     "holds" -> { snapname -> holdname (string), ... }
6933  *     (optional) "cleanup_fd" -> fd (int32)
6934  * }
6935  *
6936  * outnvl: {
6937  *     snapname -> error value (int32)
6938  *     ...
6939  * }
6940  */
6941 static const zfs_ioc_key_t zfs_keys_hold[] = {
6942 	{"holds",		DATA_TYPE_NVLIST,	0},
6943 	{"cleanup_fd",		DATA_TYPE_INT32,	ZK_OPTIONAL},
6944 };
6945 
6946 static int
zfs_ioc_hold(const char * pool,nvlist_t * args,nvlist_t * errlist)6947 zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
6948 {
6949 	(void) pool;
6950 	nvpair_t *pair;
6951 	nvlist_t *holds;
6952 	int cleanup_fd = -1;
6953 	int error;
6954 	minor_t minor = 0;
6955 	zfs_file_t *fp = NULL;
6956 
6957 	holds = fnvlist_lookup_nvlist(args, "holds");
6958 
6959 	/* make sure the user didn't pass us any invalid (empty) tags */
6960 	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
6961 	    pair = nvlist_next_nvpair(holds, pair)) {
6962 		const char *htag;
6963 
6964 		error = nvpair_value_string(pair, &htag);
6965 		if (error != 0)
6966 			return (SET_ERROR(error));
6967 
6968 		if (strlen(htag) == 0)
6969 			return (SET_ERROR(EINVAL));
6970 	}
6971 
6972 	if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
6973 		fp = zfs_onexit_fd_hold(cleanup_fd, &minor);
6974 		if (fp == NULL)
6975 			return (SET_ERROR(EBADF));
6976 	}
6977 
6978 	error = dsl_dataset_user_hold(holds, minor, errlist);
6979 	if (fp != NULL) {
6980 		ASSERT3U(minor, !=, 0);
6981 		zfs_onexit_fd_rele(fp);
6982 	}
6983 	return (SET_ERROR(error));
6984 }
6985 
6986 /*
6987  * innvl is not used.
6988  *
6989  * outnvl: {
6990  *    holdname -> time added (uint64 seconds since epoch)
6991  *    ...
6992  * }
6993  */
6994 static const zfs_ioc_key_t zfs_keys_get_holds[] = {
6995 	/* no nvl keys */
6996 };
6997 
6998 static int
zfs_ioc_get_holds(const char * snapname,nvlist_t * args,nvlist_t * outnvl)6999 zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
7000 {
7001 	(void) args;
7002 	return (dsl_dataset_get_holds(snapname, outnvl));
7003 }
7004 
7005 /*
7006  * innvl: {
7007  *     snapname -> { holdname, ... }
7008  *     ...
7009  * }
7010  *
7011  * outnvl: {
7012  *     snapname -> error value (int32)
7013  *     ...
7014  * }
7015  */
7016 static const zfs_ioc_key_t zfs_keys_release[] = {
7017 	{"<snapname>...",	DATA_TYPE_NVLIST,	ZK_WILDCARDLIST},
7018 };
7019 
7020 static int
zfs_ioc_release(const char * pool,nvlist_t * holds,nvlist_t * errlist)7021 zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
7022 {
7023 	(void) pool;
7024 	return (dsl_dataset_user_release(holds, errlist));
7025 }
7026 
7027 /*
7028  * inputs:
7029  * zc_guid		flags (ZEVENT_NONBLOCK)
7030  * zc_cleanup_fd	zevent file descriptor
7031  *
7032  * outputs:
7033  * zc_nvlist_dst	next nvlist event
7034  * zc_cookie		dropped events since last get
7035  */
7036 static int
zfs_ioc_events_next(zfs_cmd_t * zc)7037 zfs_ioc_events_next(zfs_cmd_t *zc)
7038 {
7039 	zfs_zevent_t *ze;
7040 	nvlist_t *event = NULL;
7041 	minor_t minor;
7042 	uint64_t dropped = 0;
7043 	int error;
7044 
7045 	zfs_file_t *fp = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
7046 	if (fp == NULL)
7047 		return (SET_ERROR(EBADF));
7048 
7049 	do {
7050 		error = zfs_zevent_next(ze, &event,
7051 		    &zc->zc_nvlist_dst_size, &dropped);
7052 		if (event != NULL) {
7053 			zc->zc_cookie = dropped;
7054 			error = put_nvlist(zc, event);
7055 			nvlist_free(event);
7056 		}
7057 
7058 		if (zc->zc_guid & ZEVENT_NONBLOCK)
7059 			break;
7060 
7061 		if ((error == 0) || (error != ENOENT))
7062 			break;
7063 
7064 		error = zfs_zevent_wait(ze);
7065 		if (error != 0)
7066 			break;
7067 	} while (1);
7068 
7069 	zfs_zevent_fd_rele(fp);
7070 
7071 	return (error);
7072 }
7073 
7074 /*
7075  * outputs:
7076  * zc_cookie		cleared events count
7077  */
7078 static int
zfs_ioc_events_clear(zfs_cmd_t * zc)7079 zfs_ioc_events_clear(zfs_cmd_t *zc)
7080 {
7081 	uint_t count;
7082 
7083 	zfs_zevent_drain_all(&count);
7084 	zc->zc_cookie = count;
7085 
7086 	return (0);
7087 }
7088 
/*
 * inputs:
 * zc_guid		eid | ZEVENT_SEEK_START | ZEVENT_SEEK_END
 * zc_cleanup_fd	zevent file descriptor
 */
7094 static int
zfs_ioc_events_seek(zfs_cmd_t * zc)7095 zfs_ioc_events_seek(zfs_cmd_t *zc)
7096 {
7097 	zfs_zevent_t *ze;
7098 	minor_t minor;
7099 	int error;
7100 
7101 	zfs_file_t *fp = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
7102 	if (fp == NULL)
7103 		return (SET_ERROR(EBADF));
7104 
7105 	error = zfs_zevent_seek(ze, zc->zc_guid);
7106 	zfs_zevent_fd_rele(fp);
7107 
7108 	return (error);
7109 }
7110 
7111 /*
7112  * inputs:
7113  * zc_name		name of later filesystem or snapshot
7114  * zc_value		full name of old snapshot or bookmark
7115  *
7116  * outputs:
7117  * zc_cookie		space in bytes
7118  * zc_objset_type	compressed space in bytes
7119  * zc_perm_action	uncompressed space in bytes
7120  */
7121 static int
zfs_ioc_space_written(zfs_cmd_t * zc)7122 zfs_ioc_space_written(zfs_cmd_t *zc)
7123 {
7124 	int error;
7125 	dsl_pool_t *dp;
7126 	dsl_dataset_t *new;
7127 
7128 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
7129 	if (error != 0)
7130 		return (error);
7131 	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &new);
7132 	if (error != 0) {
7133 		dsl_pool_rele(dp, FTAG);
7134 		return (error);
7135 	}
7136 	if (strchr(zc->zc_value, '#') != NULL) {
7137 		zfs_bookmark_phys_t bmp;
7138 		error = dsl_bookmark_lookup(dp, zc->zc_value,
7139 		    new, &bmp);
7140 		if (error == 0) {
7141 			error = dsl_dataset_space_written_bookmark(&bmp, new,
7142 			    &zc->zc_cookie,
7143 			    &zc->zc_objset_type, &zc->zc_perm_action);
7144 		}
7145 	} else {
7146 		dsl_dataset_t *old;
7147 		error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
7148 
7149 		if (error == 0) {
7150 			error = dsl_dataset_space_written(old, new,
7151 			    &zc->zc_cookie,
7152 			    &zc->zc_objset_type, &zc->zc_perm_action);
7153 			dsl_dataset_rele(old, FTAG);
7154 		}
7155 	}
7156 	dsl_dataset_rele(new, FTAG);
7157 	dsl_pool_rele(dp, FTAG);
7158 	return (error);
7159 }
7160 
7161 /*
7162  * innvl: {
7163  *     "firstsnap" -> snapshot name
7164  * }
7165  *
7166  * outnvl: {
7167  *     "used" -> space in bytes
7168  *     "compressed" -> compressed space in bytes
7169  *     "uncompressed" -> uncompressed space in bytes
7170  * }
7171  */
7172 static const zfs_ioc_key_t zfs_keys_space_snaps[] = {
7173 	{"firstsnap",	DATA_TYPE_STRING,	0},
7174 };
7175 
7176 static int
zfs_ioc_space_snaps(const char * lastsnap,nvlist_t * innvl,nvlist_t * outnvl)7177 zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
7178 {
7179 	int error;
7180 	dsl_pool_t *dp;
7181 	dsl_dataset_t *new, *old;
7182 	const char *firstsnap;
7183 	uint64_t used = 0, comp = 0, uncomp = 0;
7184 
7185 	firstsnap = fnvlist_lookup_string(innvl, "firstsnap");
7186 
7187 	error = dsl_pool_hold(lastsnap, FTAG, &dp);
7188 	if (error != 0)
7189 		return (error);
7190 
7191 	error = dsl_dataset_hold(dp, lastsnap, FTAG, &new);
7192 	if (error == 0 && !new->ds_is_snapshot) {
7193 		dsl_dataset_rele(new, FTAG);
7194 		error = SET_ERROR(EINVAL);
7195 	}
7196 	if (error != 0) {
7197 		dsl_pool_rele(dp, FTAG);
7198 		return (error);
7199 	}
7200 	error = dsl_dataset_hold(dp, firstsnap, FTAG, &old);
7201 	if (error == 0 && !old->ds_is_snapshot) {
7202 		dsl_dataset_rele(old, FTAG);
7203 		error = SET_ERROR(EINVAL);
7204 	}
7205 	if (error != 0) {
7206 		dsl_dataset_rele(new, FTAG);
7207 		dsl_pool_rele(dp, FTAG);
7208 		return (error);
7209 	}
7210 
7211 	error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp);
7212 	dsl_dataset_rele(old, FTAG);
7213 	dsl_dataset_rele(new, FTAG);
7214 	dsl_pool_rele(dp, FTAG);
7215 	fnvlist_add_uint64(outnvl, "used", used);
7216 	fnvlist_add_uint64(outnvl, "compressed", comp);
7217 	fnvlist_add_uint64(outnvl, "uncompressed", uncomp);
7218 	return (error);
7219 }
7220 
7221 /*
7222  * innvl: {
7223  *     "fd" -> file descriptor to write stream to (int32)
7224  *     (optional) "fromsnap" -> full snap name to send an incremental from
7225  *     (optional) "largeblockok" -> (value ignored)
7226  *         indicates that blocks > 128KB are permitted
7227  *     (optional) "embedok" -> (value ignored)
7228  *         presence indicates DRR_WRITE_EMBEDDED records are permitted
7229  *     (optional) "compressok" -> (value ignored)
7230  *         presence indicates compressed DRR_WRITE records are permitted
7231  *     (optional) "rawok" -> (value ignored)
7232  *         presence indicates raw encrypted records should be used.
7233  *     (optional) "savedok" -> (value ignored)
7234  *         presence indicates we should send a partially received snapshot
7235  *     (optional) "resume_object" and "resume_offset" -> (uint64)
7236  *         if present, resume send stream from specified object and offset.
7237  *     (optional) "redactbook" -> (string)
7238  *         if present, use this bookmark's redaction list to generate a redacted
7239  *         send stream
7240  * }
7241  *
7242  * outnvl is unused
7243  */
7244 static const zfs_ioc_key_t zfs_keys_send_new[] = {
7245 	{"fd",			DATA_TYPE_INT32,	0},
7246 	{"fromsnap",		DATA_TYPE_STRING,	ZK_OPTIONAL},
7247 	{"largeblockok",	DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
7248 	{"embedok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
7249 	{"compressok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
7250 	{"rawok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
7251 	{"savedok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
7252 	{"resume_object",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
7253 	{"resume_offset",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
7254 	{"redactbook",		DATA_TYPE_STRING,	ZK_OPTIONAL},
7255 };
7256 
7257 static int
zfs_ioc_send_new(const char * snapname,nvlist_t * innvl,nvlist_t * outnvl)7258 zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
7259 {
7260 	(void) outnvl;
7261 	int error;
7262 	offset_t off;
7263 	const char *fromname = NULL;
7264 	int fd;
7265 	boolean_t largeblockok;
7266 	boolean_t embedok;
7267 	boolean_t compressok;
7268 	boolean_t rawok;
7269 	boolean_t savedok;
7270 	uint64_t resumeobj = 0;
7271 	uint64_t resumeoff = 0;
7272 	const char *redactbook = NULL;
7273 
7274 	fd = fnvlist_lookup_int32(innvl, "fd");
7275 
7276 	(void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
7277 
7278 	largeblockok = nvlist_exists(innvl, "largeblockok");
7279 	embedok = nvlist_exists(innvl, "embedok");
7280 	compressok = nvlist_exists(innvl, "compressok");
7281 	rawok = nvlist_exists(innvl, "rawok");
7282 	savedok = nvlist_exists(innvl, "savedok");
7283 
7284 	(void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
7285 	(void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
7286 
7287 	(void) nvlist_lookup_string(innvl, "redactbook", &redactbook);
7288 
7289 	dump_bytes_arg_t dba;
7290 	dmu_send_outparams_t out;
7291 	error = dump_bytes_init(&dba, fd, &out);
7292 	if (error)
7293 		return (error);
7294 
7295 	off = zfs_file_off(dba.dba_fp);
7296 	error = dmu_send(snapname, fromname, embedok, largeblockok,
7297 	    compressok, rawok, savedok, resumeobj, resumeoff,
7298 	    redactbook, fd, &off, &out);
7299 
7300 	dump_bytes_fini(&dba);
7301 
7302 	return (error);
7303 }
7304 
7305 static int
send_space_sum(objset_t * os,void * buf,int len,void * arg)7306 send_space_sum(objset_t *os, void *buf, int len, void *arg)
7307 {
7308 	(void) os, (void) buf;
7309 	uint64_t *size = arg;
7310 
7311 	*size += len;
7312 	return (0);
7313 }
7314 
/*
 * Determine approximately how large a zfs send stream will be -- the number
 * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
 *
 * innvl: {
 *     (optional) "from" -> full snap or bookmark name to send an incremental
 *                          from
 *     (optional) "largeblockok" -> (value ignored)
 *         indicates that blocks > 128KB are permitted
 *     (optional) "embedok" -> (value ignored)
 *         presence indicates DRR_WRITE_EMBEDDED records are permitted
 *     (optional) "compressok" -> (value ignored)
 *         presence indicates compressed DRR_WRITE records are permitted
 *     (optional) "rawok" -> (value ignored)
 *         presence indicates raw encrypted records should be used.
 *     (optional) "resume_object" and "resume_offset" -> (uint64)
 *         if present, resume send stream from specified object and offset.
 *     (optional) "bytes" -> (uint64)
 *         if present, subtracted from the fast (non-dry-run) estimate
 *     (optional) "redactbook" -> (string)
 *         if present, the estimate is computed by a dry-run dmu_send() that
 *         is given this bookmark name
 *     (optional) "fd" -> file descriptor to use as a cookie for progress
 *         tracking (int32)
 * }
 *
 * outnvl: {
 *     "space" -> bytes of space (uint64)
 * }
 */
7340 static const zfs_ioc_key_t zfs_keys_send_space[] = {
7341 	{"from",		DATA_TYPE_STRING,	ZK_OPTIONAL},
7342 	{"fromsnap",		DATA_TYPE_STRING,	ZK_OPTIONAL},
7343 	{"largeblockok",	DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
7344 	{"embedok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
7345 	{"compressok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
7346 	{"rawok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
7347 	{"fd",			DATA_TYPE_INT32,	ZK_OPTIONAL},
7348 	{"redactbook",		DATA_TYPE_STRING,	ZK_OPTIONAL},
7349 	{"resume_object",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
7350 	{"resume_offset",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
7351 	{"bytes",		DATA_TYPE_UINT64,	ZK_OPTIONAL},
7352 };
7353 
7354 static int
zfs_ioc_send_space(const char * snapname,nvlist_t * innvl,nvlist_t * outnvl)7355 zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
7356 {
7357 	dsl_pool_t *dp;
7358 	dsl_dataset_t *tosnap;
7359 	dsl_dataset_t *fromsnap = NULL;
7360 	int error;
7361 	const char *fromname = NULL;
7362 	const char *redactlist_book = NULL;
7363 	boolean_t largeblockok;
7364 	boolean_t embedok;
7365 	boolean_t compressok;
7366 	boolean_t rawok;
7367 	boolean_t savedok;
7368 	uint64_t space = 0;
7369 	boolean_t full_estimate = B_FALSE;
7370 	uint64_t resumeobj = 0;
7371 	uint64_t resumeoff = 0;
7372 	uint64_t resume_bytes = 0;
7373 	int32_t fd = -1;
7374 	zfs_bookmark_phys_t zbm = {0};
7375 
7376 	error = dsl_pool_hold(snapname, FTAG, &dp);
7377 	if (error != 0)
7378 		return (error);
7379 
7380 	error = dsl_dataset_hold(dp, snapname, FTAG, &tosnap);
7381 	if (error != 0) {
7382 		dsl_pool_rele(dp, FTAG);
7383 		return (error);
7384 	}
7385 	(void) nvlist_lookup_int32(innvl, "fd", &fd);
7386 
7387 	largeblockok = nvlist_exists(innvl, "largeblockok");
7388 	embedok = nvlist_exists(innvl, "embedok");
7389 	compressok = nvlist_exists(innvl, "compressok");
7390 	rawok = nvlist_exists(innvl, "rawok");
7391 	savedok = nvlist_exists(innvl, "savedok");
7392 	boolean_t from = (nvlist_lookup_string(innvl, "from", &fromname) == 0);
7393 	boolean_t altbook = (nvlist_lookup_string(innvl, "redactbook",
7394 	    &redactlist_book) == 0);
7395 
7396 	(void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
7397 	(void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
7398 	(void) nvlist_lookup_uint64(innvl, "bytes", &resume_bytes);
7399 
7400 	if (altbook) {
7401 		full_estimate = B_TRUE;
7402 	} else if (from) {
7403 		if (strchr(fromname, '#')) {
7404 			error = dsl_bookmark_lookup(dp, fromname, tosnap, &zbm);
7405 
7406 			/*
7407 			 * dsl_bookmark_lookup() will fail with EXDEV if
7408 			 * the from-bookmark and tosnap are at the same txg.
7409 			 * However, it's valid to do a send (and therefore,
7410 			 * a send estimate) from and to the same time point,
7411 			 * if the bookmark is redacted (the incremental send
7412 			 * can change what's redacted on the target).  In
7413 			 * this case, dsl_bookmark_lookup() fills in zbm
7414 			 * but returns EXDEV.  Ignore this error.
7415 			 */
7416 			if (error == EXDEV && zbm.zbm_redaction_obj != 0 &&
7417 			    zbm.zbm_guid ==
7418 			    dsl_dataset_phys(tosnap)->ds_guid)
7419 				error = 0;
7420 
7421 			if (error != 0) {
7422 				dsl_dataset_rele(tosnap, FTAG);
7423 				dsl_pool_rele(dp, FTAG);
7424 				return (error);
7425 			}
7426 			if (zbm.zbm_redaction_obj != 0 || !(zbm.zbm_flags &
7427 			    ZBM_FLAG_HAS_FBN)) {
7428 				full_estimate = B_TRUE;
7429 			}
7430 		} else if (strchr(fromname, '@')) {
7431 			error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
7432 			if (error != 0) {
7433 				dsl_dataset_rele(tosnap, FTAG);
7434 				dsl_pool_rele(dp, FTAG);
7435 				return (error);
7436 			}
7437 
7438 			if (!dsl_dataset_is_before(tosnap, fromsnap, 0)) {
7439 				full_estimate = B_TRUE;
7440 				dsl_dataset_rele(fromsnap, FTAG);
7441 			}
7442 		} else {
7443 			/*
7444 			 * from is not properly formatted as a snapshot or
7445 			 * bookmark
7446 			 */
7447 			dsl_dataset_rele(tosnap, FTAG);
7448 			dsl_pool_rele(dp, FTAG);
7449 			return (SET_ERROR(EINVAL));
7450 		}
7451 	}
7452 
7453 	if (full_estimate) {
7454 		dmu_send_outparams_t out = {0};
7455 		offset_t off = 0;
7456 		out.dso_outfunc = send_space_sum;
7457 		out.dso_arg = &space;
7458 		out.dso_dryrun = B_TRUE;
7459 		/*
7460 		 * We have to release these holds so dmu_send can take them.  It
7461 		 * will do all the error checking we need.
7462 		 */
7463 		dsl_dataset_rele(tosnap, FTAG);
7464 		dsl_pool_rele(dp, FTAG);
7465 		error = dmu_send(snapname, fromname, embedok, largeblockok,
7466 		    compressok, rawok, savedok, resumeobj, resumeoff,
7467 		    redactlist_book, fd, &off, &out);
7468 	} else {
7469 		error = dmu_send_estimate_fast(tosnap, fromsnap,
7470 		    (from && strchr(fromname, '#') != NULL ? &zbm : NULL),
7471 		    compressok || rawok, savedok, &space);
7472 		space -= resume_bytes;
7473 		if (fromsnap != NULL)
7474 			dsl_dataset_rele(fromsnap, FTAG);
7475 		dsl_dataset_rele(tosnap, FTAG);
7476 		dsl_pool_rele(dp, FTAG);
7477 	}
7478 
7479 	fnvlist_add_uint64(outnvl, "space", space);
7480 
7481 	return (error);
7482 }
7483 
7484 /*
7485  * Sync the currently open TXG to disk for the specified pool.
7486  * This is somewhat similar to 'zfs_sync()'.
7487  * For cases that do not result in error this ioctl will wait for
7488  * the currently open TXG to commit before returning back to the caller.
7489  *
7490  * innvl: {
7491  *  "force" -> when true, force uberblock update even if there is no dirty data.
7492  *             In addition this will cause the vdev configuration to be written
7493  *             out including updating the zpool cache file. (boolean_t)
7494  * }
7495  *
7496  * onvl is unused
7497  */
7498 static const zfs_ioc_key_t zfs_keys_pool_sync[] = {
7499 	{"force",	DATA_TYPE_BOOLEAN_VALUE,	0},
7500 };
7501 
7502 static int
zfs_ioc_pool_sync(const char * pool,nvlist_t * innvl,nvlist_t * onvl)7503 zfs_ioc_pool_sync(const char *pool, nvlist_t *innvl, nvlist_t *onvl)
7504 {
7505 	(void) onvl;
7506 	int err;
7507 	boolean_t rc, force = B_FALSE;
7508 	spa_t *spa;
7509 
7510 	if ((err = spa_open(pool, &spa, FTAG)) != 0)
7511 		return (err);
7512 
7513 	if (innvl) {
7514 		err = nvlist_lookup_boolean_value(innvl, "force", &rc);
7515 		if (err == 0)
7516 			force = rc;
7517 	}
7518 
7519 	if (force) {
7520 		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_WRITER);
7521 		vdev_config_dirty(spa->spa_root_vdev);
7522 		spa_config_exit(spa, SCL_CONFIG, FTAG);
7523 	}
7524 	txg_wait_synced(spa_get_dsl(spa), 0);
7525 
7526 	spa_close(spa, FTAG);
7527 
7528 	return (0);
7529 }
7530 
/*
 * Load a user's wrapping key into the kernel.
 * innvl: {
 *     "hidden_args" -> { "wkeydata" -> value }
 *         raw uint8_t array of encryption wrapping key data (32 bytes)
 *     (optional) "noop" -> (value ignored)
 *         presence indicates the key should only be verified, not loaded
 * }
 */
7540 static const zfs_ioc_key_t zfs_keys_load_key[] = {
7541 	{"hidden_args",	DATA_TYPE_NVLIST,	0},
7542 	{"noop",	DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
7543 };
7544 
7545 static int
zfs_ioc_load_key(const char * dsname,nvlist_t * innvl,nvlist_t * outnvl)7546 zfs_ioc_load_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
7547 {
7548 	(void) outnvl;
7549 	int ret;
7550 	dsl_crypto_params_t *dcp = NULL;
7551 	nvlist_t *hidden_args;
7552 	boolean_t noop = nvlist_exists(innvl, "noop");
7553 
7554 	if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
7555 		ret = SET_ERROR(EINVAL);
7556 		goto error;
7557 	}
7558 
7559 	hidden_args = fnvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS);
7560 
7561 	ret = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, NULL,
7562 	    hidden_args, &dcp);
7563 	if (ret != 0)
7564 		goto error;
7565 
7566 	ret = spa_keystore_load_wkey(dsname, dcp, noop);
7567 	if (ret != 0)
7568 		goto error;
7569 
7570 	dsl_crypto_params_free(dcp, noop);
7571 
7572 	return (0);
7573 
7574 error:
7575 	dsl_crypto_params_free(dcp, B_TRUE);
7576 	return (ret);
7577 }
7578 
7579 /*
7580  * Unload a user's wrapping key from the kernel.
7581  * Both innvl and outnvl are unused.
7582  */
7583 static const zfs_ioc_key_t zfs_keys_unload_key[] = {
7584 	/* no nvl keys */
7585 };
7586 
7587 static int
zfs_ioc_unload_key(const char * dsname,nvlist_t * innvl,nvlist_t * outnvl)7588 zfs_ioc_unload_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
7589 {
7590 	(void) innvl, (void) outnvl;
7591 	int ret = 0;
7592 
7593 	if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
7594 		ret = (SET_ERROR(EINVAL));
7595 		goto out;
7596 	}
7597 
7598 	ret = spa_keystore_unload_wkey(dsname);
7599 	if (ret != 0)
7600 		goto out;
7601 
7602 out:
7603 	return (ret);
7604 }
7605 
/*
 * Changes a user's wrapping key used to decrypt a dataset. The keyformat,
 * keylocation, pbkdf2salt, and pbkdf2iters properties can also be specified
 * here to change how the key is derived in userspace.
 *
 * innvl: {
 *    "crypt_cmd" (optional) -> command passed to
 *         dsl_crypto_params_create_nvlist() (uint64); DCP_CMD_NONE if absent
 *    "hidden_args" (optional) -> { "wkeydata" -> value }
 *         raw uint8_t array of new encryption wrapping key data (32 bytes)
 *    "props" (optional) -> { prop -> value }
 * }
 *
 * outnvl is unused
 */
7619 static const zfs_ioc_key_t zfs_keys_change_key[] = {
7620 	{"crypt_cmd",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
7621 	{"hidden_args",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
7622 	{"props",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
7623 };
7624 
7625 static int
zfs_ioc_change_key(const char * dsname,nvlist_t * innvl,nvlist_t * outnvl)7626 zfs_ioc_change_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
7627 {
7628 	(void) outnvl;
7629 	int ret;
7630 	uint64_t cmd = DCP_CMD_NONE;
7631 	dsl_crypto_params_t *dcp = NULL;
7632 	nvlist_t *props = NULL, *hidden_args = NULL;
7633 
7634 	if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
7635 		ret = (SET_ERROR(EINVAL));
7636 		goto error;
7637 	}
7638 
7639 	(void) nvlist_lookup_uint64(innvl, "crypt_cmd", &cmd);
7640 	(void) nvlist_lookup_nvlist(innvl, "props", &props);
7641 	(void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
7642 
7643 	ret = dsl_crypto_params_create_nvlist(cmd, props, hidden_args, &dcp);
7644 	if (ret != 0)
7645 		goto error;
7646 
7647 	/* The keylocation property is set from dcp->cp_keylocation. */
7648 	(void) nvlist_remove_all(props, zfs_prop_to_name(ZFS_PROP_KEYLOCATION));
7649 
7650 	if ((ret = zfs_check_userprops(props)) != 0)
7651 		goto error;
7652 
7653 	ret = spa_keystore_change_key(dsname, dcp, props);
7654 	if (ret != 0)
7655 		goto error;
7656 
7657 	dsl_crypto_params_free(dcp, B_FALSE);
7658 
7659 	return (0);
7660 
7661 error:
7662 	dsl_crypto_params_free(dcp, B_TRUE);
7663 	return (ret);
7664 }
7665 
7666 static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
7667 
7668 static void
zfs_ioctl_register_legacy(zfs_ioc_t ioc,zfs_ioc_legacy_func_t * func,zfs_secpolicy_func_t * secpolicy,zfs_ioc_namecheck_t namecheck,boolean_t log_history,zfs_ioc_poolcheck_t pool_check)7669 zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
7670     zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
7671     boolean_t log_history, zfs_ioc_poolcheck_t pool_check)
7672 {
7673 	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
7674 
7675 	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
7676 	ASSERT3U(ioc, <, ZFS_IOC_LAST);
7677 	ASSERT0P(vec->zvec_legacy_func);
7678 	ASSERT0P(vec->zvec_func);
7679 
7680 	vec->zvec_legacy_func = func;
7681 	vec->zvec_secpolicy = secpolicy;
7682 	vec->zvec_namecheck = namecheck;
7683 	vec->zvec_allow_log = log_history;
7684 	vec->zvec_pool_check = pool_check;
7685 }
7686 
7687 /*
7688  * See the block comment at the beginning of this file for details on
7689  * each argument to this function.
7690  */
7691 void
zfs_ioctl_register(const char * name,zfs_ioc_t ioc,zfs_ioc_func_t * func,zfs_secpolicy_func_t * secpolicy,zfs_ioc_namecheck_t namecheck,zfs_ioc_poolcheck_t pool_check,boolean_t smush_outnvlist,boolean_t allow_log,const zfs_ioc_key_t * nvl_keys,size_t num_keys)7692 zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
7693     zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
7694     zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist,
7695     boolean_t allow_log, const zfs_ioc_key_t *nvl_keys, size_t num_keys)
7696 {
7697 	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
7698 
7699 	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
7700 	ASSERT3U(ioc, <, ZFS_IOC_LAST);
7701 	ASSERT0P(vec->zvec_legacy_func);
7702 	ASSERT0P(vec->zvec_func);
7703 
7704 	/* if we are logging, the name must be valid */
7705 	ASSERT(!allow_log || namecheck != NO_NAME);
7706 
7707 	vec->zvec_name = name;
7708 	vec->zvec_func = func;
7709 	vec->zvec_secpolicy = secpolicy;
7710 	vec->zvec_namecheck = namecheck;
7711 	vec->zvec_pool_check = pool_check;
7712 	vec->zvec_smush_outnvlist = smush_outnvlist;
7713 	vec->zvec_allow_log = allow_log;
7714 	vec->zvec_nvl_keys = nvl_keys;
7715 	vec->zvec_nvl_key_count = num_keys;
7716 }
7717 
7718 static void
zfs_ioctl_register_pool(zfs_ioc_t ioc,zfs_ioc_legacy_func_t * func,zfs_secpolicy_func_t * secpolicy,boolean_t log_history,zfs_ioc_poolcheck_t pool_check)7719 zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
7720     zfs_secpolicy_func_t *secpolicy, boolean_t log_history,
7721     zfs_ioc_poolcheck_t pool_check)
7722 {
7723 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
7724 	    POOL_NAME, log_history, pool_check);
7725 }
7726 
7727 void
zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc,zfs_ioc_legacy_func_t * func,zfs_secpolicy_func_t * secpolicy,zfs_ioc_poolcheck_t pool_check)7728 zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
7729     zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check)
7730 {
7731 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
7732 	    DATASET_NAME, B_FALSE, pool_check);
7733 }
7734 
7735 static void
zfs_ioctl_register_pool_modify(zfs_ioc_t ioc,zfs_ioc_legacy_func_t * func)7736 zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
7737 {
7738 	zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config,
7739 	    POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
7740 }
7741 
7742 static void
zfs_ioctl_register_pool_meta(zfs_ioc_t ioc,zfs_ioc_legacy_func_t * func,zfs_secpolicy_func_t * secpolicy)7743 zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
7744     zfs_secpolicy_func_t *secpolicy)
7745 {
7746 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
7747 	    NO_NAME, B_FALSE, POOL_CHECK_NONE);
7748 }
7749 
7750 static void
zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc,zfs_ioc_legacy_func_t * func,zfs_secpolicy_func_t * secpolicy)7751 zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc,
7752     zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy)
7753 {
7754 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
7755 	    DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED);
7756 }
7757 
7758 static void
zfs_ioctl_register_dataset_read(zfs_ioc_t ioc,zfs_ioc_legacy_func_t * func)7759 zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
7760 {
7761 	zfs_ioctl_register_dataset_read_secpolicy(ioc, func,
7762 	    zfs_secpolicy_read);
7763 }
7764 
7765 static void
zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc,zfs_ioc_legacy_func_t * func,zfs_secpolicy_func_t * secpolicy)7766 zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
7767     zfs_secpolicy_func_t *secpolicy)
7768 {
7769 	zfs_ioctl_register_legacy(ioc, func, secpolicy,
7770 	    DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
7771 }
7772 
7773 static void
zfs_ioctl_init(void)7774 zfs_ioctl_init(void)
7775 {
7776 	zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
7777 	    zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
7778 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7779 	    zfs_keys_snapshot, ARRAY_SIZE(zfs_keys_snapshot));
7780 
7781 	zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY,
7782 	    zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME,
7783 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
7784 	    zfs_keys_log_history, ARRAY_SIZE(zfs_keys_log_history));
7785 
7786 	zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS,
7787 	    zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME,
7788 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
7789 	    zfs_keys_space_snaps, ARRAY_SIZE(zfs_keys_space_snaps));
7790 
7791 	zfs_ioctl_register("send", ZFS_IOC_SEND_NEW,
7792 	    zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME,
7793 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
7794 	    zfs_keys_send_new, ARRAY_SIZE(zfs_keys_send_new));
7795 
7796 	zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE,
7797 	    zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME,
7798 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
7799 	    zfs_keys_send_space, ARRAY_SIZE(zfs_keys_send_space));
7800 
7801 	zfs_ioctl_register("create", ZFS_IOC_CREATE,
7802 	    zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME,
7803 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7804 	    zfs_keys_create, ARRAY_SIZE(zfs_keys_create));
7805 
7806 	zfs_ioctl_register("clone", ZFS_IOC_CLONE,
7807 	    zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME,
7808 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7809 	    zfs_keys_clone, ARRAY_SIZE(zfs_keys_clone));
7810 
7811 	zfs_ioctl_register("remap", ZFS_IOC_REMAP,
7812 	    zfs_ioc_remap, zfs_secpolicy_none, DATASET_NAME,
7813 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE,
7814 	    zfs_keys_remap, ARRAY_SIZE(zfs_keys_remap));
7815 
7816 	zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS,
7817 	    zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME,
7818 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7819 	    zfs_keys_destroy_snaps, ARRAY_SIZE(zfs_keys_destroy_snaps));
7820 
7821 	zfs_ioctl_register("hold", ZFS_IOC_HOLD,
7822 	    zfs_ioc_hold, zfs_secpolicy_hold, POOL_NAME,
7823 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7824 	    zfs_keys_hold, ARRAY_SIZE(zfs_keys_hold));
7825 	zfs_ioctl_register("release", ZFS_IOC_RELEASE,
7826 	    zfs_ioc_release, zfs_secpolicy_release, POOL_NAME,
7827 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7828 	    zfs_keys_release, ARRAY_SIZE(zfs_keys_release));
7829 
7830 	zfs_ioctl_register("get_holds", ZFS_IOC_GET_HOLDS,
7831 	    zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME,
7832 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
7833 	    zfs_keys_get_holds, ARRAY_SIZE(zfs_keys_get_holds));
7834 
7835 	zfs_ioctl_register("rollback", ZFS_IOC_ROLLBACK,
7836 	    zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME,
7837 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE,
7838 	    zfs_keys_rollback, ARRAY_SIZE(zfs_keys_rollback));
7839 
7840 	zfs_ioctl_register("bookmark", ZFS_IOC_BOOKMARK,
7841 	    zfs_ioc_bookmark, zfs_secpolicy_bookmark, POOL_NAME,
7842 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7843 	    zfs_keys_bookmark, ARRAY_SIZE(zfs_keys_bookmark));
7844 
7845 	zfs_ioctl_register("get_bookmarks", ZFS_IOC_GET_BOOKMARKS,
7846 	    zfs_ioc_get_bookmarks, zfs_secpolicy_read, DATASET_NAME,
7847 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
7848 	    zfs_keys_get_bookmarks, ARRAY_SIZE(zfs_keys_get_bookmarks));
7849 
7850 	zfs_ioctl_register("get_bookmark_props", ZFS_IOC_GET_BOOKMARK_PROPS,
7851 	    zfs_ioc_get_bookmark_props, zfs_secpolicy_read, ENTITY_NAME,
7852 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE, zfs_keys_get_bookmark_props,
7853 	    ARRAY_SIZE(zfs_keys_get_bookmark_props));
7854 
7855 	zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS,
7856 	    zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks,
7857 	    POOL_NAME,
7858 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7859 	    zfs_keys_destroy_bookmarks,
7860 	    ARRAY_SIZE(zfs_keys_destroy_bookmarks));
7861 
7862 	zfs_ioctl_register("receive", ZFS_IOC_RECV_NEW,
7863 	    zfs_ioc_recv_new, zfs_secpolicy_recv, DATASET_NAME,
7864 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7865 	    zfs_keys_recv_new, ARRAY_SIZE(zfs_keys_recv_new));
7866 	zfs_ioctl_register("load-key", ZFS_IOC_LOAD_KEY,
7867 	    zfs_ioc_load_key, zfs_secpolicy_load_key,
7868 	    DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE,
7869 	    zfs_keys_load_key, ARRAY_SIZE(zfs_keys_load_key));
7870 	zfs_ioctl_register("unload-key", ZFS_IOC_UNLOAD_KEY,
7871 	    zfs_ioc_unload_key, zfs_secpolicy_load_key,
7872 	    DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE,
7873 	    zfs_keys_unload_key, ARRAY_SIZE(zfs_keys_unload_key));
7874 	zfs_ioctl_register("change-key", ZFS_IOC_CHANGE_KEY,
7875 	    zfs_ioc_change_key, zfs_secpolicy_change_key,
7876 	    DATASET_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY,
7877 	    B_TRUE, B_TRUE, zfs_keys_change_key,
7878 	    ARRAY_SIZE(zfs_keys_change_key));
7879 
7880 	zfs_ioctl_register("sync", ZFS_IOC_POOL_SYNC,
7881 	    zfs_ioc_pool_sync, zfs_secpolicy_none, POOL_NAME,
7882 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
7883 	    zfs_keys_pool_sync, ARRAY_SIZE(zfs_keys_pool_sync));
7884 	zfs_ioctl_register("reopen", ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
7885 	    zfs_secpolicy_config, POOL_NAME, POOL_CHECK_SUSPENDED, B_TRUE,
7886 	    B_TRUE, zfs_keys_pool_reopen, ARRAY_SIZE(zfs_keys_pool_reopen));
7887 
7888 	zfs_ioctl_register("channel_program", ZFS_IOC_CHANNEL_PROGRAM,
7889 	    zfs_ioc_channel_program, zfs_secpolicy_config,
7890 	    POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE,
7891 	    B_TRUE, zfs_keys_channel_program,
7892 	    ARRAY_SIZE(zfs_keys_channel_program));
7893 
7894 	zfs_ioctl_register("redact", ZFS_IOC_REDACT,
7895 	    zfs_ioc_redact, zfs_secpolicy_config, DATASET_NAME,
7896 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7897 	    zfs_keys_redact, ARRAY_SIZE(zfs_keys_redact));
7898 
7899 	zfs_ioctl_register("zpool_checkpoint", ZFS_IOC_POOL_CHECKPOINT,
7900 	    zfs_ioc_pool_checkpoint, zfs_secpolicy_config, POOL_NAME,
7901 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7902 	    zfs_keys_pool_checkpoint, ARRAY_SIZE(zfs_keys_pool_checkpoint));
7903 
7904 	zfs_ioctl_register("zpool_discard_checkpoint",
7905 	    ZFS_IOC_POOL_DISCARD_CHECKPOINT, zfs_ioc_pool_discard_checkpoint,
7906 	    zfs_secpolicy_config, POOL_NAME,
7907 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7908 	    zfs_keys_pool_discard_checkpoint,
7909 	    ARRAY_SIZE(zfs_keys_pool_discard_checkpoint));
7910 
7911 	zfs_ioctl_register("zpool_prefetch",
7912 	    ZFS_IOC_POOL_PREFETCH, zfs_ioc_pool_prefetch,
7913 	    zfs_secpolicy_config, POOL_NAME,
7914 	    POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE,
7915 	    zfs_keys_pool_prefetch, ARRAY_SIZE(zfs_keys_pool_prefetch));
7916 
7917 	zfs_ioctl_register("initialize", ZFS_IOC_POOL_INITIALIZE,
7918 	    zfs_ioc_pool_initialize, zfs_secpolicy_config, POOL_NAME,
7919 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7920 	    zfs_keys_pool_initialize, ARRAY_SIZE(zfs_keys_pool_initialize));
7921 
7922 	zfs_ioctl_register("trim", ZFS_IOC_POOL_TRIM,
7923 	    zfs_ioc_pool_trim, zfs_secpolicy_config, POOL_NAME,
7924 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7925 	    zfs_keys_pool_trim, ARRAY_SIZE(zfs_keys_pool_trim));
7926 
7927 	zfs_ioctl_register("wait", ZFS_IOC_WAIT,
7928 	    zfs_ioc_wait, zfs_secpolicy_none, POOL_NAME,
7929 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
7930 	    zfs_keys_pool_wait, ARRAY_SIZE(zfs_keys_pool_wait));
7931 
7932 	zfs_ioctl_register("wait_fs", ZFS_IOC_WAIT_FS,
7933 	    zfs_ioc_wait_fs, zfs_secpolicy_none, DATASET_NAME,
7934 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
7935 	    zfs_keys_fs_wait, ARRAY_SIZE(zfs_keys_fs_wait));
7936 
7937 	zfs_ioctl_register("set_bootenv", ZFS_IOC_SET_BOOTENV,
7938 	    zfs_ioc_set_bootenv, zfs_secpolicy_config, POOL_NAME,
7939 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE,
7940 	    zfs_keys_set_bootenv, ARRAY_SIZE(zfs_keys_set_bootenv));
7941 
7942 	zfs_ioctl_register("get_bootenv", ZFS_IOC_GET_BOOTENV,
7943 	    zfs_ioc_get_bootenv, zfs_secpolicy_none, POOL_NAME,
7944 	    POOL_CHECK_SUSPENDED, B_FALSE, B_TRUE,
7945 	    zfs_keys_get_bootenv, ARRAY_SIZE(zfs_keys_get_bootenv));
7946 
7947 	zfs_ioctl_register("zpool_vdev_get_props", ZFS_IOC_VDEV_GET_PROPS,
7948 	    zfs_ioc_vdev_get_props, zfs_secpolicy_read, POOL_NAME,
7949 	    POOL_CHECK_NONE, B_FALSE, B_FALSE, zfs_keys_vdev_get_props,
7950 	    ARRAY_SIZE(zfs_keys_vdev_get_props));
7951 
7952 	zfs_ioctl_register("zpool_vdev_set_props", ZFS_IOC_VDEV_SET_PROPS,
7953 	    zfs_ioc_vdev_set_props, zfs_secpolicy_config, POOL_NAME,
7954 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
7955 	    zfs_keys_vdev_set_props, ARRAY_SIZE(zfs_keys_vdev_set_props));
7956 
7957 	zfs_ioctl_register("scrub", ZFS_IOC_POOL_SCRUB,
7958 	    zfs_ioc_pool_scrub, zfs_secpolicy_config, POOL_NAME,
7959 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7960 	    zfs_keys_pool_scrub, ARRAY_SIZE(zfs_keys_pool_scrub));
7961 
7962 	zfs_ioctl_register("get_props", ZFS_IOC_POOL_GET_PROPS,
7963 	    zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME,
7964 	    POOL_CHECK_NONE, B_FALSE, B_FALSE,
7965 	    zfs_keys_get_props, ARRAY_SIZE(zfs_keys_get_props));
7966 
7967 	zfs_ioctl_register("zpool_ddt_prune", ZFS_IOC_DDT_PRUNE,
7968 	    zfs_ioc_ddt_prune, zfs_secpolicy_config, POOL_NAME,
7969 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7970 	    zfs_keys_ddt_prune, ARRAY_SIZE(zfs_keys_ddt_prune));
7971 
7972 	/* IOCTLS that use the legacy function signature */
7973 
7974 	zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
7975 	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY);
7976 
7977 	zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create,
7978 	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
7979 	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN,
7980 	    zfs_ioc_pool_scan);
7981 	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE,
7982 	    zfs_ioc_pool_upgrade);
7983 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD,
7984 	    zfs_ioc_vdev_add);
7985 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE,
7986 	    zfs_ioc_vdev_remove);
7987 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE,
7988 	    zfs_ioc_vdev_set_state);
7989 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH,
7990 	    zfs_ioc_vdev_attach);
7991 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH,
7992 	    zfs_ioc_vdev_detach);
7993 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH,
7994 	    zfs_ioc_vdev_setpath);
7995 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU,
7996 	    zfs_ioc_vdev_setfru);
7997 	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS,
7998 	    zfs_ioc_pool_set_props);
7999 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT,
8000 	    zfs_ioc_vdev_split);
8001 	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID,
8002 	    zfs_ioc_pool_reguid);
8003 
8004 	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS,
8005 	    zfs_ioc_pool_configs, zfs_secpolicy_none);
8006 	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT,
8007 	    zfs_ioc_pool_tryimport, zfs_secpolicy_config);
8008 	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT,
8009 	    zfs_ioc_inject_fault, zfs_secpolicy_inject);
8010 	zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT,
8011 	    zfs_ioc_clear_fault, zfs_secpolicy_inject);
8012 	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT,
8013 	    zfs_ioc_inject_list_next, zfs_secpolicy_inject);
8014 
8015 	/*
8016 	 * pool destroy, and export don't log the history as part of
8017 	 * zfsdev_ioctl, but rather zfs_ioc_pool_export
8018 	 * does the logging of those commands.
8019 	 */
8020 	zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy,
8021 	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
8022 	zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export,
8023 	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
8024 
8025 	zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
8026 	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
8027 
8028 	zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
8029 	    zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED);
8030 	zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME,
8031 	    zfs_ioc_dsobj_to_dsname,
8032 	    zfs_secpolicy_diff, B_FALSE, POOL_CHECK_SUSPENDED);
8033 	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY,
8034 	    zfs_ioc_pool_get_history,
8035 	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
8036 
8037 	zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import,
8038 	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
8039 
8040 	zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
8041 	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_READONLY);
8042 
8043 	zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN,
8044 	    zfs_ioc_space_written);
8045 	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS,
8046 	    zfs_ioc_objset_recvd_props);
8047 	zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ,
8048 	    zfs_ioc_next_obj);
8049 	zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL,
8050 	    zfs_ioc_get_fsacl);
8051 	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS,
8052 	    zfs_ioc_objset_stats);
8053 	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS,
8054 	    zfs_ioc_objset_zplprops);
8055 	zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT,
8056 	    zfs_ioc_dataset_list_next);
8057 	zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT,
8058 	    zfs_ioc_snapshot_list_next);
8059 	zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS,
8060 	    zfs_ioc_send_progress);
8061 
8062 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF,
8063 	    zfs_ioc_diff, zfs_secpolicy_diff);
8064 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS,
8065 	    zfs_ioc_obj_to_stats, zfs_secpolicy_diff);
8066 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH,
8067 	    zfs_ioc_obj_to_path, zfs_secpolicy_diff);
8068 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE,
8069 	    zfs_ioc_userspace_one, zfs_secpolicy_userspace_one);
8070 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY,
8071 	    zfs_ioc_userspace_many, zfs_secpolicy_userspace_many);
8072 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND,
8073 	    zfs_ioc_send, zfs_secpolicy_send);
8074 
8075 	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop,
8076 	    zfs_secpolicy_setprops);
8077 	zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy,
8078 	    zfs_secpolicy_destroy);
8079 	zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename,
8080 	    zfs_secpolicy_rename);
8081 	zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv,
8082 	    zfs_secpolicy_recv);
8083 	zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote,
8084 	    zfs_secpolicy_promote);
8085 	zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP,
8086 	    zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop);
8087 	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl,
8088 	    zfs_secpolicy_set_fsacl);
8089 
8090 	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share,
8091 	    zfs_secpolicy_share, POOL_CHECK_NONE);
8092 	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl,
8093 	    zfs_secpolicy_smb_acl, POOL_CHECK_NONE);
8094 	zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE,
8095 	    zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
8096 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
8097 	zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT,
8098 	    zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot,
8099 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
8100 
8101 	zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_NEXT, zfs_ioc_events_next,
8102 	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
8103 	zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_CLEAR, zfs_ioc_events_clear,
8104 	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
8105 	zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_SEEK, zfs_ioc_events_seek,
8106 	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
8107 
8108 	zfs_ioctl_init_os();
8109 }
8110 
8111 /*
8112  * Verify that for non-legacy ioctls the input nvlist
8113  * pairs match against the expected input.
8114  *
8115  * Possible errors are:
8116  * ZFS_ERR_IOC_ARG_UNAVAIL	An unrecognized nvpair was encountered
8117  * ZFS_ERR_IOC_ARG_REQUIRED	A required nvpair is missing
8118  * ZFS_ERR_IOC_ARG_BADTYPE	Invalid type for nvpair
8119  */
8120 static int
zfs_check_input_nvpairs(nvlist_t * innvl,const zfs_ioc_vec_t * vec)8121 zfs_check_input_nvpairs(nvlist_t *innvl, const zfs_ioc_vec_t *vec)
8122 {
8123 	const zfs_ioc_key_t *nvl_keys = vec->zvec_nvl_keys;
8124 	boolean_t required_keys_found = B_FALSE;
8125 
8126 	/*
8127 	 * examine each input pair
8128 	 */
8129 	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
8130 	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
8131 		const char *name = nvpair_name(pair);
8132 		data_type_t type = nvpair_type(pair);
8133 		boolean_t identified = B_FALSE;
8134 
8135 		/*
8136 		 * check pair against the documented names and type
8137 		 */
8138 		for (int k = 0; k < vec->zvec_nvl_key_count; k++) {
8139 			/* if not a wild card name, check for an exact match */
8140 			if ((nvl_keys[k].zkey_flags & ZK_WILDCARDLIST) == 0 &&
8141 			    strcmp(nvl_keys[k].zkey_name, name) != 0)
8142 				continue;
8143 
8144 			identified = B_TRUE;
8145 
8146 			if (nvl_keys[k].zkey_type != DATA_TYPE_ANY &&
8147 			    nvl_keys[k].zkey_type != type) {
8148 				return (SET_ERROR(ZFS_ERR_IOC_ARG_BADTYPE));
8149 			}
8150 
8151 			if (nvl_keys[k].zkey_flags & ZK_OPTIONAL)
8152 				continue;
8153 
8154 			required_keys_found = B_TRUE;
8155 			break;
8156 		}
8157 
8158 		/* allow an 'optional' key, everything else is invalid */
8159 		if (!identified &&
8160 		    (strcmp(name, "optional") != 0 ||
8161 		    type != DATA_TYPE_NVLIST)) {
8162 			return (SET_ERROR(ZFS_ERR_IOC_ARG_UNAVAIL));
8163 		}
8164 	}
8165 
8166 	/* verify that all required keys were found */
8167 	for (int k = 0; k < vec->zvec_nvl_key_count; k++) {
8168 		if (nvl_keys[k].zkey_flags & ZK_OPTIONAL)
8169 			continue;
8170 
8171 		if (nvl_keys[k].zkey_flags & ZK_WILDCARDLIST) {
8172 			/* at least one non-optional key is expected here */
8173 			if (!required_keys_found)
8174 				return (SET_ERROR(ZFS_ERR_IOC_ARG_REQUIRED));
8175 			continue;
8176 		}
8177 
8178 		if (!nvlist_exists(innvl, nvl_keys[k].zkey_name))
8179 			return (SET_ERROR(ZFS_ERR_IOC_ARG_REQUIRED));
8180 	}
8181 
8182 	return (0);
8183 }
8184 
8185 static int
pool_status_check(const char * name,zfs_ioc_namecheck_t type,zfs_ioc_poolcheck_t check)8186 pool_status_check(const char *name, zfs_ioc_namecheck_t type,
8187     zfs_ioc_poolcheck_t check)
8188 {
8189 	spa_t *spa;
8190 	int error;
8191 
8192 	ASSERT(type == POOL_NAME || type == DATASET_NAME ||
8193 	    type == ENTITY_NAME);
8194 
8195 	if (check & POOL_CHECK_NONE)
8196 		return (0);
8197 
8198 	error = spa_open(name, &spa, FTAG);
8199 	if (error == 0) {
8200 		if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa))
8201 			error = SET_ERROR(EAGAIN);
8202 		else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa))
8203 			error = SET_ERROR(EROFS);
8204 		spa_close(spa, FTAG);
8205 	}
8206 	return (error);
8207 }
8208 
8209 int
zfsdev_getminor(zfs_file_t * fp,minor_t * minorp)8210 zfsdev_getminor(zfs_file_t *fp, minor_t *minorp)
8211 {
8212 	zfsdev_state_t *zs, *fpd;
8213 
8214 	ASSERT(!MUTEX_HELD(&zfsdev_state_lock));
8215 
8216 	fpd = zfs_file_private(fp);
8217 	if (fpd == NULL)
8218 		return (SET_ERROR(EBADF));
8219 
8220 	mutex_enter(&zfsdev_state_lock);
8221 
8222 	for (zs = &zfsdev_state_listhead; zs != NULL; zs = zs->zs_next) {
8223 
8224 		if (zs->zs_minor == -1)
8225 			continue;
8226 
8227 		if (fpd == zs) {
8228 			*minorp = fpd->zs_minor;
8229 			mutex_exit(&zfsdev_state_lock);
8230 			return (0);
8231 		}
8232 	}
8233 
8234 	mutex_exit(&zfsdev_state_lock);
8235 
8236 	return (SET_ERROR(EBADF));
8237 }
8238 
8239 void *
zfsdev_get_state(minor_t minor,enum zfsdev_state_type which)8240 zfsdev_get_state(minor_t minor, enum zfsdev_state_type which)
8241 {
8242 	zfsdev_state_t *zs;
8243 
8244 	for (zs = &zfsdev_state_listhead; zs != NULL; zs = zs->zs_next) {
8245 		if (zs->zs_minor == minor) {
8246 			membar_consumer();
8247 			switch (which) {
8248 			case ZST_ONEXIT:
8249 				return (zs->zs_onexit);
8250 			case ZST_ZEVENT:
8251 				return (zs->zs_zevent);
8252 			case ZST_ALL:
8253 				return (zs);
8254 			}
8255 		}
8256 	}
8257 
8258 	return (NULL);
8259 }
8260 
8261 /*
8262  * Find a free minor number.  The zfsdev_state_list is expected to
8263  * be short since it is only a list of currently open file handles.
8264  */
8265 static minor_t
zfsdev_minor_alloc(void)8266 zfsdev_minor_alloc(void)
8267 {
8268 	static minor_t last_minor = 0;
8269 	minor_t m;
8270 
8271 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
8272 
8273 	for (m = last_minor + 1; m != last_minor; m++) {
8274 		if (m > ZFSDEV_MAX_MINOR)
8275 			m = 1;
8276 		if (zfsdev_get_state(m, ZST_ALL) == NULL) {
8277 			last_minor = m;
8278 			return (m);
8279 		}
8280 	}
8281 
8282 	return (0);
8283 }
8284 
8285 int
zfsdev_state_init(void * priv)8286 zfsdev_state_init(void *priv)
8287 {
8288 	zfsdev_state_t *zs, *zsprev = NULL;
8289 	minor_t minor;
8290 	boolean_t newzs = B_FALSE;
8291 
8292 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
8293 
8294 	minor = zfsdev_minor_alloc();
8295 	if (minor == 0)
8296 		return (SET_ERROR(ENXIO));
8297 
8298 	for (zs = &zfsdev_state_listhead; zs != NULL; zs = zs->zs_next) {
8299 		if (zs->zs_minor == -1)
8300 			break;
8301 		zsprev = zs;
8302 	}
8303 
8304 	if (!zs) {
8305 		zs = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP);
8306 		newzs = B_TRUE;
8307 	}
8308 
8309 	zfsdev_private_set_state(priv, zs);
8310 
8311 	zfs_onexit_init((zfs_onexit_t **)&zs->zs_onexit);
8312 	zfs_zevent_init((zfs_zevent_t **)&zs->zs_zevent);
8313 
8314 	/*
8315 	 * In order to provide for lock-free concurrent read access
8316 	 * to the minor list in zfsdev_get_state(), new entries
8317 	 * must be completely written before linking them into the
8318 	 * list whereas existing entries are already linked; the last
8319 	 * operation must be updating zs_minor (from -1 to the new
8320 	 * value).
8321 	 */
8322 	if (newzs) {
8323 		zs->zs_minor = minor;
8324 		membar_producer();
8325 		zsprev->zs_next = zs;
8326 	} else {
8327 		membar_producer();
8328 		zs->zs_minor = minor;
8329 	}
8330 
8331 	return (0);
8332 }
8333 
8334 void
zfsdev_state_destroy(void * priv)8335 zfsdev_state_destroy(void *priv)
8336 {
8337 	zfsdev_state_t *zs = zfsdev_private_get_state(priv);
8338 
8339 	ASSERT(zs != NULL);
8340 	ASSERT3S(zs->zs_minor, >, 0);
8341 
8342 	/*
8343 	 * The last reference to this zfsdev file descriptor is being dropped.
8344 	 * We don't have to worry about lookup grabbing this state object, and
8345 	 * zfsdev_state_init() will not try to reuse this object until it is
8346 	 * invalidated by setting zs_minor to -1.  Invalidation must be done
8347 	 * last, with a memory barrier to ensure ordering.  This lets us avoid
8348 	 * taking the global zfsdev state lock around destruction.
8349 	 */
8350 	zfs_onexit_destroy(zs->zs_onexit);
8351 	zfs_zevent_destroy(zs->zs_zevent);
8352 	zs->zs_onexit = NULL;
8353 	zs->zs_zevent = NULL;
8354 	membar_producer();
8355 	zs->zs_minor = -1;
8356 }
8357 
8358 long
zfsdev_ioctl_common(uint_t vecnum,zfs_cmd_t * zc,int flag)8359 zfsdev_ioctl_common(uint_t vecnum, zfs_cmd_t *zc, int flag)
8360 {
8361 	int error, cmd;
8362 	const zfs_ioc_vec_t *vec;
8363 	char *saved_poolname = NULL;
8364 	uint64_t max_nvlist_src_size;
8365 	size_t saved_poolname_len = 0;
8366 	nvlist_t *innvl = NULL;
8367 	fstrans_cookie_t cookie;
8368 	hrtime_t start_time = gethrtime();
8369 
8370 	cmd = vecnum;
8371 	error = 0;
8372 	if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
8373 		return (SET_ERROR(ZFS_ERR_IOC_CMD_UNAVAIL));
8374 
8375 	vec = &zfs_ioc_vec[vecnum];
8376 
8377 	/*
8378 	 * The registered ioctl list may be sparse, verify that either
8379 	 * a normal or legacy handler are registered.
8380 	 */
8381 	if (vec->zvec_func == NULL && vec->zvec_legacy_func == NULL)
8382 		return (SET_ERROR(ZFS_ERR_IOC_CMD_UNAVAIL));
8383 
8384 	zc->zc_iflags = flag & FKIOCTL;
8385 	max_nvlist_src_size = zfs_max_nvlist_src_size_os();
8386 	if (zc->zc_nvlist_src_size > max_nvlist_src_size) {
8387 		/*
8388 		 * Make sure the user doesn't pass in an insane value for
8389 		 * zc_nvlist_src_size.  We have to check, since we will end
8390 		 * up allocating that much memory inside of get_nvlist().  This
8391 		 * prevents a nefarious user from allocating tons of kernel
8392 		 * memory.
8393 		 *
8394 		 * Also, we return EINVAL instead of ENOMEM here.  The reason
8395 		 * being that returning ENOMEM from an ioctl() has a special
8396 		 * connotation; that the user's size value is too small and
8397 		 * needs to be expanded to hold the nvlist.  See
8398 		 * zcmd_expand_dst_nvlist() for details.
8399 		 */
8400 		error = SET_ERROR(EINVAL);	/* User's size too big */
8401 
8402 	} else if (zc->zc_nvlist_src_size != 0) {
8403 		error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
8404 		    zc->zc_iflags, &innvl);
8405 		if (error != 0)
8406 			goto out;
8407 	}
8408 
8409 	/*
8410 	 * Ensure that all pool/dataset names are valid before we pass down to
8411 	 * the lower layers.
8412 	 */
8413 	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
8414 	switch (vec->zvec_namecheck) {
8415 	case POOL_NAME:
8416 		if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
8417 			error = SET_ERROR(EINVAL);
8418 		else
8419 			error = pool_status_check(zc->zc_name,
8420 			    vec->zvec_namecheck, vec->zvec_pool_check);
8421 		break;
8422 
8423 	case DATASET_NAME:
8424 		if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
8425 			error = SET_ERROR(EINVAL);
8426 		else
8427 			error = pool_status_check(zc->zc_name,
8428 			    vec->zvec_namecheck, vec->zvec_pool_check);
8429 		break;
8430 
8431 	case ENTITY_NAME:
8432 		if (entity_namecheck(zc->zc_name, NULL, NULL) != 0) {
8433 			error = SET_ERROR(EINVAL);
8434 		} else {
8435 			error = pool_status_check(zc->zc_name,
8436 			    vec->zvec_namecheck, vec->zvec_pool_check);
8437 		}
8438 		break;
8439 
8440 	case NO_NAME:
8441 		break;
8442 	}
8443 	/*
8444 	 * Ensure that all input pairs are valid before we pass them down
8445 	 * to the lower layers.
8446 	 *
8447 	 * The vectored functions can use fnvlist_lookup_{type} for any
8448 	 * required pairs since zfs_check_input_nvpairs() confirmed that
8449 	 * they exist and are of the correct type.
8450 	 */
8451 	if (error == 0 && vec->zvec_func != NULL) {
8452 		error = zfs_check_input_nvpairs(innvl, vec);
8453 		if (error != 0)
8454 			goto out;
8455 	}
8456 
8457 	if (error == 0) {
8458 		cookie = spl_fstrans_mark();
8459 		error = vec->zvec_secpolicy(zc, innvl, CRED());
8460 		spl_fstrans_unmark(cookie);
8461 	}
8462 
8463 	if (error != 0)
8464 		goto out;
8465 
8466 	/* legacy ioctls can modify zc_name */
8467 	/*
8468 	 * Can't use kmem_strdup() as we might truncate the string and
8469 	 * kmem_strfree() would then free with incorrect size.
8470 	 */
8471 	const char *spa_name = zc->zc_name;
8472 	const char *tname;
8473 	if (nvlist_lookup_string(innvl,
8474 	    zpool_prop_to_name(ZPOOL_PROP_TNAME), &tname) == 0) {
8475 		spa_name = tname;
8476 	}
8477 	saved_poolname_len = strlen(spa_name) + 1;
8478 	saved_poolname = kmem_alloc(saved_poolname_len, KM_SLEEP);
8479 
8480 	strlcpy(saved_poolname, spa_name, saved_poolname_len);
8481 	saved_poolname[strcspn(saved_poolname, "/@#")] = '\0';
8482 
8483 	if (vec->zvec_func != NULL) {
8484 		nvlist_t *outnvl;
8485 		int puterror = 0;
8486 		spa_t *spa;
8487 		nvlist_t *lognv = NULL;
8488 
8489 		ASSERT0P(vec->zvec_legacy_func);
8490 
8491 		/*
8492 		 * Add the innvl to the lognv before calling the func,
8493 		 * in case the func changes the innvl.
8494 		 */
8495 		if (vec->zvec_allow_log) {
8496 			lognv = fnvlist_alloc();
8497 			fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL,
8498 			    vec->zvec_name);
8499 			if (!nvlist_empty(innvl)) {
8500 				fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL,
8501 				    innvl);
8502 			}
8503 		}
8504 
8505 		outnvl = fnvlist_alloc();
8506 		cookie = spl_fstrans_mark();
8507 		error = vec->zvec_func(zc->zc_name, innvl, outnvl);
8508 		spl_fstrans_unmark(cookie);
8509 
8510 		/*
8511 		 * Some commands can partially execute, modify state, and still
8512 		 * return an error.  In these cases, attempt to record what
8513 		 * was modified.
8514 		 */
8515 		if ((error == 0 ||
8516 		    (cmd == ZFS_IOC_CHANNEL_PROGRAM && error != EINVAL)) &&
8517 		    vec->zvec_allow_log &&
8518 		    spa_open(zc->zc_name, &spa, FTAG) == 0) {
8519 			if (!nvlist_empty(outnvl)) {
8520 				size_t out_size = fnvlist_size(outnvl);
8521 				if (out_size > zfs_history_output_max) {
8522 					fnvlist_add_int64(lognv,
8523 					    ZPOOL_HIST_OUTPUT_SIZE, out_size);
8524 				} else {
8525 					fnvlist_add_nvlist(lognv,
8526 					    ZPOOL_HIST_OUTPUT_NVL, outnvl);
8527 				}
8528 			}
8529 			if (error != 0) {
8530 				fnvlist_add_int64(lognv, ZPOOL_HIST_ERRNO,
8531 				    error);
8532 			}
8533 			fnvlist_add_int64(lognv, ZPOOL_HIST_ELAPSED_NS,
8534 			    gethrtime() - start_time);
8535 			(void) spa_history_log_nvl(spa, lognv);
8536 			spa_close(spa, FTAG);
8537 		}
8538 		fnvlist_free(lognv);
8539 
8540 		if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) {
8541 			int smusherror = 0;
8542 			if (vec->zvec_smush_outnvlist) {
8543 				smusherror = nvlist_smush(outnvl,
8544 				    zc->zc_nvlist_dst_size);
8545 			}
8546 			if (smusherror == 0)
8547 				puterror = put_nvlist(zc, outnvl);
8548 		}
8549 
8550 		if (puterror != 0)
8551 			error = puterror;
8552 
8553 		nvlist_free(outnvl);
8554 	} else {
8555 		cookie = spl_fstrans_mark();
8556 		error = vec->zvec_legacy_func(zc);
8557 		spl_fstrans_unmark(cookie);
8558 	}
8559 
8560 out:
8561 	nvlist_free(innvl);
8562 	if (error == 0 && vec->zvec_allow_log) {
8563 		char *s = tsd_get(zfs_allow_log_key);
8564 		if (s != NULL)
8565 			kmem_strfree(s);
8566 		(void) tsd_set(zfs_allow_log_key, kmem_strdup(saved_poolname));
8567 	}
8568 	if (saved_poolname != NULL)
8569 		kmem_free(saved_poolname, saved_poolname_len);
8570 
8571 	return (error);
8572 }
8573 
8574 int
zfs_kmod_init(void)8575 zfs_kmod_init(void)
8576 {
8577 	int error;
8578 
8579 	if ((error = zvol_init()) != 0)
8580 		return (error);
8581 
8582 	spa_init(SPA_MODE_READ | SPA_MODE_WRITE);
8583 	zfs_init();
8584 
8585 	zfs_ioctl_init();
8586 
8587 	/* Register zoned_uid property lookup callback with SPL */
8588 	zone_register_zoned_uid_callback(zfs_get_zoned_uid);
8589 
8590 	mutex_init(&zfsdev_state_lock, NULL, MUTEX_DEFAULT, NULL);
8591 	zfsdev_state_listhead.zs_minor = -1;
8592 
8593 	if ((error = zfsdev_attach()) != 0)
8594 		goto out;
8595 
8596 	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
8597 	tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
8598 
8599 	return (0);
8600 out:
8601 	zfs_fini();
8602 	spa_fini();
8603 	zvol_fini();
8604 
8605 	return (error);
8606 }
8607 
8608 void
zfs_kmod_fini(void)8609 zfs_kmod_fini(void)
8610 {
8611 	zfsdev_state_t *zs, *zsnext = NULL;
8612 
8613 	zfsdev_detach();
8614 
8615 	mutex_destroy(&zfsdev_state_lock);
8616 
8617 	for (zs = &zfsdev_state_listhead; zs != NULL; zs = zsnext) {
8618 		zsnext = zs->zs_next;
8619 		if (zs->zs_onexit)
8620 			zfs_onexit_destroy(zs->zs_onexit);
8621 		if (zs->zs_zevent)
8622 			zfs_zevent_destroy(zs->zs_zevent);
8623 		if (zs != &zfsdev_state_listhead)
8624 			kmem_free(zs, sizeof (zfsdev_state_t));
8625 	}
8626 
8627 	zfs_ereport_taskq_fini();	/* run before zfs_fini() on Linux */
8628 
8629 	/* Unregister zoned_uid callback before ZFS layer is torn down */
8630 	zone_unregister_zoned_uid_callback();
8631 
8632 	zfs_fini();
8633 	spa_fini();
8634 	zvol_fini();
8635 
8636 	tsd_destroy(&rrw_tsd_key);
8637 	tsd_destroy(&zfs_allow_log_key);
8638 }
8639 
/*
 * Read-write tunable bounding the size of the source nvlist a caller may
 * pass with an ioctl.
 * NOTE(review): presumably the value behind zfs_max_nvlist_src_size_os(),
 * which zfsdev_ioctl_common() compares zc_nvlist_src_size against - confirm
 * in the platform-specific code.
 */
ZFS_MODULE_PARAM(zfs, zfs_, max_nvlist_src_size, U64, ZMOD_RW,
	"Maximum size in bytes allowed for src nvlist passed with ZFS ioctls");

/*
 * Read-write tunable for zfs_history_output_max: when an ioctl's output
 * nvlist is larger than this, zfsdev_ioctl_common() records only its size
 * (ZPOOL_HIST_OUTPUT_SIZE) in the pool history instead of the nvlist itself.
 */
ZFS_MODULE_PARAM(zfs, zfs_, history_output_max, U64, ZMOD_RW,
	"Maximum size in bytes of ZFS ioctl output that will be logged");
8644 	"Maximum size in bytes of ZFS ioctl output that will be logged");
8645