xref: /freebsd/sys/contrib/openzfs/module/zfs/zfs_ioctl.c (revision d8fbbd371ca11d9ad4b29b9d3a316885a5da0b15)
1 // SPDX-License-Identifier: CDDL-1.0
2 /*
3  * CDDL HEADER START
4  *
5  * The contents of this file are subject to the terms of the
6  * Common Development and Distribution License (the "License").
7  * You may not use this file except in compliance with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or https://opensource.org/licenses/CDDL-1.0.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 
23 /*
24  * Copyright (c) 2005, 2010, Oracle and/or its affiliates. All rights reserved.
25  * Portions Copyright 2011 Martin Matuska
26  * Copyright 2015, OmniTI Computer Consulting, Inc. All rights reserved.
27  * Copyright (c) 2012 Pawel Jakub Dawidek
28  * Copyright (c) 2014, 2016 Joyent, Inc. All rights reserved.
29  * Copyright 2016 Nexenta Systems, Inc.  All rights reserved.
30  * Copyright (c) 2014, Joyent, Inc. All rights reserved.
31  * Copyright (c) 2011, 2024 by Delphix. All rights reserved.
32  * Copyright (c) 2013 by Saso Kiselkov. All rights reserved.
33  * Copyright (c) 2013 Steven Hartland. All rights reserved.
34  * Copyright (c) 2014 Integros [integros.com]
35  * Copyright 2016 Toomas Soome <tsoome@me.com>
36  * Copyright (c) 2016 Actifio, Inc. All rights reserved.
37  * Copyright (c) 2018, loli10K <ezomori.nozomu@gmail.com>. All rights reserved.
38  * Copyright 2017 RackTop Systems.
39  * Copyright (c) 2017 Open-E, Inc. All Rights Reserved.
40  * Copyright (c) 2019 Datto Inc.
41  * Copyright (c) 2019, 2020 by Christian Schwarz. All rights reserved.
42  * Copyright (c) 2019, 2021, 2023, 2024, Klara Inc.
43  * Copyright (c) 2019, Allan Jude
44  * Copyright 2026 Oxide Computer Company
45  */
46 
47 /*
48  * ZFS ioctls.
49  *
50  * This file handles the ioctls to /dev/zfs, used for configuring ZFS storage
51  * pools and filesystems, e.g. with /sbin/zfs and /sbin/zpool.
52  *
53  * There are two ways that we handle ioctls: the legacy way where almost
54  * all of the logic is in the ioctl callback, and the new way where most
55  * of the marshalling is handled in the common entry point, zfsdev_ioctl().
56  *
57  * Non-legacy ioctls should be registered by calling
58  * zfs_ioctl_register() from zfs_ioctl_init().  The ioctl is invoked
59  * from userland by lzc_ioctl().
60  *
61  * The registration arguments are as follows:
62  *
63  * const char *name
64  *   The name of the ioctl.  This is used for history logging.  If the
65  *   ioctl returns successfully (the callback returns 0), and allow_log
66  *   is true, then a history log entry will be recorded with the input &
67  *   output nvlists.  The log entry can be printed with "zpool history -i".
68  *
69  * zfs_ioc_t ioc
70  *   The ioctl request number, which userland will pass to ioctl(2).
71  *   We want newer versions of libzfs and libzfs_core to run against
72  *   existing zfs kernel modules (i.e. a deferred reboot after an update).
73  *   Therefore the ioctl numbers cannot change from release to release.
74  *
75  * zfs_secpolicy_func_t *secpolicy
76  *   This function will be called before the zfs_ioc_func_t, to
77  *   determine if this operation is permitted.  It should return EPERM
78  *   on failure, and 0 on success.  Checks include determining if the
79  *   dataset is visible in this zone, and if the user has either all
80  *   zfs privileges in the zone (SYS_MOUNT), or has been granted permission
81  *   to do this operation on this dataset with "zfs allow".
82  *
83  * zfs_ioc_namecheck_t namecheck
84  *   This specifies what to expect in the zfs_cmd_t:zc_name -- a pool
85  *   name, a dataset name, or nothing.  If the name is not well-formed,
86  *   the ioctl will fail and the callback will not be called.
87  *   Therefore, the callback can assume that the name is well-formed
88  *   (e.g. is null-terminated, doesn't have more than one '@' character,
89  *   doesn't have invalid characters).
90  *
91  * zfs_ioc_poolcheck_t pool_check
92  *   This specifies requirements on the pool state.  If the pool does
93  *   not meet them (is suspended or is readonly), the ioctl will fail
94  *   and the callback will not be called.  If any checks are specified
95  *   (i.e. it is not POOL_CHECK_NONE), namecheck must not be NO_NAME.
96  *   Multiple checks can be or-ed together (e.g. POOL_CHECK_SUSPENDED |
97  *   POOL_CHECK_READONLY).
98  *
99  * zfs_ioc_key_t *nvl_keys
100  *  The list of expected/allowable innvl input keys. This list is used
101  *  to validate the nvlist input to the ioctl.
102  *
103  * boolean_t smush_outnvlist
104  *   If smush_outnvlist is true, then the output is presumed to be a
105  *   list of errors, and it will be "smushed" down to fit into the
106  *   caller's buffer, by removing some entries and replacing them with a
107  *   single "N_MORE_ERRORS" entry indicating how many were removed.  See
108  *   nvlist_smush() for details.  If smush_outnvlist is false, and the
109  *   outnvlist does not fit into the userland-provided buffer, then the
110  *   ioctl will fail with ENOMEM.
111  *
112  * zfs_ioc_func_t *func
113  *   The callback function that will perform the operation.
114  *
115  *   The callback should return 0 on success, or an error number on
116  *   failure.  If the function fails, the userland ioctl will return -1,
117  *   and errno will be set to the callback's return value.  The callback
118  *   will be called with the following arguments:
119  *
120  *   const char *name
121  *     The name of the pool or dataset to operate on, from
122  *     zfs_cmd_t:zc_name.  The 'namecheck' argument specifies the
123  *     expected type (pool, dataset, or none).
124  *
125  *   nvlist_t *innvl
126  *     The input nvlist, deserialized from zfs_cmd_t:zc_nvlist_src.  Or
127  *     NULL if no input nvlist was provided.  Changes to this nvlist are
128  *     ignored.  If the input nvlist could not be deserialized, the
129  *     ioctl will fail and the callback will not be called.
130  *
131  *   nvlist_t *outnvl
132  *     The output nvlist, initially empty.  The callback can fill it in,
133  *     and it will be returned to userland by serializing it into
134  *     zfs_cmd_t:zc_nvlist_dst.  If it is non-empty, and serialization
135  *     fails (e.g. because the caller didn't supply a large enough
136  *     buffer), then the overall ioctl will fail.  See the
137  *     'smush_outnvlist' argument above for additional behaviors.
138  *
139  *     There are two typical uses of the output nvlist:
140  *       - To return state, e.g. property values.  In this case,
141  *         smush_outnvlist should be false.  If the buffer was not large
142  *         enough, the caller will reallocate a larger buffer and try
143  *         the ioctl again.
144  *
145  *       - To return multiple errors from an ioctl which makes on-disk
146  *         changes.  In this case, smush_outnvlist should be true.
147  *         Ioctls which make on-disk modifications should generally not
148  *         use the outnvl if they succeed, because the caller can not
149  *         distinguish between the operation failing, and
150  *         deserialization failing.
151  *
152  * IOCTL Interface Errors
153  *
154  * The following ioctl input errors can be returned:
155  *   ZFS_ERR_IOC_CMD_UNAVAIL	the ioctl number is not supported by kernel
156  *   ZFS_ERR_IOC_ARG_UNAVAIL	an input argument is not supported by kernel
157  *   ZFS_ERR_IOC_ARG_REQUIRED	a required input argument is missing
158  *   ZFS_ERR_IOC_ARG_BADTYPE	an input argument has an invalid type
159  */
160 
161 #include <sys/types.h>
162 #include <sys/param.h>
163 #include <sys/errno.h>
164 #include <sys/file.h>
165 #include <sys/kmem.h>
166 #include <sys/cmn_err.h>
167 #include <sys/stat.h>
168 #include <sys/zfs_ioctl.h>
169 #include <sys/zfs_quota.h>
170 #include <sys/zfs_vfsops.h>
171 #include <sys/zfs_znode.h>
172 #include <sys/zap.h>
173 #include <sys/spa.h>
174 #include <sys/spa_impl.h>
175 #include <sys/vdev.h>
176 #include <sys/vdev_impl.h>
177 #include <sys/dmu.h>
178 #include <sys/dsl_dir.h>
179 #include <sys/dsl_dataset.h>
180 #include <sys/dsl_prop.h>
181 #include <sys/dsl_deleg.h>
182 #include <sys/dmu_objset.h>
183 #include <sys/dmu_impl.h>
184 #include <sys/dmu_redact.h>
185 #include <sys/dmu_tx.h>
186 #include <sys/sunddi.h>
187 #include <sys/policy.h>
188 #include <sys/zone.h>
189 #include <sys/nvpair.h>
190 #include <sys/pathname.h>
191 #include <sys/fs/zfs.h>
192 #include <sys/zfs_ctldir.h>
193 #include <sys/zfs_dir.h>
194 #include <sys/zfs_onexit.h>
195 #include <sys/zvol.h>
196 #include <sys/dsl_scan.h>
197 #include <sys/fm/util.h>
198 #include <sys/dsl_crypt.h>
199 #include <sys/rrwlock.h>
200 #include <sys/zfs_file.h>
201 
202 #include <sys/dmu_recv.h>
203 #include <sys/dmu_send.h>
204 #include <sys/dmu_recv.h>
205 #include <sys/dsl_destroy.h>
206 #include <sys/dsl_bookmark.h>
207 #include <sys/dsl_userhold.h>
208 #include <sys/zfeature.h>
209 #include <sys/zcp.h>
210 #include <sys/zio_checksum.h>
211 #include <sys/vdev_removal.h>
212 #include <sys/vdev_impl.h>
213 #include <sys/vdev_initialize.h>
214 #include <sys/vdev_trim.h>
215 #include <sys/brt.h>
216 #include <sys/ddt.h>
217 
218 #include "zfs_namecheck.h"
219 #include "zfs_prop.h"
220 #include "zfs_deleg.h"
221 #include "zfs_comutil.h"
222 
223 #include <sys/lua/lua.h>
224 #include <sys/lua/lauxlib.h>
225 #include <sys/zfs_ioctl_impl.h>
226 
227 kmutex_t zfsdev_state_lock;
228 static zfsdev_state_t zfsdev_state_listhead;
229 
230 /*
231  * Limit maximum nvlist size.  We don't want users passing in insane values
232  * for zc->zc_nvlist_src_size, since we will need to allocate that much memory.
233  * Defaults to 0=auto which is handled by platform code.
234  */
235 uint64_t zfs_max_nvlist_src_size = 0;
236 
237 /*
238  * When logging the output nvlist of an ioctl in the on-disk history, limit
239  * the logged size to this many bytes.  This must be less than DMU_MAX_ACCESS.
240  * This applies primarily to zfs_ioc_channel_program().
241  */
242 static uint64_t zfs_history_output_max = 1024 * 1024;
243 
244 uint_t zfs_allow_log_key;
245 
246 /* DATA_TYPE_ANY is used when zkey_type can vary. */
247 #define	DATA_TYPE_ANY	DATA_TYPE_UNKNOWN
248 
/*
 * Per-ioctl registration record.  The fields mirror the registration
 * arguments described in the comment at the top of this file.
 */
typedef struct zfs_ioc_vec {
	/* Callback for a legacy-style ioctl (most logic lives in the callback). */
	zfs_ioc_legacy_func_t	*zvec_legacy_func;
	/* Callback for a non-legacy ioctl (takes name, innvl and outnvl). */
	zfs_ioc_func_t		*zvec_func;
	/* Permission check that is called before the callback. */
	zfs_secpolicy_func_t	*zvec_secpolicy;
	/* What to expect in zc_name: a pool name, a dataset name, or nothing. */
	zfs_ioc_namecheck_t	zvec_namecheck;
	/* If true, a successful call is recorded in the pool history. */
	boolean_t		zvec_allow_log;
	/* Requirements on the pool state (suspended / readonly checks). */
	zfs_ioc_poolcheck_t	zvec_pool_check;
	/* If true, an oversized output nvlist is "smushed" to fit the buffer. */
	boolean_t		zvec_smush_outnvlist;
	/* Name of the ioctl; used for history logging. */
	const char		*zvec_name;
	/* Expected/allowable keys of the input nvlist. */
	const zfs_ioc_key_t	*zvec_nvl_keys;
	/* NOTE(review): presumably the number of entries in zvec_nvl_keys. */
	size_t			zvec_nvl_key_count;
} zfs_ioc_vec_t;
261 
/*
 * Delegated-permission names for the user/group/project space and object
 * accounting properties.  This array is indexed by zfs_userquota_prop_t,
 * so the order of the entries must follow the order of that enum.
 */
static const char *userquota_perms[] = {
	ZFS_DELEG_PERM_USERUSED,
	ZFS_DELEG_PERM_USERQUOTA,
	ZFS_DELEG_PERM_GROUPUSED,
	ZFS_DELEG_PERM_GROUPQUOTA,
	ZFS_DELEG_PERM_USEROBJUSED,
	ZFS_DELEG_PERM_USEROBJQUOTA,
	ZFS_DELEG_PERM_GROUPOBJUSED,
	ZFS_DELEG_PERM_GROUPOBJQUOTA,
	ZFS_DELEG_PERM_PROJECTUSED,
	ZFS_DELEG_PERM_PROJECTQUOTA,
	ZFS_DELEG_PERM_PROJECTOBJUSED,
	ZFS_DELEG_PERM_PROJECTOBJQUOTA,
};
277 
/*
 * Prototypes for functions that are referenced before their definitions.
 * All but zfs_set_prop_nvlist() have internal linkage.
 */
static int zfs_ioc_userspace_upgrade(zfs_cmd_t *zc);
static int zfs_ioc_id_quota_upgrade(zfs_cmd_t *zc);
static int zfs_check_settable(const char *name, nvpair_t *property,
    cred_t *cr);
static int zfs_check_clearable(const char *dataset, nvlist_t *props,
    nvlist_t **errors);
static int zfs_fill_zplprops_root(uint64_t, nvlist_t *, nvlist_t *,
    boolean_t *);
int zfs_set_prop_nvlist(const char *, zprop_source_t, nvlist_t *, nvlist_t *);
static int get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp);
288 
289 /*
290  * Callback for SPL to look up zoned_uid property.
291  * Walks ancestors to find the delegation root with zoned_uid set.
292  * Returns the zoned_uid value if found, or 0 if not set.
293  */
294 static uid_t
zfs_get_zoned_uid(const char * dataset,char * root_out,size_t root_size)295 zfs_get_zoned_uid(const char *dataset, char *root_out, size_t root_size)
296 {
297 	char path[ZFS_MAX_DATASET_NAME_LEN];
298 	char setpoint[ZFS_MAX_DATASET_NAME_LEN];
299 	char *slash, *at;
300 	uint64_t zoned_uid_val = 0;
301 	int error;
302 
303 	(void) strlcpy(path, dataset, sizeof (path));
304 
305 	/*
306 	 * Strip snapshot suffix if present — snapshots inherit properties
307 	 * from their parent filesystem.
308 	 */
309 	at = strchr(path, '@');
310 	if (at != NULL)
311 		*at = '\0';
312 
313 	/*
314 	 * Walk up the hierarchy until we find a dataset with zoned_uid set.
315 	 * This handles the case where the dataset doesn't exist yet (e.g.,
316 	 * rename destination) — dsl_prop_get fails on non-existent datasets,
317 	 * so we walk up to find an existing ancestor.
318 	 *
319 	 * When the property is found (possibly via inheritance), setpoint
320 	 * tells us the actual delegation root where zoned_uid is locally
321 	 * set, rather than the dataset where we happened to query it.
322 	 */
323 	while (path[0] != '\0') {
324 		error = dsl_prop_get(path, "zoned_uid", 8, 1,
325 		    &zoned_uid_val, setpoint);
326 
327 		if (error == 0 && zoned_uid_val != 0) {
328 			if (root_out != NULL)
329 				(void) strlcpy(root_out, setpoint, root_size);
330 			return ((uid_t)zoned_uid_val);
331 		}
332 
333 		slash = strrchr(path, '/');
334 		if (slash == NULL)
335 			break;
336 		*slash = '\0';
337 	}
338 
339 	return (0);
340 }
341 
342 static void
history_str_free(char * buf)343 history_str_free(char *buf)
344 {
345 	kmem_free(buf, HIS_MAX_RECORD_LEN);
346 }
347 
348 static char *
history_str_get(zfs_cmd_t * zc)349 history_str_get(zfs_cmd_t *zc)
350 {
351 	char *buf;
352 
353 	if (zc->zc_history == 0)
354 		return (NULL);
355 
356 	buf = kmem_alloc(HIS_MAX_RECORD_LEN, KM_SLEEP);
357 	if (copyinstr((void *)(uintptr_t)zc->zc_history,
358 	    buf, HIS_MAX_RECORD_LEN, NULL) != 0) {
359 		history_str_free(buf);
360 		return (NULL);
361 	}
362 
363 	buf[HIS_MAX_RECORD_LEN -1] = '\0';
364 
365 	return (buf);
366 }
367 
368 /*
369  * Return non-zero if the spa version is less than requested version.
370  */
371 static int
zfs_earlier_version(const char * name,int version)372 zfs_earlier_version(const char *name, int version)
373 {
374 	spa_t *spa;
375 
376 	if (spa_open(name, &spa, FTAG) == 0) {
377 		if (spa_version(spa) < version) {
378 			spa_close(spa, FTAG);
379 			return (1);
380 		}
381 		spa_close(spa, FTAG);
382 	}
383 	return (0);
384 }
385 
386 /*
387  * Return TRUE if the ZPL version is less than requested version.
388  */
389 static boolean_t
zpl_earlier_version(const char * name,int version)390 zpl_earlier_version(const char *name, int version)
391 {
392 	objset_t *os;
393 	boolean_t rc = B_TRUE;
394 
395 	if (dmu_objset_hold(name, FTAG, &os) == 0) {
396 		uint64_t zplversion;
397 
398 		if (dmu_objset_type(os) != DMU_OST_ZFS) {
399 			dmu_objset_rele(os, FTAG);
400 			return (B_TRUE);
401 		}
402 		/* XXX reading from non-owned objset */
403 		if (zfs_get_zplprop(os, ZFS_PROP_VERSION, &zplversion) == 0)
404 			rc = zplversion < version;
405 		dmu_objset_rele(os, FTAG);
406 	}
407 	return (rc);
408 }
409 
410 static void
zfs_log_history(zfs_cmd_t * zc)411 zfs_log_history(zfs_cmd_t *zc)
412 {
413 	spa_t *spa;
414 	char *buf;
415 
416 	if ((buf = history_str_get(zc)) == NULL)
417 		return;
418 
419 	if (spa_open(zc->zc_name, &spa, FTAG) == 0) {
420 		if (spa_version(spa) >= SPA_VERSION_ZPOOL_HISTORY)
421 			(void) spa_history_log(spa, buf);
422 		spa_close(spa, FTAG);
423 	}
424 	history_str_free(buf);
425 }
426 
427 /*
428  * Policy for top-level read operations (list pools).  Requires no privileges,
429  * and can be used in the local zone, as there is no associated dataset.
430  */
431 static int
zfs_secpolicy_none(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)432 zfs_secpolicy_none(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
433 {
434 	(void) zc, (void) innvl, (void) cr;
435 	return (0);
436 }
437 
438 /*
439  * Policy for dataset read operations (list children, get statistics).  Requires
440  * no privileges, but must be visible in the local zone.
441  */
442 static int
zfs_secpolicy_read(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)443 zfs_secpolicy_read(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
444 {
445 	(void) innvl, (void) cr;
446 	if (INGLOBALZONE(curproc) ||
447 	    zone_dataset_visible(zc->zc_name, NULL))
448 		return (0);
449 
450 	return (SET_ERROR(ENOENT));
451 }
452 
453 static int
zfs_dozonecheck_impl(const char * dataset,uint64_t zoned,cred_t * cr)454 zfs_dozonecheck_impl(const char *dataset, uint64_t zoned, cred_t *cr)
455 {
456 	int writable = 1;
457 
458 	/*
459 	 * The dataset must be visible by this zone -- check this first
460 	 * so they don't see EPERM on something they shouldn't know about.
461 	 */
462 	if (!INGLOBALZONE(curproc) &&
463 	    !zone_dataset_visible(dataset, &writable))
464 		return (SET_ERROR(ENOENT));
465 
466 	if (INGLOBALZONE(curproc)) {
467 		/*
468 		 * If the fs is zoned, only root can access it from the
469 		 * global zone.
470 		 */
471 		if (secpolicy_zfs(cr) && zoned)
472 			return (SET_ERROR(EPERM));
473 	} else {
474 		/*
475 		 * If we are in a local zone, the 'zoned' property must be set.
476 		 */
477 		if (!zoned)
478 			return (SET_ERROR(EPERM));
479 
480 		/* must be writable by this zone */
481 		if (!writable)
482 			return (SET_ERROR(EPERM));
483 	}
484 	return (0);
485 }
486 
487 static int
zfs_dozonecheck(const char * dataset,cred_t * cr)488 zfs_dozonecheck(const char *dataset, cred_t *cr)
489 {
490 	uint64_t zoned;
491 
492 	if (dsl_prop_get_integer(dataset, zfs_prop_to_name(ZFS_PROP_ZONED),
493 	    &zoned, NULL))
494 		return (SET_ERROR(ENOENT));
495 
496 	return (zfs_dozonecheck_impl(dataset, zoned, cr));
497 }
498 
499 static int
zfs_dozonecheck_ds(const char * dataset,dsl_dataset_t * ds,cred_t * cr)500 zfs_dozonecheck_ds(const char *dataset, dsl_dataset_t *ds, cred_t *cr)
501 {
502 	uint64_t zoned;
503 
504 	if (dsl_prop_get_int_ds(ds, zfs_prop_to_name(ZFS_PROP_ZONED), &zoned))
505 		return (SET_ERROR(ENOENT));
506 
507 	return (zfs_dozonecheck_impl(dataset, zoned, cr));
508 }
509 
510 static int
zfs_secpolicy_write_perms_ds(const char * name,dsl_dataset_t * ds,const char * perm,cred_t * cr)511 zfs_secpolicy_write_perms_ds(const char *name, dsl_dataset_t *ds,
512     const char *perm, cred_t *cr)
513 {
514 	int error;
515 
516 	error = zfs_dozonecheck_ds(name, ds, cr);
517 	if (error == 0) {
518 		error = secpolicy_zfs(cr);
519 		if (error != 0)
520 			error = dsl_deleg_access_impl(ds, perm, cr);
521 	}
522 	return (error);
523 }
524 
525 static int
zfs_secpolicy_write_perms(const char * name,const char * perm,cred_t * cr)526 zfs_secpolicy_write_perms(const char *name, const char *perm, cred_t *cr)
527 {
528 	int error;
529 	dsl_dataset_t *ds;
530 	dsl_pool_t *dp;
531 
532 	/*
533 	 * First do a quick check for root in the global zone, which
534 	 * is allowed to do all write_perms.  This ensures that zfs_ioc_*
535 	 * will get to handle nonexistent datasets.
536 	 */
537 	if (INGLOBALZONE(curproc) && secpolicy_zfs(cr) == 0)
538 		return (0);
539 
540 	error = dsl_pool_hold(name, FTAG, &dp);
541 	if (error != 0)
542 		return (error);
543 
544 	error = dsl_dataset_hold(dp, name, FTAG, &ds);
545 	if (error != 0) {
546 		dsl_pool_rele(dp, FTAG);
547 		return (error);
548 	}
549 
550 	error = zfs_secpolicy_write_perms_ds(name, ds, perm, cr);
551 
552 	dsl_dataset_rele(ds, FTAG);
553 	dsl_pool_rele(dp, FTAG);
554 	return (error);
555 }
556 
557 /*
558  * Check dsl_deleg permission for zoned_uid datasets.
559  *
560  * This bypasses zfs_dozonecheck_ds() (which requires the 'zoned' property)
561  * because zoned_uid datasets use a different authentication model.  The zone
562  * check was already performed by zone_dataset_admin_check().
563  *
564  * Returns 0 if permission is granted, error otherwise.
565  * ECANCELED from dsl_deleg_access_impl() means delegation is disabled on the
566  * pool — in that case we deny access (POLP: no delegation = no access).
567  */
568 static int
zfs_secpolicy_zoned_uid_deleg(const char * name,const char * perm,cred_t * cr)569 zfs_secpolicy_zoned_uid_deleg(const char *name, const char *perm, cred_t *cr)
570 {
571 	dsl_pool_t *dp;
572 	dsl_dataset_t *ds;
573 	int error;
574 
575 	error = dsl_pool_hold(name, FTAG, &dp);
576 	if (error != 0)
577 		return (error);
578 	error = dsl_dataset_hold(dp, name, FTAG, &ds);
579 	if (error != 0) {
580 		dsl_pool_rele(dp, FTAG);
581 		return (error);
582 	}
583 	error = dsl_deleg_access_impl(ds, perm, cr);
584 	dsl_dataset_rele(ds, FTAG);
585 	dsl_pool_rele(dp, FTAG);
586 
587 	/* ECANCELED = delegation disabled on pool; deny access (POLP) */
588 	if (error == ECANCELED)
589 		return (SET_ERROR(EPERM));
590 	return (error);
591 }
592 
593 /*
594  * Policy for setting the security label property.
595  *
596  * Returns 0 for success, non-zero for access and other errors.
597  */
598 static int
zfs_set_slabel_policy(const char * name,const char * strval,cred_t * cr)599 zfs_set_slabel_policy(const char *name, const char *strval, cred_t *cr)
600 {
601 #ifdef HAVE_MLSLABEL
602 	char		ds_hexsl[MAXNAMELEN];
603 	bslabel_t	ds_sl, new_sl;
604 	boolean_t	new_default = FALSE;
605 	uint64_t	zoned;
606 	int		needed_priv = -1;
607 	int		error;
608 
609 	/* First get the existing dataset label. */
610 	error = dsl_prop_get(name, zfs_prop_to_name(ZFS_PROP_MLSLABEL),
611 	    1, sizeof (ds_hexsl), &ds_hexsl, NULL);
612 	if (error != 0)
613 		return (SET_ERROR(EPERM));
614 
615 	if (strcasecmp(strval, ZFS_MLSLABEL_DEFAULT) == 0)
616 		new_default = TRUE;
617 
618 	/* The label must be translatable */
619 	if (!new_default && (hexstr_to_label(strval, &new_sl) != 0))
620 		return (SET_ERROR(EINVAL));
621 
622 	/*
623 	 * In a non-global zone, disallow attempts to set a label that
624 	 * doesn't match that of the zone; otherwise no other checks
625 	 * are needed.
626 	 */
627 	if (!INGLOBALZONE(curproc)) {
628 		if (new_default || !blequal(&new_sl, CR_SL(CRED())))
629 			return (SET_ERROR(EPERM));
630 		return (0);
631 	}
632 
633 	/*
634 	 * For global-zone datasets (i.e., those whose zoned property is
635 	 * "off", verify that the specified new label is valid for the
636 	 * global zone.
637 	 */
638 	if (dsl_prop_get_integer(name,
639 	    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, NULL))
640 		return (SET_ERROR(EPERM));
641 	if (!zoned) {
642 		if (zfs_check_global_label(name, strval) != 0)
643 			return (SET_ERROR(EPERM));
644 	}
645 
646 	/*
647 	 * If the existing dataset label is nondefault, check if the
648 	 * dataset is mounted (label cannot be changed while mounted).
649 	 * Get the zfsvfs_t; if there isn't one, then the dataset isn't
650 	 * mounted (or isn't a dataset, doesn't exist, ...).
651 	 */
652 	if (strcasecmp(ds_hexsl, ZFS_MLSLABEL_DEFAULT) != 0) {
653 		objset_t *os;
654 		static const char *setsl_tag = "setsl_tag";
655 
656 		/*
657 		 * Try to own the dataset; abort if there is any error,
658 		 * (e.g., already mounted, in use, or other error).
659 		 */
660 		error = dmu_objset_own(name, DMU_OST_ZFS, B_TRUE, B_TRUE,
661 		    setsl_tag, &os);
662 		if (error != 0)
663 			return (SET_ERROR(EPERM));
664 
665 		dmu_objset_disown(os, B_TRUE, setsl_tag);
666 
667 		if (new_default) {
668 			needed_priv = PRIV_FILE_DOWNGRADE_SL;
669 			goto out_check;
670 		}
671 
672 		if (hexstr_to_label(strval, &new_sl) != 0)
673 			return (SET_ERROR(EPERM));
674 
675 		if (blstrictdom(&ds_sl, &new_sl))
676 			needed_priv = PRIV_FILE_DOWNGRADE_SL;
677 		else if (blstrictdom(&new_sl, &ds_sl))
678 			needed_priv = PRIV_FILE_UPGRADE_SL;
679 	} else {
680 		/* dataset currently has a default label */
681 		if (!new_default)
682 			needed_priv = PRIV_FILE_UPGRADE_SL;
683 	}
684 
685 out_check:
686 	if (needed_priv != -1)
687 		return (PRIV_POLICY(cr, needed_priv, B_FALSE, EPERM, NULL));
688 	return (0);
689 #else
690 	return (SET_ERROR(ENOTSUP));
691 #endif /* HAVE_MLSLABEL */
692 }
693 
694 static int
zfs_secpolicy_setprop(const char * dsname,zfs_prop_t prop,nvpair_t * propval,cred_t * cr)695 zfs_secpolicy_setprop(const char *dsname, zfs_prop_t prop, nvpair_t *propval,
696     cred_t *cr)
697 {
698 	const char *strval;
699 	zone_admin_result_t zone_result;
700 
701 	/*
702 	 * Check zoned_uid delegation first.  However, even delegated
703 	 * namespace users must not be allowed to modify zoned_uid itself.
704 	 */
705 	zone_result = zone_dataset_admin_check(dsname, ZONE_OP_SETPROP, NULL);
706 	if (zone_result == ZONE_ADMIN_ALLOWED) {
707 		if (prop == ZFS_PROP_ZONED_UID)
708 			return (SET_ERROR(EPERM));
709 		if (prop == ZFS_PROP_FILESYSTEM_LIMIT ||
710 		    prop == ZFS_PROP_SNAPSHOT_LIMIT) {
711 			char setpoint[ZFS_MAX_DATASET_NAME_LEN];
712 			uint64_t zoned_uid_val = 0;
713 			if (dsl_prop_get(dsname, "zoned_uid", 8, 1,
714 			    &zoned_uid_val, setpoint) == 0 &&
715 			    zoned_uid_val != 0 &&
716 			    strcmp(dsname, setpoint) == 0)
717 				return (SET_ERROR(EPERM));
718 		}
719 		return (zfs_secpolicy_zoned_uid_deleg(dsname,
720 		    zfs_prop_to_name(prop), cr));
721 	}
722 	if (zone_result == ZONE_ADMIN_DENIED)
723 		return (SET_ERROR(EPERM));
724 
725 	/*
726 	 * Check permissions for special properties.
727 	 */
728 	switch (prop) {
729 	default:
730 		break;
731 	case ZFS_PROP_ZONED:
732 		/*
733 		 * Disallow setting of 'zoned' from within a local zone.
734 		 */
735 		if (!INGLOBALZONE(curproc))
736 			return (SET_ERROR(EPERM));
737 		break;
738 	case ZFS_PROP_ZONED_UID:
739 		/*
740 		 * Disallow setting of 'zoned_uid' from within a
741 		 * delegated namespace -- only global zone can manage
742 		 * delegation assignments.
743 		 */
744 		if (!INGLOBALZONE(curproc))
745 			return (SET_ERROR(EPERM));
746 		break;
747 
748 	case ZFS_PROP_QUOTA:
749 	case ZFS_PROP_FILESYSTEM_LIMIT:
750 	case ZFS_PROP_SNAPSHOT_LIMIT:
751 		if (!INGLOBALZONE(curproc)) {
752 			uint64_t zoned;
753 			char setpoint[ZFS_MAX_DATASET_NAME_LEN];
754 			/*
755 			 * Unprivileged users are allowed to modify the
756 			 * limit on things *under* (ie. contained by)
757 			 * the thing they own.
758 			 */
759 			if (dsl_prop_get_integer(dsname,
760 			    zfs_prop_to_name(ZFS_PROP_ZONED), &zoned, setpoint))
761 				return (SET_ERROR(EPERM));
762 			if (!zoned || strlen(dsname) <= strlen(setpoint))
763 				return (SET_ERROR(EPERM));
764 		}
765 		break;
766 
767 	case ZFS_PROP_MLSLABEL:
768 		if (!is_system_labeled())
769 			return (SET_ERROR(EPERM));
770 
771 		if (nvpair_value_string(propval, &strval) == 0) {
772 			int err;
773 
774 			err = zfs_set_slabel_policy(dsname, strval, CRED());
775 			if (err != 0)
776 				return (err);
777 		}
778 		break;
779 	}
780 
781 	return (zfs_secpolicy_write_perms(dsname, zfs_prop_to_name(prop), cr));
782 }
783 
784 static int
zfs_secpolicy_set_fsacl(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)785 zfs_secpolicy_set_fsacl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
786 {
787 	/*
788 	 * permission to set permissions will be evaluated later in
789 	 * dsl_deleg_can_allow()
790 	 */
791 	(void) innvl;
792 	return (zfs_dozonecheck(zc->zc_name, cr));
793 }
794 
795 static int
zfs_secpolicy_rollback(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)796 zfs_secpolicy_rollback(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
797 {
798 	(void) innvl;
799 	return (zfs_secpolicy_write_perms(zc->zc_name,
800 	    ZFS_DELEG_PERM_ROLLBACK, cr));
801 }
802 
803 static int
zfs_secpolicy_send(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)804 zfs_secpolicy_send(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
805 {
806 	(void) innvl;
807 	dsl_pool_t *dp;
808 	dsl_dataset_t *ds;
809 	const char *cp;
810 	int error;
811 	boolean_t rawok = !!(zc->zc_flags & 0x8);
812 
813 	/*
814 	 * Generate the current snapshot name from the given objsetid, then
815 	 * use that name for the secpolicy/zone checks.
816 	 */
817 	cp = strchr(zc->zc_name, '@');
818 	if (cp == NULL)
819 		return (SET_ERROR(EINVAL));
820 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
821 	if (error != 0)
822 		return (error);
823 
824 	error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &ds);
825 	if (error != 0) {
826 		dsl_pool_rele(dp, FTAG);
827 		return (error);
828 	}
829 
830 	dsl_dataset_name(ds, zc->zc_name);
831 
832 	error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
833 	    ZFS_DELEG_PERM_SEND, cr);
834 	if (error != 0 && rawok) {
835 		error = zfs_secpolicy_write_perms_ds(zc->zc_name, ds,
836 		    ZFS_DELEG_PERM_SEND_RAW, cr);
837 	}
838 	dsl_dataset_rele(ds, FTAG);
839 	dsl_pool_rele(dp, FTAG);
840 
841 	return (error);
842 }
843 
844 static int
zfs_secpolicy_send_new(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)845 zfs_secpolicy_send_new(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
846 {
847 	boolean_t rawok = nvlist_exists(innvl, "rawok");
848 	int error;
849 
850 	(void) innvl;
851 	error = zfs_secpolicy_write_perms(zc->zc_name,
852 	    ZFS_DELEG_PERM_SEND, cr);
853 	if (error != 0 && rawok) {
854 		error = zfs_secpolicy_write_perms(zc->zc_name,
855 		    ZFS_DELEG_PERM_SEND_RAW, cr);
856 	}
857 	return (error);
858 }
859 
860 static int
zfs_secpolicy_share(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)861 zfs_secpolicy_share(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
862 {
863 	(void) zc, (void) innvl, (void) cr;
864 	return (SET_ERROR(ENOTSUP));
865 }
866 
867 static int
zfs_secpolicy_smb_acl(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)868 zfs_secpolicy_smb_acl(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
869 {
870 	(void) zc, (void) innvl, (void) cr;
871 	return (SET_ERROR(ENOTSUP));
872 }
873 
/*
 * Derive the parent of 'datasetname' into 'parent' (a buffer of
 * 'parentsize' bytes) by removing the trailing "@bla" or, if there is no
 * '@', the trailing "/bla".
 *
 * Returns 0 on success, or ENOENT if the name contains neither '@' nor
 * '/'.  In the ENOENT case 'parent' holds an unmodified copy of the name.
 */
static int
zfs_get_parent(const char *datasetname, char *parent, int parentsize)
{
	char *sep;

	(void) strlcpy(parent, datasetname, parentsize);

	/* A snapshot separator takes precedence over a path separator. */
	sep = strrchr(parent, '@');
	if (sep == NULL)
		sep = strrchr(parent, '/');
	if (sep == NULL)
		return (SET_ERROR(ENOENT));

	*sep = '\0';
	return (0);
}
895 
896 int
zfs_secpolicy_destroy_perms(const char * name,cred_t * cr)897 zfs_secpolicy_destroy_perms(const char *name, cred_t *cr)
898 {
899 	int error;
900 	zone_admin_result_t result;
901 
902 	/* Check zoned_uid delegation first */
903 	result = zone_dataset_admin_check(name, ZONE_OP_DESTROY, NULL);
904 	if (result == ZONE_ADMIN_ALLOWED) {
905 		if ((error = zfs_secpolicy_zoned_uid_deleg(name,
906 		    ZFS_DELEG_PERM_DESTROY, cr)) != 0)
907 			return (error);
908 		return (zfs_secpolicy_zoned_uid_deleg(name,
909 		    ZFS_DELEG_PERM_MOUNT, cr));
910 	}
911 	if (result == ZONE_ADMIN_DENIED)
912 		return (SET_ERROR(EPERM));
913 
914 	/* NOT_APPLICABLE: continue with existing checks */
915 	if ((error = zfs_secpolicy_write_perms(name,
916 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
917 		return (error);
918 
919 	return (zfs_secpolicy_write_perms(name, ZFS_DELEG_PERM_DESTROY, cr));
920 }
921 
922 static int
zfs_secpolicy_destroy(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)923 zfs_secpolicy_destroy(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
924 {
925 	(void) innvl;
926 	return (zfs_secpolicy_destroy_perms(zc->zc_name, cr));
927 }
928 
929 /*
930  * Destroying snapshots with delegated permissions requires
931  * descendant mount and destroy permissions.
932  */
933 static int
zfs_secpolicy_destroy_snaps(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)934 zfs_secpolicy_destroy_snaps(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
935 {
936 	(void) zc;
937 	nvlist_t *snaps;
938 	nvpair_t *pair, *nextpair;
939 	int error = 0;
940 
941 	snaps = fnvlist_lookup_nvlist(innvl, "snaps");
942 
943 	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
944 	    pair = nextpair) {
945 		nextpair = nvlist_next_nvpair(snaps, pair);
946 		error = zfs_secpolicy_destroy_perms(nvpair_name(pair), cr);
947 		if (error == ENOENT) {
948 			/*
949 			 * Ignore any snapshots that don't exist (we consider
950 			 * them "already destroyed").  Remove the name from the
951 			 * nvl here in case the snapshot is created between
952 			 * now and when we try to destroy it (in which case
953 			 * we don't want to destroy it since we haven't
954 			 * checked for permission).
955 			 */
956 			fnvlist_remove_nvpair(snaps, pair);
957 			error = 0;
958 		}
959 		if (error != 0)
960 			break;
961 	}
962 
963 	return (error);
964 }
965 
966 int
zfs_secpolicy_rename_perms(const char * from,const char * to,cred_t * cr)967 zfs_secpolicy_rename_perms(const char *from, const char *to, cred_t *cr)
968 {
969 	char	parentname[ZFS_MAX_DATASET_NAME_LEN];
970 	int	error;
971 	zone_admin_result_t result;
972 
973 	/* Check zoned_uid delegation first */
974 	result = zone_dataset_admin_check(from, ZONE_OP_RENAME, to);
975 	if (result == ZONE_ADMIN_ALLOWED) {
976 		if ((error = zfs_secpolicy_zoned_uid_deleg(from,
977 		    ZFS_DELEG_PERM_RENAME, cr)) != 0)
978 			return (error);
979 		return (zfs_secpolicy_zoned_uid_deleg(from,
980 		    ZFS_DELEG_PERM_MOUNT, cr));
981 	}
982 	if (result == ZONE_ADMIN_DENIED)
983 		return (SET_ERROR(EPERM));
984 
985 	/* NOT_APPLICABLE: continue with existing checks */
986 	if ((error = zfs_secpolicy_write_perms(from,
987 	    ZFS_DELEG_PERM_RENAME, cr)) != 0)
988 		return (error);
989 
990 	if ((error = zfs_secpolicy_write_perms(from,
991 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
992 		return (error);
993 
994 	if ((error = zfs_get_parent(to, parentname,
995 	    sizeof (parentname))) != 0)
996 		return (error);
997 
998 	if ((error = zfs_secpolicy_write_perms(parentname,
999 	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
1000 		return (error);
1001 
1002 	if ((error = zfs_secpolicy_write_perms(parentname,
1003 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
1004 		return (error);
1005 
1006 	return (error);
1007 }
1008 
1009 static int
zfs_secpolicy_rename(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1010 zfs_secpolicy_rename(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1011 {
1012 	(void) innvl;
1013 	return (zfs_secpolicy_rename_perms(zc->zc_name, zc->zc_value, cr));
1014 }
1015 
1016 static int
zfs_secpolicy_promote(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1017 zfs_secpolicy_promote(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1018 {
1019 	(void) innvl;
1020 	dsl_pool_t *dp;
1021 	dsl_dataset_t *clone;
1022 	int error;
1023 
1024 	error = zfs_secpolicy_write_perms(zc->zc_name,
1025 	    ZFS_DELEG_PERM_PROMOTE, cr);
1026 	if (error != 0)
1027 		return (error);
1028 
1029 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
1030 	if (error != 0)
1031 		return (error);
1032 
1033 	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &clone);
1034 
1035 	if (error == 0) {
1036 		char parentname[ZFS_MAX_DATASET_NAME_LEN];
1037 		dsl_dataset_t *origin = NULL;
1038 		dsl_dir_t *dd;
1039 		dd = clone->ds_dir;
1040 
1041 		error = dsl_dataset_hold_obj(dd->dd_pool,
1042 		    dsl_dir_phys(dd)->dd_origin_obj, FTAG, &origin);
1043 		if (error != 0) {
1044 			dsl_dataset_rele(clone, FTAG);
1045 			dsl_pool_rele(dp, FTAG);
1046 			return (error);
1047 		}
1048 
1049 		error = zfs_secpolicy_write_perms_ds(zc->zc_name, clone,
1050 		    ZFS_DELEG_PERM_MOUNT, cr);
1051 
1052 		dsl_dataset_name(origin, parentname);
1053 		if (error == 0) {
1054 			error = zfs_secpolicy_write_perms_ds(parentname, origin,
1055 			    ZFS_DELEG_PERM_PROMOTE, cr);
1056 		}
1057 		dsl_dataset_rele(clone, FTAG);
1058 		dsl_dataset_rele(origin, FTAG);
1059 	}
1060 	dsl_pool_rele(dp, FTAG);
1061 	return (error);
1062 }
1063 
1064 static int
zfs_secpolicy_recv(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1065 zfs_secpolicy_recv(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1066 {
1067 	(void) innvl;
1068 	int error;
1069 
1070 	/*
1071 	 * zfs receive -F requires full receive permission,
1072 	 * otherwise receive:append permission is enough
1073 	 */
1074 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1075 	    ZFS_DELEG_PERM_RECEIVE, cr)) != 0) {
1076 		if (zc->zc_guid || nvlist_exists(innvl, "force"))
1077 			return (error);
1078 		if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1079 		    ZFS_DELEG_PERM_RECEIVE_APPEND, cr)) != 0)
1080 			return (error);
1081 	}
1082 
1083 	if ((error = zfs_secpolicy_write_perms(zc->zc_name,
1084 	    ZFS_DELEG_PERM_MOUNT, cr)) != 0)
1085 		return (error);
1086 
1087 	return (zfs_secpolicy_write_perms(zc->zc_name,
1088 	    ZFS_DELEG_PERM_CREATE, cr));
1089 }
1090 
1091 int
zfs_secpolicy_snapshot_perms(const char * name,cred_t * cr)1092 zfs_secpolicy_snapshot_perms(const char *name, cred_t *cr)
1093 {
1094 	zone_admin_result_t result;
1095 
1096 	/* Check zoned_uid delegation first */
1097 	result = zone_dataset_admin_check(name, ZONE_OP_SNAPSHOT, NULL);
1098 	if (result == ZONE_ADMIN_ALLOWED)
1099 		return (zfs_secpolicy_zoned_uid_deleg(name,
1100 		    ZFS_DELEG_PERM_SNAPSHOT, cr));
1101 	if (result == ZONE_ADMIN_DENIED)
1102 		return (SET_ERROR(EPERM));
1103 
1104 	/* NOT_APPLICABLE: continue with existing checks */
1105 	return (zfs_secpolicy_write_perms(name,
1106 	    ZFS_DELEG_PERM_SNAPSHOT, cr));
1107 }
1108 
1109 /*
1110  * Check for permission to create each snapshot in the nvlist.
1111  */
1112 static int
zfs_secpolicy_snapshot(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1113 zfs_secpolicy_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1114 {
1115 	(void) zc;
1116 	nvlist_t *snaps;
1117 	int error = 0;
1118 	nvpair_t *pair;
1119 
1120 	snaps = fnvlist_lookup_nvlist(innvl, "snaps");
1121 
1122 	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
1123 	    pair = nvlist_next_nvpair(snaps, pair)) {
1124 		char *name = (char *)nvpair_name(pair);
1125 		char *atp = strchr(name, '@');
1126 
1127 		if (atp == NULL) {
1128 			error = SET_ERROR(EINVAL);
1129 			break;
1130 		}
1131 		*atp = '\0';
1132 		error = zfs_secpolicy_snapshot_perms(name, cr);
1133 		*atp = '@';
1134 		if (error != 0)
1135 			break;
1136 	}
1137 	return (error);
1138 }
1139 
1140 /*
1141  * Check for permission to create each bookmark in the nvlist.
1142  */
1143 static int
zfs_secpolicy_bookmark(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1144 zfs_secpolicy_bookmark(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1145 {
1146 	(void) zc;
1147 	int error = 0;
1148 
1149 	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
1150 	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
1151 		char *name = (char *)nvpair_name(pair);
1152 		char *hashp = strchr(name, '#');
1153 
1154 		if (hashp == NULL) {
1155 			error = SET_ERROR(EINVAL);
1156 			break;
1157 		}
1158 		*hashp = '\0';
1159 		error = zfs_secpolicy_write_perms(name,
1160 		    ZFS_DELEG_PERM_BOOKMARK, cr);
1161 		*hashp = '#';
1162 		if (error != 0)
1163 			break;
1164 	}
1165 	return (error);
1166 }
1167 
1168 static int
zfs_secpolicy_destroy_bookmarks(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1169 zfs_secpolicy_destroy_bookmarks(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1170 {
1171 	(void) zc;
1172 	nvpair_t *pair, *nextpair;
1173 	int error = 0;
1174 
1175 	for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1176 	    pair = nextpair) {
1177 		char *name = (char *)nvpair_name(pair);
1178 		char *hashp = strchr(name, '#');
1179 		nextpair = nvlist_next_nvpair(innvl, pair);
1180 
1181 		if (hashp == NULL) {
1182 			error = SET_ERROR(EINVAL);
1183 			break;
1184 		}
1185 
1186 		*hashp = '\0';
1187 		error = zfs_secpolicy_write_perms(name,
1188 		    ZFS_DELEG_PERM_DESTROY, cr);
1189 		*hashp = '#';
1190 		if (error == ENOENT) {
1191 			/*
1192 			 * Ignore any filesystems that don't exist (we consider
1193 			 * their bookmarks "already destroyed").  Remove
1194 			 * the name from the nvl here in case the filesystem
1195 			 * is created between now and when we try to destroy
1196 			 * the bookmark (in which case we don't want to
1197 			 * destroy it since we haven't checked for permission).
1198 			 */
1199 			fnvlist_remove_nvpair(innvl, pair);
1200 			error = 0;
1201 		}
1202 		if (error != 0)
1203 			break;
1204 	}
1205 
1206 	return (error);
1207 }
1208 
1209 static int
zfs_secpolicy_log_history(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1210 zfs_secpolicy_log_history(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1211 {
1212 	(void) zc, (void) innvl, (void) cr;
1213 	/*
1214 	 * Even root must have a proper TSD so that we know what pool
1215 	 * to log to.
1216 	 */
1217 	if (tsd_get(zfs_allow_log_key) == NULL)
1218 		return (SET_ERROR(EPERM));
1219 	return (0);
1220 }
1221 
1222 static int
zfs_secpolicy_create_clone(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1223 zfs_secpolicy_create_clone(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1224 {
1225 	char		parentname[ZFS_MAX_DATASET_NAME_LEN];
1226 	int		error;
1227 	const char	*origin = NULL;
1228 	zone_admin_result_t result;
1229 
1230 	if ((error = zfs_get_parent(zc->zc_name, parentname,
1231 	    sizeof (parentname))) != 0)
1232 		return (error);
1233 
1234 	(void) nvlist_lookup_string(innvl, "origin", &origin);
1235 
1236 	/* Check zoned_uid delegation first */
1237 	result = zone_dataset_admin_check(parentname,
1238 	    origin != NULL ? ZONE_OP_CLONE : ZONE_OP_CREATE, origin);
1239 	if (result == ZONE_ADMIN_ALLOWED) {
1240 		if (origin != NULL) {
1241 			if ((error = zfs_secpolicy_zoned_uid_deleg(origin,
1242 			    ZFS_DELEG_PERM_CLONE, cr)) != 0)
1243 				return (error);
1244 		}
1245 		if ((error = zfs_secpolicy_zoned_uid_deleg(parentname,
1246 		    ZFS_DELEG_PERM_CREATE, cr)) != 0)
1247 			return (error);
1248 		return (zfs_secpolicy_zoned_uid_deleg(parentname,
1249 		    ZFS_DELEG_PERM_MOUNT, cr));
1250 	}
1251 	if (result == ZONE_ADMIN_DENIED)
1252 		return (SET_ERROR(EPERM));
1253 
1254 	/* NOT_APPLICABLE: continue with existing checks */
1255 	if (origin != NULL &&
1256 	    (error = zfs_secpolicy_write_perms(origin,
1257 	    ZFS_DELEG_PERM_CLONE, cr)) != 0)
1258 		return (error);
1259 
1260 	if ((error = zfs_secpolicy_write_perms(parentname,
1261 	    ZFS_DELEG_PERM_CREATE, cr)) != 0)
1262 		return (error);
1263 
1264 	return (zfs_secpolicy_write_perms(parentname,
1265 	    ZFS_DELEG_PERM_MOUNT, cr));
1266 }
1267 
1268 /*
1269  * Policy for pool operations - create/destroy pools, add vdevs, etc.  Requires
1270  * SYS_CONFIG privilege, which is not available in a local zone.
1271  */
1272 int
zfs_secpolicy_config(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1273 zfs_secpolicy_config(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1274 {
1275 	(void) zc, (void) innvl;
1276 
1277 	if (secpolicy_sys_config(cr, B_FALSE) != 0)
1278 		return (SET_ERROR(EPERM));
1279 
1280 	return (0);
1281 }
1282 
1283 /*
1284  * Policy for object to name lookups.
1285  */
1286 static int
zfs_secpolicy_diff(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1287 zfs_secpolicy_diff(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1288 {
1289 	(void) innvl;
1290 	int error;
1291 
1292 	if (secpolicy_sys_config(cr, B_FALSE) == 0)
1293 		return (0);
1294 
1295 	error = zfs_secpolicy_write_perms(zc->zc_name, ZFS_DELEG_PERM_DIFF, cr);
1296 	return (error);
1297 }
1298 
1299 /*
1300  * Policy for fault injection.  Requires all privileges.
1301  */
1302 static int
zfs_secpolicy_inject(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1303 zfs_secpolicy_inject(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1304 {
1305 	(void) zc, (void) innvl;
1306 	return (secpolicy_zinject(cr));
1307 }
1308 
1309 static int
zfs_secpolicy_inherit_prop(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1310 zfs_secpolicy_inherit_prop(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1311 {
1312 	(void) innvl;
1313 	zfs_prop_t prop = zfs_name_to_prop(zc->zc_value);
1314 
1315 	if (prop == ZPROP_USERPROP) {
1316 		if (!zfs_prop_user(zc->zc_value))
1317 			return (SET_ERROR(EINVAL));
1318 		zone_admin_result_t zone_result;
1319 		zone_result = zone_dataset_admin_check(zc->zc_name,
1320 		    ZONE_OP_SETPROP, NULL);
1321 		if (zone_result == ZONE_ADMIN_ALLOWED)
1322 			return (zfs_secpolicy_zoned_uid_deleg(zc->zc_name,
1323 			    ZFS_DELEG_PERM_USERPROP, cr));
1324 		if (zone_result == ZONE_ADMIN_DENIED)
1325 			return (SET_ERROR(EPERM));
1326 		return (zfs_secpolicy_write_perms(zc->zc_name,
1327 		    ZFS_DELEG_PERM_USERPROP, cr));
1328 	} else {
1329 		return (zfs_secpolicy_setprop(zc->zc_name, prop,
1330 		    NULL, cr));
1331 	}
1332 }
1333 
1334 static int
zfs_secpolicy_userspace_one(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1335 zfs_secpolicy_userspace_one(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1336 {
1337 	int err = zfs_secpolicy_read(zc, innvl, cr);
1338 	if (err)
1339 		return (err);
1340 
1341 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1342 		return (SET_ERROR(EINVAL));
1343 
1344 	if (zc->zc_value[0] == 0) {
1345 		/*
1346 		 * They are asking about a posix uid/gid.  If it's
1347 		 * themself, allow it.
1348 		 */
1349 		if (zc->zc_objset_type == ZFS_PROP_USERUSED ||
1350 		    zc->zc_objset_type == ZFS_PROP_USERQUOTA ||
1351 		    zc->zc_objset_type == ZFS_PROP_USEROBJUSED ||
1352 		    zc->zc_objset_type == ZFS_PROP_USEROBJQUOTA) {
1353 			if (zc->zc_guid == crgetuid(cr))
1354 				return (0);
1355 		} else if (zc->zc_objset_type == ZFS_PROP_GROUPUSED ||
1356 		    zc->zc_objset_type == ZFS_PROP_GROUPQUOTA ||
1357 		    zc->zc_objset_type == ZFS_PROP_GROUPOBJUSED ||
1358 		    zc->zc_objset_type == ZFS_PROP_GROUPOBJQUOTA) {
1359 			if (groupmember(zc->zc_guid, cr))
1360 				return (0);
1361 		}
1362 		/* else is for project quota/used */
1363 	}
1364 
1365 	return (zfs_secpolicy_write_perms(zc->zc_name,
1366 	    userquota_perms[zc->zc_objset_type], cr));
1367 }
1368 
1369 static int
zfs_secpolicy_userspace_many(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1370 zfs_secpolicy_userspace_many(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1371 {
1372 	int err = zfs_secpolicy_read(zc, innvl, cr);
1373 	if (err)
1374 		return (err);
1375 
1376 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
1377 		return (SET_ERROR(EINVAL));
1378 
1379 	return (zfs_secpolicy_write_perms(zc->zc_name,
1380 	    userquota_perms[zc->zc_objset_type], cr));
1381 }
1382 
1383 static int
zfs_secpolicy_userspace_upgrade(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1384 zfs_secpolicy_userspace_upgrade(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1385 {
1386 	(void) innvl;
1387 	return (zfs_secpolicy_setprop(zc->zc_name, ZFS_PROP_VERSION,
1388 	    NULL, cr));
1389 }
1390 
1391 static int
zfs_secpolicy_hold(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1392 zfs_secpolicy_hold(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1393 {
1394 	(void) zc;
1395 	nvpair_t *pair;
1396 	nvlist_t *holds;
1397 	int error;
1398 
1399 	holds = fnvlist_lookup_nvlist(innvl, "holds");
1400 
1401 	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
1402 	    pair = nvlist_next_nvpair(holds, pair)) {
1403 		char fsname[ZFS_MAX_DATASET_NAME_LEN];
1404 		error = dmu_fsname(nvpair_name(pair), fsname);
1405 		if (error != 0)
1406 			return (error);
1407 		error = zfs_secpolicy_write_perms(fsname,
1408 		    ZFS_DELEG_PERM_HOLD, cr);
1409 		if (error != 0)
1410 			return (error);
1411 	}
1412 	return (0);
1413 }
1414 
1415 static int
zfs_secpolicy_release(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1416 zfs_secpolicy_release(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1417 {
1418 	(void) zc;
1419 	nvpair_t *pair;
1420 	int error;
1421 
1422 	for (pair = nvlist_next_nvpair(innvl, NULL); pair != NULL;
1423 	    pair = nvlist_next_nvpair(innvl, pair)) {
1424 		char fsname[ZFS_MAX_DATASET_NAME_LEN];
1425 		error = dmu_fsname(nvpair_name(pair), fsname);
1426 		if (error != 0)
1427 			return (error);
1428 		error = zfs_secpolicy_write_perms(fsname,
1429 		    ZFS_DELEG_PERM_RELEASE, cr);
1430 		if (error != 0)
1431 			return (error);
1432 	}
1433 	return (0);
1434 }
1435 
1436 /*
1437  * Policy for allowing temporary snapshots to be taken or released
1438  */
1439 static int
zfs_secpolicy_tmp_snapshot(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1440 zfs_secpolicy_tmp_snapshot(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1441 {
1442 	/*
1443 	 * A temporary snapshot is the same as a snapshot,
1444 	 * hold, destroy and release all rolled into one.
1445 	 * Delegated diff alone is sufficient that we allow this.
1446 	 */
1447 	int error;
1448 
1449 	if (zfs_secpolicy_write_perms(zc->zc_name,
1450 	    ZFS_DELEG_PERM_DIFF, cr) == 0)
1451 		return (0);
1452 
1453 	error = zfs_secpolicy_snapshot_perms(zc->zc_name, cr);
1454 
1455 	if (innvl != NULL) {
1456 		if (error == 0)
1457 			error = zfs_secpolicy_hold(zc, innvl, cr);
1458 		if (error == 0)
1459 			error = zfs_secpolicy_release(zc, innvl, cr);
1460 		if (error == 0)
1461 			error = zfs_secpolicy_destroy(zc, innvl, cr);
1462 	}
1463 	return (error);
1464 }
1465 
1466 static int
zfs_secpolicy_load_key(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1467 zfs_secpolicy_load_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1468 {
1469 	return (zfs_secpolicy_write_perms(zc->zc_name,
1470 	    ZFS_DELEG_PERM_LOAD_KEY, cr));
1471 }
1472 
1473 static int
zfs_secpolicy_change_key(zfs_cmd_t * zc,nvlist_t * innvl,cred_t * cr)1474 zfs_secpolicy_change_key(zfs_cmd_t *zc, nvlist_t *innvl, cred_t *cr)
1475 {
1476 	return (zfs_secpolicy_write_perms(zc->zc_name,
1477 	    ZFS_DELEG_PERM_CHANGE_KEY, cr));
1478 }
1479 
1480 /*
1481  * Returns the nvlist as specified by the user in the zfs_cmd_t.
1482  */
1483 static int
get_nvlist(uint64_t nvl,uint64_t size,int iflag,nvlist_t ** nvp)1484 get_nvlist(uint64_t nvl, uint64_t size, int iflag, nvlist_t **nvp)
1485 {
1486 	char *packed;
1487 	int error;
1488 	nvlist_t *list = NULL;
1489 
1490 	/*
1491 	 * Read in and unpack the user-supplied nvlist.
1492 	 */
1493 	if (size == 0)
1494 		return (SET_ERROR(EINVAL));
1495 
1496 	packed = vmem_alloc(size, KM_SLEEP);
1497 
1498 	if (ddi_copyin((void *)(uintptr_t)nvl, packed, size, iflag) != 0) {
1499 		vmem_free(packed, size);
1500 		return (SET_ERROR(EFAULT));
1501 	}
1502 
1503 	if ((error = nvlist_unpack(packed, size, &list, 0)) != 0) {
1504 		vmem_free(packed, size);
1505 		return (error);
1506 	}
1507 
1508 	vmem_free(packed, size);
1509 
1510 	*nvp = list;
1511 	return (0);
1512 }
1513 
1514 /*
1515  * Reduce the size of this nvlist until it can be serialized in 'max' bytes.
1516  * Entries will be removed from the end of the nvlist, and one int32 entry
1517  * named "N_MORE_ERRORS" will be added indicating how many entries were
1518  * removed.
1519  */
1520 static int
nvlist_smush(nvlist_t * errors,size_t max)1521 nvlist_smush(nvlist_t *errors, size_t max)
1522 {
1523 	size_t size;
1524 
1525 	size = fnvlist_size(errors);
1526 
1527 	if (size > max) {
1528 		nvpair_t *more_errors;
1529 		int n = 0;
1530 
1531 		if (max < 1024)
1532 			return (SET_ERROR(ENOMEM));
1533 
1534 		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, 0);
1535 		more_errors = nvlist_prev_nvpair(errors, NULL);
1536 
1537 		do {
1538 			nvpair_t *pair = nvlist_prev_nvpair(errors,
1539 			    more_errors);
1540 			fnvlist_remove_nvpair(errors, pair);
1541 			n++;
1542 			size = fnvlist_size(errors);
1543 		} while (size > max);
1544 
1545 		fnvlist_remove_nvpair(errors, more_errors);
1546 		fnvlist_add_int32(errors, ZPROP_N_MORE_ERRORS, n);
1547 		ASSERT3U(fnvlist_size(errors), <=, max);
1548 	}
1549 
1550 	return (0);
1551 }
1552 
1553 static int
put_nvlist(zfs_cmd_t * zc,nvlist_t * nvl)1554 put_nvlist(zfs_cmd_t *zc, nvlist_t *nvl)
1555 {
1556 	char *packed = NULL;
1557 	int error = 0;
1558 	size_t size;
1559 
1560 	size = fnvlist_size(nvl);
1561 
1562 	if (size > zc->zc_nvlist_dst_size) {
1563 		error = SET_ERROR(ENOMEM);
1564 	} else {
1565 		packed = fnvlist_pack(nvl, &size);
1566 		if (ddi_copyout(packed, (void *)(uintptr_t)zc->zc_nvlist_dst,
1567 		    size, zc->zc_iflags) != 0)
1568 			error = SET_ERROR(EFAULT);
1569 		fnvlist_pack_free(packed, size);
1570 	}
1571 
1572 	zc->zc_nvlist_dst_size = size;
1573 	zc->zc_nvlist_dst_filled = B_TRUE;
1574 	return (error);
1575 }
1576 
1577 int
getzfsvfs_impl(objset_t * os,zfsvfs_t ** zfvp)1578 getzfsvfs_impl(objset_t *os, zfsvfs_t **zfvp)
1579 {
1580 	int error = 0;
1581 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
1582 		return (SET_ERROR(EINVAL));
1583 	}
1584 
1585 	mutex_enter(&os->os_user_ptr_lock);
1586 	*zfvp = dmu_objset_get_user(os);
1587 	/* bump s_active only when non-zero to prevent umount race */
1588 	error = zfs_vfs_ref(zfvp);
1589 	mutex_exit(&os->os_user_ptr_lock);
1590 	return (error);
1591 }
1592 
1593 int
getzfsvfs(const char * dsname,zfsvfs_t ** zfvp)1594 getzfsvfs(const char *dsname, zfsvfs_t **zfvp)
1595 {
1596 	objset_t *os;
1597 	int error;
1598 
1599 	error = dmu_objset_hold(dsname, FTAG, &os);
1600 	if (error != 0)
1601 		return (error);
1602 
1603 	error = getzfsvfs_impl(os, zfvp);
1604 	dmu_objset_rele(os, FTAG);
1605 	return (error);
1606 }
1607 
1608 /*
1609  * Find a zfsvfs_t for a mounted filesystem, or create our own, in which
1610  * case its z_sb will be NULL, and it will be opened as the owner.
1611  * If 'writer' is set, the z_teardown_lock will be held for RW_WRITER,
1612  * which prevents all inode ops from running.
1613  */
1614 static int
zfsvfs_hold(const char * name,const void * tag,zfsvfs_t ** zfvp,boolean_t writer)1615 zfsvfs_hold(const char *name, const void *tag, zfsvfs_t **zfvp,
1616     boolean_t writer)
1617 {
1618 	int error = 0;
1619 
1620 	if (getzfsvfs(name, zfvp) != 0)
1621 		error = zfsvfs_create(name, B_FALSE, zfvp);
1622 	if (error == 0) {
1623 		if (writer)
1624 			ZFS_TEARDOWN_ENTER_WRITE(*zfvp, tag);
1625 		else
1626 			ZFS_TEARDOWN_ENTER_READ(*zfvp, tag);
1627 		if ((*zfvp)->z_unmounted) {
1628 			/*
1629 			 * XXX we could probably try again, since the unmounting
1630 			 * thread should be just about to disassociate the
1631 			 * objset from the zfsvfs.
1632 			 */
1633 			ZFS_TEARDOWN_EXIT(*zfvp, tag);
1634 			zfs_vfs_rele(*zfvp);
1635 			return (SET_ERROR(EBUSY));
1636 		}
1637 	}
1638 	return (error);
1639 }
1640 
1641 static void
zfsvfs_rele(zfsvfs_t * zfsvfs,const void * tag)1642 zfsvfs_rele(zfsvfs_t *zfsvfs, const void *tag)
1643 {
1644 	ZFS_TEARDOWN_EXIT(zfsvfs, tag);
1645 
1646 	if (zfs_vfs_held(zfsvfs)) {
1647 		zfs_vfs_rele(zfsvfs);
1648 	} else {
1649 		dmu_objset_disown(zfsvfs->z_os, B_TRUE, zfsvfs);
1650 		zfsvfs_free(zfsvfs);
1651 	}
1652 }
1653 
/*
 * inputs:
 * zc_name		name of the pool
 * zc_nvlist_conf	packed config nvlist handed to spa_create() (required)
 * zc_nvlist_src	packed pool property nvlist (optional)
 *
 * outputs:
 * zc_nvlist_dst	nvlist holding ZPOOL_CONFIG_CREATE_INFO, only when
 *			spa_create() hands back extra information
 */
static int
zfs_ioc_pool_create(zfs_cmd_t *zc)
{
	int error;
	nvlist_t *config, *props = NULL;
	nvlist_t *rootprops = NULL;
	nvlist_t *zplprops = NULL;
	dsl_crypto_params_t *dcp = NULL;
	const char *spa_name = zc->zc_name;
	boolean_t unload_wkey = B_TRUE;
	nvlist_t *errinfo = NULL;

	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
	    zc->zc_iflags, &config)))
		return (error);

	/* The property nvlist is optional; only fetch it when one is given. */
	if (zc->zc_nvlist_src_size != 0 && (error =
	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
	    zc->zc_iflags, &props))) {
		nvlist_free(config);
		return (error);
	}

	if (props) {
		nvlist_t *nvl = NULL;
		nvlist_t *hidden_args = NULL;
		uint64_t version = SPA_VERSION;
		const char *tname;

		/* 'version' keeps its SPA_VERSION default if not supplied. */
		(void) nvlist_lookup_uint64(props,
		    zpool_prop_to_name(ZPOOL_PROP_VERSION), &version);
		if (!SPA_VERSION_IS_SUPPORTED(version)) {
			error = SET_ERROR(EINVAL);
			goto pool_props_bad;
		}
		/*
		 * Keep a private copy of the root filesystem properties and
		 * strip them from the props passed to spa_create().
		 */
		(void) nvlist_lookup_nvlist(props, ZPOOL_ROOTFS_PROPS, &nvl);
		if (nvl) {
			error = nvlist_dup(nvl, &rootprops, KM_SLEEP);
			if (error != 0)
				goto pool_props_bad;
			(void) nvlist_remove_all(props, ZPOOL_ROOTFS_PROPS);
		}

		/*
		 * The hidden args feed the crypto parameters and are then
		 * removed from props as well.
		 */
		(void) nvlist_lookup_nvlist(props, ZPOOL_HIDDEN_ARGS,
		    &hidden_args);
		error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE,
		    rootprops, hidden_args, &dcp);
		if (error != 0)
			goto pool_props_bad;
		(void) nvlist_remove_all(props, ZPOOL_HIDDEN_ARGS);

		VERIFY0(nvlist_alloc(&zplprops, NV_UNIQUE_NAME, KM_SLEEP));
		error = zfs_fill_zplprops_root(version, rootprops,
		    zplprops, NULL);
		if (error != 0)
			goto pool_props_bad;

		/*
		 * With a ZPOOL_PROP_TNAME property, the follow-up property
		 * set and the rollback below use that name, not zc_name.
		 */
		if (nvlist_lookup_string(props,
		    zpool_prop_to_name(ZPOOL_PROP_TNAME), &tname) == 0)
			spa_name = tname;
	}

	error = spa_create(zc->zc_name, config, props, zplprops, dcp,
	    &errinfo);
	/*
	 * Relay any extra information from spa_create() to the caller; a
	 * failure to copy it out is deliberately ignored.
	 */
	if (errinfo != NULL) {
		nvlist_t *outnv = fnvlist_alloc();
		fnvlist_add_nvlist(outnv,
		    ZPOOL_CONFIG_CREATE_INFO, errinfo);
		(void) put_nvlist(zc, outnv);
		nvlist_free(outnv);
		nvlist_free(errinfo);
	}

	/*
	 * Set the remaining root properties
	 */
	if (!error && (error = zfs_set_prop_nvlist(spa_name,
	    ZPROP_SRC_LOCAL, rootprops, NULL)) != 0) {
		/* Roll back: the pool exists but is not fully configured. */
		(void) spa_destroy(spa_name);
		unload_wkey = B_FALSE; /* spa_destroy() unloads wrapping keys */
	}

pool_props_bad:
	nvlist_free(rootprops);
	nvlist_free(zplprops);
	nvlist_free(config);
	nvlist_free(props);
	/*
	 * The second argument is true only on failure, and not when
	 * spa_destroy() already ran (presumably it requests unloading the
	 * wrapping key - confirm against dsl_crypto_params_free()).
	 */
	dsl_crypto_params_free(dcp, unload_wkey && !!error);

	return (error);
}
1745 
1746 static int
zfs_ioc_pool_destroy(zfs_cmd_t * zc)1747 zfs_ioc_pool_destroy(zfs_cmd_t *zc)
1748 {
1749 	int error;
1750 	zfs_log_history(zc);
1751 	error = spa_destroy(zc->zc_name);
1752 
1753 	return (error);
1754 }
1755 
1756 static int
zfs_ioc_pool_import(zfs_cmd_t * zc)1757 zfs_ioc_pool_import(zfs_cmd_t *zc)
1758 {
1759 	nvlist_t *config, *props = NULL;
1760 	uint64_t guid;
1761 	int error;
1762 
1763 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1764 	    zc->zc_iflags, &config)) != 0)
1765 		return (error);
1766 
1767 	if (zc->zc_nvlist_src_size != 0 && (error =
1768 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
1769 	    zc->zc_iflags, &props))) {
1770 		nvlist_free(config);
1771 		return (error);
1772 	}
1773 
1774 	if (nvlist_lookup_uint64(config, ZPOOL_CONFIG_POOL_GUID, &guid) != 0 ||
1775 	    guid != zc->zc_guid)
1776 		error = SET_ERROR(EINVAL);
1777 	else
1778 		error = spa_import(zc->zc_name, config, props, zc->zc_cookie);
1779 
1780 	if (zc->zc_nvlist_dst != 0) {
1781 		int err;
1782 
1783 		if ((err = put_nvlist(zc, config)) != 0)
1784 			error = err;
1785 	}
1786 
1787 	nvlist_free(config);
1788 	nvlist_free(props);
1789 
1790 	return (error);
1791 }
1792 
1793 static int
zfs_ioc_pool_export(zfs_cmd_t * zc)1794 zfs_ioc_pool_export(zfs_cmd_t *zc)
1795 {
1796 	int error;
1797 	boolean_t force = (boolean_t)zc->zc_cookie;
1798 	boolean_t hardforce = (boolean_t)zc->zc_guid;
1799 
1800 	zfs_log_history(zc);
1801 	error = spa_export(zc->zc_name, NULL, force, hardforce);
1802 
1803 	return (error);
1804 }
1805 
1806 static int
zfs_ioc_pool_configs(zfs_cmd_t * zc)1807 zfs_ioc_pool_configs(zfs_cmd_t *zc)
1808 {
1809 	nvlist_t *configs;
1810 	int error;
1811 
1812 	error = spa_all_configs(&zc->zc_cookie, &configs);
1813 	if (error)
1814 		return (error);
1815 
1816 	error = put_nvlist(zc, configs);
1817 
1818 	nvlist_free(configs);
1819 
1820 	return (error);
1821 }
1822 
1823 /*
1824  * inputs:
1825  * zc_name		name of the pool
1826  *
1827  * outputs:
1828  * zc_cookie		real errno
1829  * zc_nvlist_dst	config nvlist
1830  * zc_nvlist_dst_size	size of config nvlist
1831  */
1832 static int
zfs_ioc_pool_stats(zfs_cmd_t * zc)1833 zfs_ioc_pool_stats(zfs_cmd_t *zc)
1834 {
1835 	nvlist_t *config;
1836 	int error;
1837 	int ret = 0;
1838 
1839 	error = spa_get_stats(zc->zc_name, &config, zc->zc_value,
1840 	    sizeof (zc->zc_value));
1841 
1842 	if (config != NULL) {
1843 		ret = put_nvlist(zc, config);
1844 		nvlist_free(config);
1845 
1846 		/*
1847 		 * The config may be present even if 'error' is non-zero.
1848 		 * In this case we return success, and preserve the real errno
1849 		 * in 'zc_cookie'.
1850 		 */
1851 		zc->zc_cookie = error;
1852 	} else {
1853 		ret = error;
1854 	}
1855 
1856 	return (ret);
1857 }
1858 
1859 /*
1860  * Try to import the given pool, returning pool stats as appropriate so that
1861  * user land knows which devices are available and overall pool health.
1862  */
1863 static int
zfs_ioc_pool_tryimport(zfs_cmd_t * zc)1864 zfs_ioc_pool_tryimport(zfs_cmd_t *zc)
1865 {
1866 	nvlist_t *tryconfig, *config = NULL;
1867 	int error;
1868 
1869 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
1870 	    zc->zc_iflags, &tryconfig)) != 0)
1871 		return (error);
1872 
1873 	config = spa_tryimport(tryconfig);
1874 
1875 	nvlist_free(tryconfig);
1876 
1877 	if (config == NULL)
1878 		return (SET_ERROR(EINVAL));
1879 
1880 	error = put_nvlist(zc, config);
1881 	nvlist_free(config);
1882 
1883 	return (error);
1884 }
1885 
1886 /*
1887  * inputs:
1888  * zc_name              name of the pool
1889  * zc_cookie            scan func (pool_scan_func_t)
1890  * zc_flags             scrub pause/resume flag (pool_scrub_cmd_t)
1891  */
1892 static int
zfs_ioc_pool_scan(zfs_cmd_t * zc)1893 zfs_ioc_pool_scan(zfs_cmd_t *zc)
1894 {
1895 	spa_t *spa;
1896 	int error;
1897 
1898 	if (zc->zc_flags >= POOL_SCRUB_FLAGS_END)
1899 		return (SET_ERROR(EINVAL));
1900 
1901 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
1902 		return (error);
1903 
1904 	if (zc->zc_flags == POOL_SCRUB_PAUSE)
1905 		error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE);
1906 	else if (zc->zc_cookie == POOL_SCAN_NONE)
1907 		error = spa_scan_stop(spa);
1908 	else
1909 		error = spa_scan(spa, zc->zc_cookie);
1910 
1911 	spa_close(spa, FTAG);
1912 
1913 	return (error);
1914 }
1915 
/*
 * inputs:
 * poolname             name of the pool
 * scan_type            scan func (pool_scan_func_t)
 * scan_command         scrub pause/resume flag (pool_scrub_cmd_t)
 * scan_date_start      (optional) uint64 start date; zfs_ioc_pool_scrub()
 *                      treats a missing or zero value as "not set"
 * scan_date_end        (optional) uint64 end date; zfs_ioc_pool_scrub()
 *                      treats a missing or zero value as "not set"
 */
static const zfs_ioc_key_t zfs_keys_pool_scrub[] = {
	{"scan_type",		DATA_TYPE_UINT64,	0},
	{"scan_command",	DATA_TYPE_UINT64,	0},
	{"scan_date_start",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
	{"scan_date_end",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
};
1928 
1929 static int
zfs_ioc_pool_scrub(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)1930 zfs_ioc_pool_scrub(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
1931 {
1932 	spa_t *spa;
1933 	int error;
1934 	uint64_t scan_type, scan_cmd;
1935 	uint64_t date_start, date_end;
1936 
1937 	if (nvlist_lookup_uint64(innvl, "scan_type", &scan_type) != 0)
1938 		return (SET_ERROR(EINVAL));
1939 	if (nvlist_lookup_uint64(innvl, "scan_command", &scan_cmd) != 0)
1940 		return (SET_ERROR(EINVAL));
1941 
1942 	if (scan_cmd >= POOL_SCRUB_FLAGS_END)
1943 		return (SET_ERROR(EINVAL));
1944 
1945 	if (nvlist_lookup_uint64(innvl, "scan_date_start", &date_start) != 0)
1946 		date_start = 0;
1947 	if (nvlist_lookup_uint64(innvl, "scan_date_end", &date_end) != 0)
1948 		date_end = 0;
1949 
1950 	if ((error = spa_open(poolname, &spa, FTAG)) != 0)
1951 		return (error);
1952 
1953 	if (scan_cmd == POOL_SCRUB_PAUSE) {
1954 		error = spa_scrub_pause_resume(spa, POOL_SCRUB_PAUSE);
1955 	} else if (scan_type == POOL_SCAN_NONE) {
1956 		error = spa_scan_stop(spa);
1957 	} else if (scan_cmd == POOL_SCRUB_FROM_LAST_TXG) {
1958 		error = spa_scan_range(spa, scan_type,
1959 		    spa_get_last_scrubbed_txg(spa), 0);
1960 	} else {
1961 		uint64_t txg_start, txg_end;
1962 
1963 		txg_start = txg_end = 0;
1964 		if (date_start != 0 || date_end != 0) {
1965 			mutex_enter(&spa->spa_txg_log_time_lock);
1966 			if (date_start != 0) {
1967 				txg_start = dbrrd_query(&spa->spa_txg_log_time,
1968 				    date_start, DBRRD_FLOOR);
1969 			}
1970 
1971 			if (date_end != 0) {
1972 				txg_end = dbrrd_query(&spa->spa_txg_log_time,
1973 				    date_end, DBRRD_CEILING);
1974 			}
1975 			mutex_exit(&spa->spa_txg_log_time_lock);
1976 		}
1977 
1978 		error = spa_scan_range(spa, scan_type, txg_start, txg_end);
1979 	}
1980 
1981 	spa_close(spa, FTAG);
1982 	return (error);
1983 }
1984 
1985 static int
zfs_ioc_pool_freeze(zfs_cmd_t * zc)1986 zfs_ioc_pool_freeze(zfs_cmd_t *zc)
1987 {
1988 	spa_t *spa;
1989 	int error;
1990 
1991 	error = spa_open(zc->zc_name, &spa, FTAG);
1992 	if (error == 0) {
1993 		spa_freeze(spa);
1994 		spa_close(spa, FTAG);
1995 	}
1996 	return (error);
1997 }
1998 
1999 static int
zfs_ioc_pool_upgrade(zfs_cmd_t * zc)2000 zfs_ioc_pool_upgrade(zfs_cmd_t *zc)
2001 {
2002 	spa_t *spa;
2003 	int error;
2004 
2005 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2006 		return (error);
2007 
2008 	if (zc->zc_cookie < spa_version(spa) ||
2009 	    !SPA_VERSION_IS_SUPPORTED(zc->zc_cookie)) {
2010 		spa_close(spa, FTAG);
2011 		return (SET_ERROR(EINVAL));
2012 	}
2013 
2014 	spa_upgrade(spa, zc->zc_cookie);
2015 	spa_close(spa, FTAG);
2016 
2017 	return (error);
2018 }
2019 
2020 static int
zfs_ioc_pool_get_history(zfs_cmd_t * zc)2021 zfs_ioc_pool_get_history(zfs_cmd_t *zc)
2022 {
2023 	spa_t *spa;
2024 	char *hist_buf;
2025 	uint64_t size;
2026 	int error;
2027 
2028 	if ((size = zc->zc_history_len) == 0)
2029 		return (SET_ERROR(EINVAL));
2030 
2031 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2032 		return (error);
2033 
2034 	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
2035 		spa_close(spa, FTAG);
2036 		return (SET_ERROR(ENOTSUP));
2037 	}
2038 
2039 	hist_buf = vmem_alloc(size, KM_SLEEP);
2040 	if ((error = spa_history_get(spa, &zc->zc_history_offset,
2041 	    &zc->zc_history_len, hist_buf)) == 0) {
2042 		error = ddi_copyout(hist_buf,
2043 		    (void *)(uintptr_t)zc->zc_history,
2044 		    zc->zc_history_len, zc->zc_iflags);
2045 	}
2046 
2047 	spa_close(spa, FTAG);
2048 	vmem_free(hist_buf, size);
2049 	return (error);
2050 }
2051 
2052 /*
2053  * inputs:
2054  * zc_nvlist_src	nvlist optionally containing ZPOOL_REGUID_GUID
2055  * zc_nvlist_src_size	size of the nvlist
2056  */
2057 static int
zfs_ioc_pool_reguid(zfs_cmd_t * zc)2058 zfs_ioc_pool_reguid(zfs_cmd_t *zc)
2059 {
2060 	uint64_t *guidp = NULL;
2061 	nvlist_t *props = NULL;
2062 	spa_t *spa;
2063 	uint64_t guid;
2064 	int error;
2065 
2066 	if (zc->zc_nvlist_src_size != 0) {
2067 		error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2068 		    zc->zc_iflags, &props);
2069 		if (error != 0)
2070 			return (error);
2071 
2072 		error = nvlist_lookup_uint64(props, ZPOOL_REGUID_GUID, &guid);
2073 		if (error == 0)
2074 			guidp = &guid;
2075 		else if (error == ENOENT)
2076 			guidp = NULL;
2077 		else
2078 			goto out;
2079 	}
2080 
2081 	error = spa_open(zc->zc_name, &spa, FTAG);
2082 	if (error == 0) {
2083 		error = spa_change_guid(spa, guidp);
2084 		spa_close(spa, FTAG);
2085 	}
2086 
2087 out:
2088 	if (props != NULL)
2089 		nvlist_free(props);
2090 
2091 	return (error);
2092 }
2093 
2094 static int
zfs_ioc_dsobj_to_dsname(zfs_cmd_t * zc)2095 zfs_ioc_dsobj_to_dsname(zfs_cmd_t *zc)
2096 {
2097 	return (dsl_dsobj_to_dsname(zc->zc_name, zc->zc_obj, zc->zc_value));
2098 }
2099 
2100 /*
2101  * inputs:
2102  * zc_name		name of filesystem
2103  * zc_obj		object to find
2104  *
2105  * outputs:
2106  * zc_value		name of object
2107  */
2108 static int
zfs_ioc_obj_to_path(zfs_cmd_t * zc)2109 zfs_ioc_obj_to_path(zfs_cmd_t *zc)
2110 {
2111 	objset_t *os;
2112 	int error;
2113 
2114 	/* XXX reading from objset not owned */
2115 	if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE,
2116 	    FTAG, &os)) != 0)
2117 		return (error);
2118 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
2119 		dmu_objset_rele_flags(os, B_TRUE, FTAG);
2120 		return (SET_ERROR(EINVAL));
2121 	}
2122 	error = zfs_obj_to_path(os, zc->zc_obj, zc->zc_value,
2123 	    sizeof (zc->zc_value));
2124 	dmu_objset_rele_flags(os, B_TRUE, FTAG);
2125 
2126 	return (error);
2127 }
2128 
2129 /*
2130  * inputs:
2131  * zc_name		name of filesystem
2132  * zc_obj		object to find
2133  *
2134  * outputs:
2135  * zc_stat		stats on object
2136  * zc_value		path to object
2137  */
2138 static int
zfs_ioc_obj_to_stats(zfs_cmd_t * zc)2139 zfs_ioc_obj_to_stats(zfs_cmd_t *zc)
2140 {
2141 	objset_t *os;
2142 	int error;
2143 
2144 	/* XXX reading from objset not owned */
2145 	if ((error = dmu_objset_hold_flags(zc->zc_name, B_TRUE,
2146 	    FTAG, &os)) != 0)
2147 		return (error);
2148 	if (dmu_objset_type(os) != DMU_OST_ZFS) {
2149 		dmu_objset_rele_flags(os, B_TRUE, FTAG);
2150 		return (SET_ERROR(EINVAL));
2151 	}
2152 	error = zfs_obj_to_stats(os, zc->zc_obj, &zc->zc_stat, zc->zc_value,
2153 	    sizeof (zc->zc_value));
2154 	dmu_objset_rele_flags(os, B_TRUE, FTAG);
2155 
2156 	return (error);
2157 }
2158 
2159 static int
zfs_ioc_vdev_add(zfs_cmd_t * zc)2160 zfs_ioc_vdev_add(zfs_cmd_t *zc)
2161 {
2162 	spa_t *spa;
2163 	int error;
2164 	nvlist_t *config;
2165 
2166 	error = spa_open(zc->zc_name, &spa, FTAG);
2167 	if (error != 0)
2168 		return (error);
2169 
2170 	error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2171 	    zc->zc_iflags, &config);
2172 	if (error == 0) {
2173 		error = spa_vdev_add(spa, config, zc->zc_flags);
2174 		nvlist_free(config);
2175 	}
2176 	spa_close(spa, FTAG);
2177 	return (error);
2178 }
2179 
2180 /*
2181  * inputs:
2182  * zc_name		name of the pool
2183  * zc_guid		guid of vdev to remove
2184  * zc_cookie		cancel removal
2185  */
2186 static int
zfs_ioc_vdev_remove(zfs_cmd_t * zc)2187 zfs_ioc_vdev_remove(zfs_cmd_t *zc)
2188 {
2189 	spa_t *spa;
2190 	int error;
2191 
2192 	error = spa_open(zc->zc_name, &spa, FTAG);
2193 	if (error != 0)
2194 		return (error);
2195 	if (zc->zc_cookie != 0) {
2196 		error = spa_vdev_remove_cancel(spa);
2197 	} else {
2198 		error = spa_vdev_remove(spa, zc->zc_guid, B_FALSE);
2199 	}
2200 	spa_close(spa, FTAG);
2201 	return (error);
2202 }
2203 
2204 static int
zfs_ioc_vdev_set_state(zfs_cmd_t * zc)2205 zfs_ioc_vdev_set_state(zfs_cmd_t *zc)
2206 {
2207 	spa_t *spa;
2208 	int error;
2209 	vdev_state_t newstate = VDEV_STATE_UNKNOWN;
2210 
2211 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2212 		return (error);
2213 	switch (zc->zc_cookie) {
2214 	case VDEV_STATE_ONLINE:
2215 		error = vdev_online(spa, zc->zc_guid, zc->zc_obj, &newstate);
2216 		break;
2217 
2218 	case VDEV_STATE_OFFLINE:
2219 		error = vdev_offline(spa, zc->zc_guid, zc->zc_obj);
2220 		break;
2221 
2222 	case VDEV_STATE_FAULTED:
2223 		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
2224 		    zc->zc_obj != VDEV_AUX_EXTERNAL &&
2225 		    zc->zc_obj != VDEV_AUX_EXTERNAL_PERSIST)
2226 			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
2227 
2228 		error = vdev_fault(spa, zc->zc_guid, zc->zc_obj);
2229 		break;
2230 
2231 	case VDEV_STATE_DEGRADED:
2232 		if (zc->zc_obj != VDEV_AUX_ERR_EXCEEDED &&
2233 		    zc->zc_obj != VDEV_AUX_EXTERNAL)
2234 			zc->zc_obj = VDEV_AUX_ERR_EXCEEDED;
2235 
2236 		error = vdev_degrade(spa, zc->zc_guid, zc->zc_obj);
2237 		break;
2238 
2239 	case VDEV_STATE_REMOVED:
2240 		error = vdev_remove_wanted(spa, zc->zc_guid);
2241 		break;
2242 
2243 	default:
2244 		error = SET_ERROR(EINVAL);
2245 	}
2246 	zc->zc_cookie = newstate;
2247 	spa_close(spa, FTAG);
2248 	return (error);
2249 }
2250 
2251 static int
zfs_ioc_vdev_attach(zfs_cmd_t * zc)2252 zfs_ioc_vdev_attach(zfs_cmd_t *zc)
2253 {
2254 	spa_t *spa;
2255 	nvlist_t *config;
2256 	int replacing = zc->zc_cookie;
2257 	int rebuild = zc->zc_simple;
2258 	int error;
2259 
2260 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2261 		return (error);
2262 
2263 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2264 	    zc->zc_iflags, &config)) == 0) {
2265 		error = spa_vdev_attach(spa, zc->zc_guid, config, replacing,
2266 		    rebuild);
2267 		nvlist_free(config);
2268 	}
2269 
2270 	spa_close(spa, FTAG);
2271 	return (error);
2272 }
2273 
2274 static int
zfs_ioc_vdev_detach(zfs_cmd_t * zc)2275 zfs_ioc_vdev_detach(zfs_cmd_t *zc)
2276 {
2277 	spa_t *spa;
2278 	int error;
2279 
2280 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2281 		return (error);
2282 
2283 	error = spa_vdev_detach(spa, zc->zc_guid, 0, B_FALSE);
2284 
2285 	spa_close(spa, FTAG);
2286 	return (error);
2287 }
2288 
2289 static int
zfs_ioc_vdev_split(zfs_cmd_t * zc)2290 zfs_ioc_vdev_split(zfs_cmd_t *zc)
2291 {
2292 	spa_t *spa;
2293 	nvlist_t *config, *props = NULL;
2294 	int error;
2295 	boolean_t exp = !!(zc->zc_cookie & ZPOOL_EXPORT_AFTER_SPLIT);
2296 
2297 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
2298 		return (error);
2299 
2300 	if ((error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
2301 	    zc->zc_iflags, &config))) {
2302 		spa_close(spa, FTAG);
2303 		return (error);
2304 	}
2305 
2306 	if (zc->zc_nvlist_src_size != 0 && (error =
2307 	    get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2308 	    zc->zc_iflags, &props))) {
2309 		spa_close(spa, FTAG);
2310 		nvlist_free(config);
2311 		return (error);
2312 	}
2313 
2314 	error = spa_vdev_split_mirror(spa, zc->zc_string, config, props, exp);
2315 
2316 	spa_close(spa, FTAG);
2317 
2318 	nvlist_free(config);
2319 	nvlist_free(props);
2320 
2321 	return (error);
2322 }
2323 
2324 static int
zfs_ioc_vdev_setpath(zfs_cmd_t * zc)2325 zfs_ioc_vdev_setpath(zfs_cmd_t *zc)
2326 {
2327 	spa_t *spa;
2328 	const char *path = zc->zc_value;
2329 	uint64_t guid = zc->zc_guid;
2330 	int error;
2331 
2332 	error = spa_open(zc->zc_name, &spa, FTAG);
2333 	if (error != 0)
2334 		return (error);
2335 
2336 	error = spa_vdev_setpath(spa, guid, path);
2337 	spa_close(spa, FTAG);
2338 	return (error);
2339 }
2340 
2341 static int
zfs_ioc_vdev_setfru(zfs_cmd_t * zc)2342 zfs_ioc_vdev_setfru(zfs_cmd_t *zc)
2343 {
2344 	spa_t *spa;
2345 	const char *fru = zc->zc_value;
2346 	uint64_t guid = zc->zc_guid;
2347 	int error;
2348 
2349 	error = spa_open(zc->zc_name, &spa, FTAG);
2350 	if (error != 0)
2351 		return (error);
2352 
2353 	error = spa_vdev_setfru(spa, guid, fru);
2354 	spa_close(spa, FTAG);
2355 	return (error);
2356 }
2357 
2358 static int
zfs_ioc_objset_stats_impl(zfs_cmd_t * zc,objset_t * os)2359 zfs_ioc_objset_stats_impl(zfs_cmd_t *zc, objset_t *os)
2360 {
2361 	int error = 0;
2362 	nvlist_t *nv;
2363 
2364 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2365 
2366 	if (!zc->zc_simple && zc->zc_nvlist_dst != 0 &&
2367 	    (error = dsl_prop_get_all(os, &nv)) == 0) {
2368 		dmu_objset_stats(os, nv);
2369 		/*
2370 		 * NB: zvol_get_stats() will read the objset contents,
2371 		 * which we aren't supposed to do with a
2372 		 * DS_MODE_USER hold, because it could be
2373 		 * inconsistent.  So this is a bit of a workaround...
2374 		 * XXX reading without owning
2375 		 */
2376 		if (!zc->zc_objset_stats.dds_inconsistent &&
2377 		    dmu_objset_type(os) == DMU_OST_ZVOL) {
2378 			error = zvol_get_stats(os, nv);
2379 			if (error == EIO) {
2380 				nvlist_free(nv);
2381 				return (error);
2382 			}
2383 			VERIFY0(error);
2384 		}
2385 		if (error == 0)
2386 			error = put_nvlist(zc, nv);
2387 		nvlist_free(nv);
2388 	}
2389 
2390 	return (error);
2391 }
2392 
2393 /*
2394  * inputs:
2395  * zc_name		name of filesystem
2396  * zc_nvlist_dst_size	size of buffer for property nvlist
2397  *
2398  * outputs:
2399  * zc_objset_stats	stats
2400  * zc_nvlist_dst	property nvlist
2401  * zc_nvlist_dst_size	size of property nvlist
2402  */
2403 static int
zfs_ioc_objset_stats(zfs_cmd_t * zc)2404 zfs_ioc_objset_stats(zfs_cmd_t *zc)
2405 {
2406 	objset_t *os;
2407 	int error;
2408 
2409 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2410 	if (error == 0) {
2411 		error = zfs_ioc_objset_stats_impl(zc, os);
2412 		dmu_objset_rele(os, FTAG);
2413 	}
2414 
2415 	return (error);
2416 }
2417 
2418 /*
2419  * inputs:
2420  * zc_name		name of filesystem
2421  * zc_nvlist_dst_size	size of buffer for property nvlist
2422  *
2423  * outputs:
2424  * zc_nvlist_dst	received property nvlist
2425  * zc_nvlist_dst_size	size of received property nvlist
2426  *
2427  * Gets received properties (distinct from local properties on or after
2428  * SPA_VERSION_RECVD_PROPS) for callers who want to differentiate received from
2429  * local property values.
2430  */
2431 static int
zfs_ioc_objset_recvd_props(zfs_cmd_t * zc)2432 zfs_ioc_objset_recvd_props(zfs_cmd_t *zc)
2433 {
2434 	int error = 0;
2435 	nvlist_t *nv;
2436 
2437 	/*
2438 	 * Without this check, we would return local property values if the
2439 	 * caller has not already received properties on or after
2440 	 * SPA_VERSION_RECVD_PROPS.
2441 	 */
2442 	if (!dsl_prop_get_hasrecvd(zc->zc_name))
2443 		return (SET_ERROR(ENOTSUP));
2444 
2445 	if (zc->zc_nvlist_dst != 0 &&
2446 	    (error = dsl_prop_get_received(zc->zc_name, &nv)) == 0) {
2447 		error = put_nvlist(zc, nv);
2448 		nvlist_free(nv);
2449 	}
2450 
2451 	return (error);
2452 }
2453 
2454 static int
nvl_add_zplprop(objset_t * os,nvlist_t * props,zfs_prop_t prop)2455 nvl_add_zplprop(objset_t *os, nvlist_t *props, zfs_prop_t prop)
2456 {
2457 	uint64_t value;
2458 	int error;
2459 
2460 	/*
2461 	 * zfs_get_zplprop() will either find a value or give us
2462 	 * the default value (if there is one).
2463 	 */
2464 	if ((error = zfs_get_zplprop(os, prop, &value)) != 0)
2465 		return (error);
2466 	VERIFY0(nvlist_add_uint64(props, zfs_prop_to_name(prop), value));
2467 	return (0);
2468 }
2469 
2470 /*
2471  * inputs:
2472  * zc_name		name of filesystem
2473  * zc_nvlist_dst_size	size of buffer for zpl property nvlist
2474  *
2475  * outputs:
2476  * zc_nvlist_dst	zpl property nvlist
2477  * zc_nvlist_dst_size	size of zpl property nvlist
2478  */
2479 static int
zfs_ioc_objset_zplprops(zfs_cmd_t * zc)2480 zfs_ioc_objset_zplprops(zfs_cmd_t *zc)
2481 {
2482 	objset_t *os;
2483 	int err;
2484 
2485 	/* XXX reading without owning */
2486 	if ((err = dmu_objset_hold(zc->zc_name, FTAG, &os)))
2487 		return (err);
2488 
2489 	dmu_objset_fast_stat(os, &zc->zc_objset_stats);
2490 
2491 	/*
2492 	 * NB: nvl_add_zplprop() will read the objset contents,
2493 	 * which we aren't supposed to do with a DS_MODE_USER
2494 	 * hold, because it could be inconsistent.
2495 	 */
2496 	if (zc->zc_nvlist_dst != 0 &&
2497 	    !zc->zc_objset_stats.dds_inconsistent &&
2498 	    dmu_objset_type(os) == DMU_OST_ZFS) {
2499 		nvlist_t *nv;
2500 
2501 		VERIFY0(nvlist_alloc(&nv, NV_UNIQUE_NAME, KM_SLEEP));
2502 		if ((err = nvl_add_zplprop(os, nv, ZFS_PROP_VERSION)) == 0 &&
2503 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_NORMALIZE)) == 0 &&
2504 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_UTF8ONLY)) == 0 &&
2505 		    (err = nvl_add_zplprop(os, nv, ZFS_PROP_CASE)) == 0 &&
2506 		    (err = nvl_add_zplprop(os, nv,
2507 		    ZFS_PROP_DEFAULTUSERQUOTA)) == 0 &&
2508 		    (err = nvl_add_zplprop(os, nv,
2509 		    ZFS_PROP_DEFAULTGROUPQUOTA)) == 0 &&
2510 		    (err = nvl_add_zplprop(os, nv,
2511 		    ZFS_PROP_DEFAULTPROJECTQUOTA)) == 0 &&
2512 		    (err = nvl_add_zplprop(os, nv,
2513 		    ZFS_PROP_DEFAULTUSEROBJQUOTA)) == 0 &&
2514 		    (err = nvl_add_zplprop(os, nv,
2515 		    ZFS_PROP_DEFAULTGROUPOBJQUOTA)) == 0 &&
2516 		    (err = nvl_add_zplprop(os, nv,
2517 		    ZFS_PROP_DEFAULTPROJECTOBJQUOTA)) == 0)
2518 			err = put_nvlist(zc, nv);
2519 		nvlist_free(nv);
2520 	} else {
2521 		err = SET_ERROR(ENOENT);
2522 	}
2523 	dmu_objset_rele(os, FTAG);
2524 	return (err);
2525 }
2526 
2527 /*
2528  * inputs:
2529  * zc_name		name of filesystem
2530  * zc_cookie		zap cursor
2531  * zc_nvlist_dst_size	size of buffer for property nvlist
2532  *
2533  * outputs:
2534  * zc_name		name of next filesystem
2535  * zc_cookie		zap cursor
2536  * zc_objset_stats	stats
2537  * zc_nvlist_dst	property nvlist
2538  * zc_nvlist_dst_size	size of property nvlist
2539  */
2540 static int
zfs_ioc_dataset_list_next(zfs_cmd_t * zc)2541 zfs_ioc_dataset_list_next(zfs_cmd_t *zc)
2542 {
2543 	objset_t *os;
2544 	int error;
2545 	char *p;
2546 	size_t orig_len = strlen(zc->zc_name);
2547 
2548 top:
2549 	if ((error = dmu_objset_hold(zc->zc_name, FTAG, &os))) {
2550 		if (error == ENOENT)
2551 			error = SET_ERROR(ESRCH);
2552 		return (error);
2553 	}
2554 
2555 	p = strrchr(zc->zc_name, '/');
2556 	if (p == NULL || p[1] != '\0')
2557 		(void) strlcat(zc->zc_name, "/", sizeof (zc->zc_name));
2558 	p = zc->zc_name + strlen(zc->zc_name);
2559 
2560 	do {
2561 		error = dmu_dir_list_next(os,
2562 		    sizeof (zc->zc_name) - (p - zc->zc_name), p,
2563 		    NULL, &zc->zc_cookie);
2564 		if (error == ENOENT)
2565 			error = SET_ERROR(ESRCH);
2566 	} while (error == 0 && zfs_dataset_name_hidden(zc->zc_name));
2567 	dmu_objset_rele(os, FTAG);
2568 
2569 	/*
2570 	 * If it's an internal dataset (ie. with a '$' in its name),
2571 	 * don't try to get stats for it, otherwise we'll return ENOENT.
2572 	 */
2573 	if (error == 0 && strchr(zc->zc_name, '$') == NULL) {
2574 		error = zfs_ioc_objset_stats(zc); /* fill in the stats */
2575 		if (error == ENOENT) {
2576 			/* We lost a race with destroy, get the next one. */
2577 			zc->zc_name[orig_len] = '\0';
2578 			goto top;
2579 		}
2580 	}
2581 	return (error);
2582 }
2583 
2584 /*
2585  * inputs:
2586  * zc_name		name of filesystem
2587  * zc_cookie		zap cursor
2588  * zc_nvlist_src	iteration range nvlist
2589  * zc_nvlist_src_size	size of iteration range nvlist
2590  *
2591  * outputs:
2592  * zc_name		name of next snapshot
2593  * zc_objset_stats	stats
2594  * zc_nvlist_dst	property nvlist
2595  * zc_nvlist_dst_size	size of property nvlist
2596  */
2597 static int
zfs_ioc_snapshot_list_next(zfs_cmd_t * zc)2598 zfs_ioc_snapshot_list_next(zfs_cmd_t *zc)
2599 {
2600 	int error;
2601 	objset_t *os, *ossnap;
2602 	dsl_dataset_t *ds;
2603 	uint64_t min_txg = 0, max_txg = 0;
2604 
2605 	if (zc->zc_nvlist_src_size != 0) {
2606 		nvlist_t *props = NULL;
2607 		error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
2608 		    zc->zc_iflags, &props);
2609 		if (error != 0)
2610 			return (error);
2611 		(void) nvlist_lookup_uint64(props, SNAP_ITER_MIN_TXG,
2612 		    &min_txg);
2613 		(void) nvlist_lookup_uint64(props, SNAP_ITER_MAX_TXG,
2614 		    &max_txg);
2615 		nvlist_free(props);
2616 	}
2617 
2618 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
2619 	if (error != 0) {
2620 		return (error == ENOENT ? SET_ERROR(ESRCH) : error);
2621 	}
2622 
2623 	/*
2624 	 * A dataset name of maximum length cannot have any snapshots,
2625 	 * so exit immediately.
2626 	 */
2627 	if (strlcat(zc->zc_name, "@", sizeof (zc->zc_name)) >=
2628 	    ZFS_MAX_DATASET_NAME_LEN) {
2629 		dmu_objset_rele(os, FTAG);
2630 		return (SET_ERROR(ESRCH));
2631 	}
2632 
2633 	while (error == 0) {
2634 		if (issig()) {
2635 			error = SET_ERROR(EINTR);
2636 			break;
2637 		}
2638 
2639 		error = dmu_snapshot_list_next(os,
2640 		    sizeof (zc->zc_name) - strlen(zc->zc_name),
2641 		    zc->zc_name + strlen(zc->zc_name), &zc->zc_obj,
2642 		    &zc->zc_cookie, NULL);
2643 		if (error == ENOENT) {
2644 			error = SET_ERROR(ESRCH);
2645 			break;
2646 		} else if (error != 0) {
2647 			break;
2648 		}
2649 
2650 		error = dsl_dataset_hold_obj(dmu_objset_pool(os), zc->zc_obj,
2651 		    FTAG, &ds);
2652 		if (error != 0)
2653 			break;
2654 
2655 		if ((min_txg != 0 && dsl_get_creationtxg(ds) < min_txg) ||
2656 		    (max_txg != 0 && dsl_get_creationtxg(ds) > max_txg)) {
2657 			dsl_dataset_rele(ds, FTAG);
2658 			/* undo snapshot name append */
2659 			*(strchr(zc->zc_name, '@') + 1) = '\0';
2660 			/* skip snapshot */
2661 			continue;
2662 		}
2663 
2664 		if (zc->zc_simple) {
2665 			dsl_dataset_fast_stat(ds, &zc->zc_objset_stats);
2666 			dsl_dataset_rele(ds, FTAG);
2667 			break;
2668 		}
2669 
2670 		if ((error = dmu_objset_from_ds(ds, &ossnap)) != 0) {
2671 			dsl_dataset_rele(ds, FTAG);
2672 			break;
2673 		}
2674 		if ((error = zfs_ioc_objset_stats_impl(zc, ossnap)) != 0) {
2675 			dsl_dataset_rele(ds, FTAG);
2676 			break;
2677 		}
2678 		dsl_dataset_rele(ds, FTAG);
2679 		break;
2680 	}
2681 
2682 	dmu_objset_rele(os, FTAG);
2683 	/* if we failed, undo the @ that we tacked on to zc_name */
2684 	if (error != 0)
2685 		*strchr(zc->zc_name, '@') = '\0';
2686 	return (error);
2687 }
2688 
2689 static int
zfs_prop_set_userquota(const char * dsname,nvpair_t * pair)2690 zfs_prop_set_userquota(const char *dsname, nvpair_t *pair)
2691 {
2692 	const char *propname = nvpair_name(pair);
2693 	uint64_t *valary;
2694 	unsigned int vallen;
2695 	const char *dash, *domain;
2696 	zfs_userquota_prop_t type;
2697 	uint64_t rid;
2698 	uint64_t quota;
2699 	zfsvfs_t *zfsvfs;
2700 	int err;
2701 
2702 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2703 		nvlist_t *attrs;
2704 		VERIFY0(nvpair_value_nvlist(pair, &attrs));
2705 		if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2706 		    &pair) != 0)
2707 			return (SET_ERROR(EINVAL));
2708 	}
2709 
2710 	/*
2711 	 * A correctly constructed propname is encoded as
2712 	 * userquota@<rid>-<domain>.
2713 	 */
2714 	if ((dash = strchr(propname, '-')) == NULL ||
2715 	    nvpair_value_uint64_array(pair, &valary, &vallen) != 0 ||
2716 	    vallen != 3)
2717 		return (SET_ERROR(EINVAL));
2718 
2719 	domain = dash + 1;
2720 	type = valary[0];
2721 	rid = valary[1];
2722 	quota = valary[2];
2723 
2724 	err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE);
2725 	if (err == 0) {
2726 		err = zfs_set_userquota(zfsvfs, type, domain, rid, quota);
2727 		zfsvfs_rele(zfsvfs, FTAG);
2728 	}
2729 
2730 	return (err);
2731 }
2732 
2733 /*
2734  * If the named property is one that has a special function to set its value,
2735  * return 0 on success and a positive error code on failure; otherwise if it is
2736  * not one of the special properties handled by this function, return -1.
2737  *
2738  * XXX: It would be better for callers of the property interface if we handled
2739  * these special cases in dsl_prop.c (in the dsl layer).
2740  */
2741 static int
zfs_prop_set_special(const char * dsname,zprop_source_t source,nvpair_t * pair)2742 zfs_prop_set_special(const char *dsname, zprop_source_t source,
2743     nvpair_t *pair)
2744 {
2745 	const char *propname = nvpair_name(pair);
2746 	zfs_prop_t prop = zfs_name_to_prop(propname);
2747 	uint64_t intval = 0;
2748 	const char *strval = NULL;
2749 	int err = -1;
2750 
2751 	if (prop == ZPROP_USERPROP) {
2752 		if (zfs_prop_userquota(propname))
2753 			return (zfs_prop_set_userquota(dsname, pair));
2754 		return (-1);
2755 	}
2756 
2757 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2758 		nvlist_t *attrs;
2759 		VERIFY0(nvpair_value_nvlist(pair, &attrs));
2760 		VERIFY0(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &pair));
2761 	}
2762 
2763 	/* all special properties are numeric except for keylocation */
2764 	if (zfs_prop_get_type(prop) == PROP_TYPE_STRING) {
2765 		strval = fnvpair_value_string(pair);
2766 	} else {
2767 		intval = fnvpair_value_uint64(pair);
2768 	}
2769 
2770 	switch (prop) {
2771 	case ZFS_PROP_QUOTA:
2772 		err = dsl_dir_set_quota(dsname, source, intval);
2773 		break;
2774 	case ZFS_PROP_REFQUOTA:
2775 		err = dsl_dataset_set_refquota(dsname, source, intval);
2776 		break;
2777 	case ZFS_PROP_FILESYSTEM_LIMIT:
2778 	case ZFS_PROP_SNAPSHOT_LIMIT:
2779 		if (intval == UINT64_MAX) {
2780 			/* clearing the limit, just do it */
2781 			err = 0;
2782 		} else {
2783 			err = dsl_dir_activate_fs_ss_limit(dsname);
2784 		}
2785 		/*
2786 		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2787 		 * default path to set the value in the nvlist.
2788 		 */
2789 		if (err == 0)
2790 			err = -1;
2791 		break;
2792 	case ZFS_PROP_KEYLOCATION:
2793 		err = dsl_crypto_can_set_keylocation(dsname, strval);
2794 
2795 		/*
2796 		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2797 		 * default path to set the value in the nvlist.
2798 		 */
2799 		if (err == 0)
2800 			err = -1;
2801 		break;
2802 	case ZFS_PROP_RESERVATION:
2803 		err = dsl_dir_set_reservation(dsname, source, intval);
2804 		break;
2805 	case ZFS_PROP_REFRESERVATION:
2806 		err = dsl_dataset_set_refreservation(dsname, source, intval);
2807 		break;
2808 	case ZFS_PROP_COMPRESSION:
2809 		err = dsl_dataset_set_compression(dsname, source, intval);
2810 		/*
2811 		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2812 		 * default path to set the value in the nvlist.
2813 		 */
2814 		if (err == 0)
2815 			err = -1;
2816 		break;
2817 	case ZFS_PROP_VOLSIZE:
2818 		err = zvol_set_volsize(dsname, intval);
2819 		break;
2820 	case ZFS_PROP_VOLTHREADING:
2821 		err = zvol_set_volthreading(dsname, intval);
2822 		/*
2823 		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2824 		 * default path to set the value in the nvlist.
2825 		 */
2826 		if (err == 0)
2827 			err = -1;
2828 		break;
2829 	case ZFS_PROP_SNAPDEV:
2830 	case ZFS_PROP_VOLMODE:
2831 		err = zvol_set_common(dsname, prop, source, intval);
2832 		break;
2833 	case ZFS_PROP_READONLY:
2834 		err = zvol_set_ro(dsname, intval);
2835 		/*
2836 		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2837 		 * default path to set the value in the nvlist.
2838 		 */
2839 		if (err == 0)
2840 			err = -1;
2841 		break;
2842 	case ZFS_PROP_VERSION:
2843 	{
2844 		zfsvfs_t *zfsvfs;
2845 
2846 		if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2847 			break;
2848 
2849 		err = zfs_set_version(zfsvfs, intval);
2850 		zfsvfs_rele(zfsvfs, FTAG);
2851 
2852 		if (err == 0 && intval >= ZPL_VERSION_USERSPACE) {
2853 			zfs_cmd_t *zc;
2854 
2855 			zc = kmem_zalloc(sizeof (zfs_cmd_t), KM_SLEEP);
2856 			(void) strlcpy(zc->zc_name, dsname,
2857 			    sizeof (zc->zc_name));
2858 			(void) zfs_ioc_userspace_upgrade(zc);
2859 			(void) zfs_ioc_id_quota_upgrade(zc);
2860 			kmem_free(zc, sizeof (zfs_cmd_t));
2861 		}
2862 		break;
2863 	}
2864 	case ZFS_PROP_LONGNAME:
2865 	{
2866 		zfsvfs_t *zfsvfs;
2867 
2868 		/*
2869 		 * Ignore the checks if the property is being applied as part of
2870 		 * 'zfs receive'. Because, we already check if the local pool
2871 		 * has SPA_FEATURE_LONGNAME enabled in dmu_recv_begin_check().
2872 		 */
2873 		if (source == ZPROP_SRC_RECEIVED) {
2874 			cmn_err(CE_NOTE, "Skipping ZFS_PROP_LONGNAME checks "
2875 			    "for dsname=%s\n", dsname);
2876 			err = -1;
2877 			break;
2878 		}
2879 
2880 		if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_FALSE)) != 0) {
2881 			cmn_err(CE_WARN, "%s:%d Failed to hold for dsname=%s "
2882 			    "err=%d\n", __FILE__, __LINE__, dsname, err);
2883 			break;
2884 		}
2885 
2886 		if (!spa_feature_is_enabled(zfsvfs->z_os->os_spa,
2887 		    SPA_FEATURE_LONGNAME)) {
2888 			err = ENOTSUP;
2889 		} else {
2890 			/*
2891 			 * Set err to -1 to force the zfs_set_prop_nvlist code
2892 			 * down the default path to set the value in the nvlist.
2893 			 */
2894 			err = -1;
2895 		}
2896 		zfsvfs_rele(zfsvfs, FTAG);
2897 		break;
2898 	}
2899 	case ZFS_PROP_DEFAULTUSERQUOTA:
2900 	case ZFS_PROP_DEFAULTGROUPQUOTA:
2901 	case ZFS_PROP_DEFAULTPROJECTQUOTA:
2902 	case ZFS_PROP_DEFAULTUSEROBJQUOTA:
2903 	case ZFS_PROP_DEFAULTGROUPOBJQUOTA:
2904 	case ZFS_PROP_DEFAULTPROJECTOBJQUOTA:
2905 	{
2906 		zfsvfs_t *zfsvfs;
2907 		if ((err = zfsvfs_hold(dsname, FTAG, &zfsvfs, B_TRUE)) != 0)
2908 			break;
2909 		err = zfs_set_default_quota(zfsvfs, prop, intval);
2910 		zfsvfs_rele(zfsvfs, FTAG);
2911 		break;
2912 	}
2913 	case ZFS_PROP_ZONED_UID:
2914 	{
2915 		uint64_t old_uid = 0;
2916 		(void) dsl_prop_get(dsname, "zoned_uid", 8, 1, &old_uid, NULL);
2917 		if (old_uid != 0)
2918 			(void) zone_dataset_detach_uid(CRED(), dsname,
2919 			    (uid_t)old_uid);
2920 		if (intval != 0) {
2921 			err = zone_dataset_attach_uid(CRED(), dsname,
2922 			    (uid_t)intval);
2923 			if (err == ENXIO)
2924 				err = ZFS_ERR_NO_USER_NS_SUPPORT;
2925 			if (err != 0)
2926 				break;
2927 		}
2928 		/*
2929 		 * Set err to -1 to force the zfs_set_prop_nvlist code down the
2930 		 * default path to set the value in the nvlist.
2931 		 */
2932 		err = -1;
2933 		break;
2934 	}
2935 	default:
2936 		err = -1;
2937 	}
2938 
2939 	return (err);
2940 }
2941 
2942 static boolean_t
zfs_is_namespace_prop(zfs_prop_t prop)2943 zfs_is_namespace_prop(zfs_prop_t prop)
2944 {
2945 	switch (prop) {
2946 
2947 	case ZFS_PROP_ATIME:
2948 	case ZFS_PROP_RELATIME:
2949 	case ZFS_PROP_DEVICES:
2950 	case ZFS_PROP_EXEC:
2951 	case ZFS_PROP_SETUID:
2952 	case ZFS_PROP_READONLY:
2953 	case ZFS_PROP_XATTR:
2954 	case ZFS_PROP_NBMAND:
2955 		return (B_TRUE);
2956 
2957 	default:
2958 		return (B_FALSE);
2959 	}
2960 }
2961 
2962 /*
2963  * This function is best effort. If it fails to set any of the given properties,
2964  * it continues to set as many as it can and returns the last error
2965  * encountered. If the caller provides a non-NULL errlist, it will be filled in
2966  * with the list of names of all the properties that failed along with the
2967  * corresponding error numbers.
2968  *
2969  * If every property is set successfully, zero is returned and errlist is not
2970  * modified.
2971  */
2972 int
zfs_set_prop_nvlist(const char * dsname,zprop_source_t source,nvlist_t * nvl,nvlist_t * errlist)2973 zfs_set_prop_nvlist(const char *dsname, zprop_source_t source, nvlist_t *nvl,
2974     nvlist_t *errlist)
2975 {
2976 	nvpair_t *pair;
2977 	nvpair_t *propval;
2978 	int rv = 0;
2979 	int err;
2980 	uint64_t intval;
2981 	const char *strval;
2982 	boolean_t should_update_mount_cache = B_FALSE;
2983 
2984 	nvlist_t *genericnvl = fnvlist_alloc();
2985 	nvlist_t *retrynvl = fnvlist_alloc();
2986 retry:
2987 	pair = NULL;
2988 	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
2989 		const char *propname = nvpair_name(pair);
2990 		zfs_prop_t prop = zfs_name_to_prop(propname);
2991 		err = 0;
2992 
2993 		/* decode the property value */
2994 		propval = pair;
2995 		if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
2996 			nvlist_t *attrs;
2997 			attrs = fnvpair_value_nvlist(pair);
2998 			if (nvlist_lookup_nvpair(attrs, ZPROP_VALUE,
2999 			    &propval) != 0)
3000 				err = SET_ERROR(EINVAL);
3001 		}
3002 
3003 		/* Validate value type */
3004 		if (err == 0 && source == ZPROP_SRC_INHERITED) {
3005 			/* inherited properties are expected to be booleans */
3006 			if (nvpair_type(propval) != DATA_TYPE_BOOLEAN)
3007 				err = SET_ERROR(EINVAL);
3008 		} else if (err == 0 && prop == ZPROP_USERPROP) {
3009 			if (zfs_prop_user(propname)) {
3010 				if (nvpair_type(propval) != DATA_TYPE_STRING)
3011 					err = SET_ERROR(EINVAL);
3012 			} else if (zfs_prop_userquota(propname)) {
3013 				if (nvpair_type(propval) !=
3014 				    DATA_TYPE_UINT64_ARRAY)
3015 					err = SET_ERROR(EINVAL);
3016 			} else {
3017 				err = SET_ERROR(EINVAL);
3018 			}
3019 		} else if (err == 0) {
3020 			if (nvpair_type(propval) == DATA_TYPE_STRING) {
3021 				if (zfs_prop_get_type(prop) != PROP_TYPE_STRING)
3022 					err = SET_ERROR(EINVAL);
3023 			} else if (nvpair_type(propval) == DATA_TYPE_UINT64) {
3024 				const char *unused;
3025 
3026 				intval = fnvpair_value_uint64(propval);
3027 
3028 				switch (zfs_prop_get_type(prop)) {
3029 				case PROP_TYPE_NUMBER:
3030 					break;
3031 				case PROP_TYPE_STRING:
3032 					err = SET_ERROR(EINVAL);
3033 					break;
3034 				case PROP_TYPE_INDEX:
3035 					if (zfs_prop_index_to_string(prop,
3036 					    intval, &unused) != 0)
3037 						err =
3038 						    SET_ERROR(ZFS_ERR_BADPROP);
3039 					break;
3040 				default:
3041 					cmn_err(CE_PANIC,
3042 					    "unknown property type");
3043 				}
3044 			} else {
3045 				err = SET_ERROR(EINVAL);
3046 			}
3047 		}
3048 
3049 		/* Validate permissions */
3050 		if (err == 0)
3051 			err = zfs_check_settable(dsname, pair, CRED());
3052 
3053 		if (err == 0) {
3054 			if (source == ZPROP_SRC_INHERITED)
3055 				err = -1; /* does not need special handling */
3056 			else
3057 				err = zfs_prop_set_special(dsname, source,
3058 				    pair);
3059 			if (err == -1) {
3060 				/*
3061 				 * For better performance we build up a list of
3062 				 * properties to set in a single transaction.
3063 				 */
3064 				err = nvlist_add_nvpair(genericnvl, pair);
3065 			} else if (err != 0 && nvl != retrynvl) {
3066 				/*
3067 				 * This may be a spurious error caused by
3068 				 * receiving quota and reservation out of order.
3069 				 * Try again in a second pass.
3070 				 */
3071 				err = nvlist_add_nvpair(retrynvl, pair);
3072 			}
3073 		}
3074 
3075 		if (err != 0) {
3076 			if (errlist != NULL)
3077 				fnvlist_add_int32(errlist, propname, err);
3078 			rv = err;
3079 		}
3080 
3081 		if (zfs_is_namespace_prop(prop))
3082 			should_update_mount_cache = B_TRUE;
3083 	}
3084 
3085 	if (nvl != retrynvl && !nvlist_empty(retrynvl)) {
3086 		nvl = retrynvl;
3087 		goto retry;
3088 	}
3089 
3090 	if (nvlist_empty(genericnvl))
3091 		goto out;
3092 
3093 	/*
3094 	 * Try to set them all in one batch.
3095 	 */
3096 	err = dsl_props_set(dsname, source, genericnvl);
3097 	if (err == 0)
3098 		goto out;
3099 
3100 	/*
3101 	 * If batching fails, we still want to set as many properties as we
3102 	 * can, so try setting them individually.
3103 	 */
3104 	pair = NULL;
3105 	while ((pair = nvlist_next_nvpair(genericnvl, pair)) != NULL) {
3106 		const char *propname = nvpair_name(pair);
3107 
3108 		propval = pair;
3109 		if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
3110 			nvlist_t *attrs;
3111 			attrs = fnvpair_value_nvlist(pair);
3112 			propval = fnvlist_lookup_nvpair(attrs, ZPROP_VALUE);
3113 		}
3114 
3115 		if (nvpair_type(propval) == DATA_TYPE_STRING) {
3116 			strval = fnvpair_value_string(propval);
3117 			err = dsl_prop_set_string(dsname, propname,
3118 			    source, strval);
3119 		} else if (nvpair_type(propval) == DATA_TYPE_BOOLEAN) {
3120 			err = dsl_prop_inherit(dsname, propname, source);
3121 		} else {
3122 			intval = fnvpair_value_uint64(propval);
3123 			err = dsl_prop_set_int(dsname, propname, source,
3124 			    intval);
3125 		}
3126 
3127 		if (err != 0) {
3128 			if (errlist != NULL) {
3129 				fnvlist_add_int32(errlist, propname, err);
3130 			}
3131 			rv = err;
3132 		}
3133 	}
3134 
3135 out:
3136 	if (should_update_mount_cache)
3137 		zfs_ioctl_update_mount_cache(dsname);
3138 
3139 	nvlist_free(genericnvl);
3140 	nvlist_free(retrynvl);
3141 
3142 	return (rv);
3143 }
3144 
3145 /*
3146  * Check that all the properties are valid user properties.
3147  */
3148 static int
zfs_check_userprops(nvlist_t * nvl)3149 zfs_check_userprops(nvlist_t *nvl)
3150 {
3151 	nvpair_t *pair = NULL;
3152 
3153 	while ((pair = nvlist_next_nvpair(nvl, pair)) != NULL) {
3154 		const char *propname = nvpair_name(pair);
3155 
3156 		if (!zfs_prop_user(propname) ||
3157 		    nvpair_type(pair) != DATA_TYPE_STRING)
3158 			return (SET_ERROR(EINVAL));
3159 
3160 		if (strlen(propname) >= ZAP_MAXNAMELEN)
3161 			return (SET_ERROR(ENAMETOOLONG));
3162 
3163 		if (strlen(fnvpair_value_string(pair)) >= ZAP_MAXVALUELEN)
3164 			return (SET_ERROR(E2BIG));
3165 	}
3166 	return (0);
3167 }
3168 
3169 static void
props_skip(nvlist_t * props,nvlist_t * skipped,nvlist_t ** newprops)3170 props_skip(nvlist_t *props, nvlist_t *skipped, nvlist_t **newprops)
3171 {
3172 	nvpair_t *pair;
3173 
3174 	VERIFY0(nvlist_alloc(newprops, NV_UNIQUE_NAME, KM_SLEEP));
3175 
3176 	pair = NULL;
3177 	while ((pair = nvlist_next_nvpair(props, pair)) != NULL) {
3178 		if (nvlist_exists(skipped, nvpair_name(pair)))
3179 			continue;
3180 
3181 		VERIFY0(nvlist_add_nvpair(*newprops, pair));
3182 	}
3183 }
3184 
3185 static int
clear_received_props(const char * dsname,nvlist_t * props,nvlist_t * skipped)3186 clear_received_props(const char *dsname, nvlist_t *props,
3187     nvlist_t *skipped)
3188 {
3189 	int err = 0;
3190 	nvlist_t *cleared_props = NULL;
3191 	props_skip(props, skipped, &cleared_props);
3192 	if (!nvlist_empty(cleared_props)) {
3193 		/*
3194 		 * Acts on local properties until the dataset has received
3195 		 * properties at least once on or after SPA_VERSION_RECVD_PROPS.
3196 		 */
3197 		zprop_source_t flags = (ZPROP_SRC_NONE |
3198 		    (dsl_prop_get_hasrecvd(dsname) ? ZPROP_SRC_RECEIVED : 0));
3199 		err = zfs_set_prop_nvlist(dsname, flags, cleared_props, NULL);
3200 	}
3201 	nvlist_free(cleared_props);
3202 	return (err);
3203 }
3204 
3205 /*
3206  * inputs:
3207  * zc_name		name of filesystem
3208  * zc_value		name of property to set
3209  * zc_nvlist_src{_size}	nvlist of properties to apply
3210  * zc_cookie		received properties flag
3211  *
3212  * outputs:
3213  * zc_nvlist_dst{_size} error for each unapplied received property
3214  */
3215 static int
zfs_ioc_set_prop(zfs_cmd_t * zc)3216 zfs_ioc_set_prop(zfs_cmd_t *zc)
3217 {
3218 	nvlist_t *nvl;
3219 	boolean_t received = zc->zc_cookie;
3220 	zprop_source_t source = (received ? ZPROP_SRC_RECEIVED :
3221 	    ZPROP_SRC_LOCAL);
3222 	nvlist_t *errors;
3223 	int error;
3224 
3225 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3226 	    zc->zc_iflags, &nvl)) != 0)
3227 		return (error);
3228 
3229 	if (received) {
3230 		nvlist_t *origprops;
3231 
3232 		if (dsl_prop_get_received(zc->zc_name, &origprops) == 0) {
3233 			(void) clear_received_props(zc->zc_name,
3234 			    origprops, nvl);
3235 			nvlist_free(origprops);
3236 		}
3237 
3238 		error = dsl_prop_set_hasrecvd(zc->zc_name);
3239 	}
3240 
3241 	errors = fnvlist_alloc();
3242 	if (error == 0)
3243 		error = zfs_set_prop_nvlist(zc->zc_name, source, nvl, errors);
3244 
3245 	if (zc->zc_nvlist_dst != 0 && errors != NULL) {
3246 		(void) put_nvlist(zc, errors);
3247 	}
3248 
3249 	nvlist_free(errors);
3250 	nvlist_free(nvl);
3251 	return (error);
3252 }
3253 
3254 /*
3255  * inputs:
3256  * zc_name		name of filesystem
3257  * zc_value		name of property to inherit
3258  * zc_cookie		revert to received value if TRUE
3259  *
3260  * outputs:		none
3261  */
3262 static int
zfs_ioc_inherit_prop(zfs_cmd_t * zc)3263 zfs_ioc_inherit_prop(zfs_cmd_t *zc)
3264 {
3265 	const char *propname = zc->zc_value;
3266 	zfs_prop_t prop = zfs_name_to_prop(propname);
3267 	boolean_t received = zc->zc_cookie;
3268 	zprop_source_t source = (received
3269 	    ? ZPROP_SRC_NONE		/* revert to received value, if any */
3270 	    : ZPROP_SRC_INHERITED);	/* explicitly inherit */
3271 	nvlist_t *dummy;
3272 	nvpair_t *pair;
3273 	zprop_type_t type;
3274 	int err;
3275 
3276 	if (!received) {
3277 		/*
3278 		 * Only check this in the non-received case. We want to allow
3279 		 * 'inherit -S' to revert non-inheritable properties like quota
3280 		 * and reservation to the received or default values even though
3281 		 * they are not considered inheritable.
3282 		 */
3283 		if (prop != ZPROP_USERPROP && !zfs_prop_inheritable(prop))
3284 			return (SET_ERROR(EINVAL));
3285 	}
3286 
3287 	if (prop == ZPROP_USERPROP) {
3288 		if (!zfs_prop_user(propname))
3289 			return (SET_ERROR(EINVAL));
3290 
3291 		type = PROP_TYPE_STRING;
3292 	} else if (prop == ZFS_PROP_VOLSIZE || prop == ZFS_PROP_VERSION) {
3293 		return (SET_ERROR(EINVAL));
3294 	} else {
3295 		type = zfs_prop_get_type(prop);
3296 	}
3297 
3298 	/*
3299 	 * zfs_prop_set_special() expects properties in the form of an
3300 	 * nvpair with type info.
3301 	 */
3302 	dummy = fnvlist_alloc();
3303 
3304 	switch (type) {
3305 	case PROP_TYPE_STRING:
3306 		VERIFY0(nvlist_add_string(dummy, propname, ""));
3307 		break;
3308 	case PROP_TYPE_NUMBER:
3309 	case PROP_TYPE_INDEX:
3310 		VERIFY0(nvlist_add_uint64(dummy, propname, 0));
3311 		break;
3312 	default:
3313 		err = SET_ERROR(EINVAL);
3314 		goto errout;
3315 	}
3316 
3317 	pair = nvlist_next_nvpair(dummy, NULL);
3318 	if (pair == NULL) {
3319 		err = SET_ERROR(EINVAL);
3320 	} else {
3321 		err = zfs_prop_set_special(zc->zc_name, source, pair);
3322 		if (err == -1) /* property is not "special", needs handling */
3323 			err = dsl_prop_inherit(zc->zc_name, zc->zc_value,
3324 			    source);
3325 	}
3326 
3327 errout:
3328 	nvlist_free(dummy);
3329 	return (err);
3330 }
3331 
3332 static int
zfs_ioc_pool_set_props(zfs_cmd_t * zc)3333 zfs_ioc_pool_set_props(zfs_cmd_t *zc)
3334 {
3335 	nvlist_t *props;
3336 	spa_t *spa;
3337 	int error;
3338 	nvpair_t *pair;
3339 
3340 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3341 	    zc->zc_iflags, &props)))
3342 		return (error);
3343 
3344 	/*
3345 	 * If the only property is the configfile, then just do a spa_lookup()
3346 	 * to handle the faulted case.
3347 	 */
3348 	pair = nvlist_next_nvpair(props, NULL);
3349 	if (pair != NULL && strcmp(nvpair_name(pair),
3350 	    zpool_prop_to_name(ZPOOL_PROP_CACHEFILE)) == 0 &&
3351 	    nvlist_next_nvpair(props, pair) == NULL) {
3352 		spa_namespace_enter(FTAG);
3353 		if ((spa = spa_lookup(zc->zc_name)) != NULL) {
3354 			spa_configfile_set(spa, props, B_FALSE);
3355 			spa_write_cachefile(spa, B_FALSE, B_TRUE, B_FALSE);
3356 		}
3357 		spa_namespace_exit(FTAG);
3358 		if (spa != NULL) {
3359 			nvlist_free(props);
3360 			return (0);
3361 		}
3362 	}
3363 
3364 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0) {
3365 		nvlist_free(props);
3366 		return (error);
3367 	}
3368 
3369 	error = spa_prop_set(spa, props);
3370 
3371 	nvlist_free(props);
3372 	spa_close(spa, FTAG);
3373 
3374 	return (error);
3375 }
3376 
3377 /*
3378  * innvl: {
3379  *	"get_props_names": [ "prop1", "prop2", ..., "propN" ]
3380  * }
3381  */
3382 
3383 static const zfs_ioc_key_t zfs_keys_get_props[] = {
3384 	{ ZPOOL_GET_PROPS_NAMES,	DATA_TYPE_STRING_ARRAY,	ZK_OPTIONAL },
3385 };
3386 
3387 static int
zfs_ioc_pool_get_props(const char * pool,nvlist_t * innvl,nvlist_t * outnvl)3388 zfs_ioc_pool_get_props(const char *pool, nvlist_t *innvl, nvlist_t *outnvl)
3389 {
3390 	spa_t *spa;
3391 	char **props = NULL;
3392 	unsigned int n_props = 0;
3393 	int error;
3394 
3395 	if (nvlist_lookup_string_array(innvl, ZPOOL_GET_PROPS_NAMES,
3396 	    &props, &n_props) != 0) {
3397 		props = NULL;
3398 	}
3399 
3400 	if ((error = spa_open(pool, &spa, FTAG)) != 0) {
3401 		/*
3402 		 * If the pool is faulted, there may be properties we can still
3403 		 * get (such as altroot and cachefile), so attempt to get them
3404 		 * anyway.
3405 		 */
3406 		spa_namespace_enter(FTAG);
3407 		if ((spa = spa_lookup(pool)) != NULL) {
3408 			error = spa_prop_get(spa, outnvl);
3409 			if (error == 0 && props != NULL)
3410 				error = spa_prop_get_nvlist(spa, props, n_props,
3411 				    outnvl);
3412 		}
3413 		spa_namespace_exit(FTAG);
3414 	} else {
3415 		error = spa_prop_get(spa, outnvl);
3416 		if (error == 0 && props != NULL)
3417 			error = spa_prop_get_nvlist(spa, props, n_props,
3418 			    outnvl);
3419 		spa_close(spa, FTAG);
3420 	}
3421 
3422 	return (error);
3423 }
3424 
3425 /*
3426  * innvl: {
3427  *     "vdevprops_set_vdev" -> guid
3428  *     "vdevprops_set_props" -> { prop -> value }
3429  * }
3430  *
3431  * outnvl: propname -> error code (int32)
3432  */
3433 static const zfs_ioc_key_t zfs_keys_vdev_set_props[] = {
3434 	{ZPOOL_VDEV_PROPS_SET_VDEV,	DATA_TYPE_UINT64,	0},
3435 	{ZPOOL_VDEV_PROPS_SET_PROPS,	DATA_TYPE_NVLIST,	0}
3436 };
3437 
3438 static int
zfs_ioc_vdev_set_props(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)3439 zfs_ioc_vdev_set_props(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3440 {
3441 	spa_t *spa;
3442 	int error;
3443 	vdev_t *vd;
3444 	uint64_t vdev_guid;
3445 
3446 	/* Early validation */
3447 	if (nvlist_lookup_uint64(innvl, ZPOOL_VDEV_PROPS_SET_VDEV,
3448 	    &vdev_guid) != 0)
3449 		return (SET_ERROR(EINVAL));
3450 
3451 	if (outnvl == NULL)
3452 		return (SET_ERROR(EINVAL));
3453 
3454 	if ((error = spa_open(poolname, &spa, FTAG)) != 0)
3455 		return (error);
3456 
3457 	ASSERT(spa_writeable(spa));
3458 
3459 	if ((vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE)) == NULL) {
3460 		spa_close(spa, FTAG);
3461 		return (SET_ERROR(ENOENT));
3462 	}
3463 
3464 	error = vdev_prop_set(vd, innvl, outnvl);
3465 
3466 	spa_close(spa, FTAG);
3467 
3468 	return (error);
3469 }
3470 
3471 /*
3472  * innvl: {
3473  *     "vdevprops_get_vdev" -> guid
3474  *     (optional) "vdevprops_get_props" -> { propname -> propid }
3475  * }
3476  *
3477  * outnvl: propname -> value
3478  */
3479 static const zfs_ioc_key_t zfs_keys_vdev_get_props[] = {
3480 	{ZPOOL_VDEV_PROPS_GET_VDEV,	DATA_TYPE_UINT64,	0},
3481 	{ZPOOL_VDEV_PROPS_GET_PROPS,	DATA_TYPE_NVLIST,	ZK_OPTIONAL}
3482 };
3483 
3484 static int
zfs_ioc_vdev_get_props(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)3485 zfs_ioc_vdev_get_props(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
3486 {
3487 	spa_t *spa;
3488 	int error;
3489 	vdev_t *vd;
3490 	uint64_t vdev_guid;
3491 
3492 	/* Early validation */
3493 	if (nvlist_lookup_uint64(innvl, ZPOOL_VDEV_PROPS_GET_VDEV,
3494 	    &vdev_guid) != 0)
3495 		return (SET_ERROR(EINVAL));
3496 
3497 	if (outnvl == NULL)
3498 		return (SET_ERROR(EINVAL));
3499 
3500 	if ((error = spa_open(poolname, &spa, FTAG)) != 0)
3501 		return (error);
3502 
3503 	if ((vd = spa_lookup_by_guid(spa, vdev_guid, B_TRUE)) == NULL) {
3504 		spa_close(spa, FTAG);
3505 		return (SET_ERROR(ENOENT));
3506 	}
3507 
3508 	error = vdev_prop_get(vd, innvl, outnvl);
3509 
3510 	spa_close(spa, FTAG);
3511 
3512 	return (error);
3513 }
3514 
3515 /*
3516  * inputs:
3517  * zc_name		name of filesystem
3518  * zc_nvlist_src{_size}	nvlist of delegated permissions
3519  * zc_perm_action	allow/unallow flag
3520  *
3521  * outputs:		none
3522  */
3523 static int
zfs_ioc_set_fsacl(zfs_cmd_t * zc)3524 zfs_ioc_set_fsacl(zfs_cmd_t *zc)
3525 {
3526 	int error;
3527 	nvlist_t *fsaclnv = NULL;
3528 
3529 	if ((error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
3530 	    zc->zc_iflags, &fsaclnv)) != 0)
3531 		return (error);
3532 
3533 	/*
3534 	 * Verify nvlist is constructed correctly
3535 	 */
3536 	if (zfs_deleg_verify_nvlist(fsaclnv) != 0) {
3537 		nvlist_free(fsaclnv);
3538 		return (SET_ERROR(EINVAL));
3539 	}
3540 
3541 	/*
3542 	 * If we don't have PRIV_SYS_MOUNT, then validate
3543 	 * that user is allowed to hand out each permission in
3544 	 * the nvlist(s)
3545 	 */
3546 
3547 	error = secpolicy_zfs(CRED());
3548 	if (error != 0) {
3549 		if (zc->zc_perm_action == B_FALSE) {
3550 			error = dsl_deleg_can_allow(zc->zc_name,
3551 			    fsaclnv, CRED());
3552 		} else {
3553 			error = dsl_deleg_can_unallow(zc->zc_name,
3554 			    fsaclnv, CRED());
3555 		}
3556 	}
3557 
3558 	if (error == 0)
3559 		error = dsl_deleg_set(zc->zc_name, fsaclnv, zc->zc_perm_action);
3560 
3561 	nvlist_free(fsaclnv);
3562 	return (error);
3563 }
3564 
3565 /*
3566  * inputs:
3567  * zc_name		name of filesystem
3568  *
3569  * outputs:
3570  * zc_nvlist_src{_size}	nvlist of delegated permissions
3571  */
3572 static int
zfs_ioc_get_fsacl(zfs_cmd_t * zc)3573 zfs_ioc_get_fsacl(zfs_cmd_t *zc)
3574 {
3575 	nvlist_t *nvp;
3576 	int error;
3577 
3578 	if ((error = dsl_deleg_get(zc->zc_name, &nvp)) == 0) {
3579 		error = put_nvlist(zc, nvp);
3580 		nvlist_free(nvp);
3581 	}
3582 
3583 	return (error);
3584 }
3585 
3586 static void
zfs_create_cb(objset_t * os,void * arg,cred_t * cr,dmu_tx_t * tx)3587 zfs_create_cb(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx)
3588 {
3589 	zfs_creat_t *zct = arg;
3590 
3591 	zfs_create_fs(os, cr, zct->zct_zplprops, tx);
3592 }
3593 
3594 #define	ZFS_PROP_UNDEFINED	((uint64_t)-1)
3595 
3596 /*
3597  * inputs:
3598  * os			parent objset pointer (NULL if root fs)
3599  * fuids_ok		fuids allowed in this version of the spa?
3600  * sa_ok		SAs allowed in this version of the spa?
3601  * createprops		list of properties requested by creator
3602  *
3603  * outputs:
3604  * zplprops	values for the zplprops we attach to the master node object
3605  * is_ci	true if requested file system will be purely case-insensitive
3606  *
3607  * Determine the settings for utf8only, normalization and
3608  * casesensitivity.  Specific values may have been requested by the
3609  * creator and/or we can inherit values from the parent dataset.  If
3610  * the file system is of too early a vintage, a creator can not
3611  * request settings for these properties, even if the requested
3612  * setting is the default value.  We don't actually want to create dsl
3613  * properties for these, so remove them from the source nvlist after
3614  * processing.
3615  */
3616 static int
zfs_fill_zplprops_impl(objset_t * os,uint64_t zplver,boolean_t fuids_ok,boolean_t sa_ok,nvlist_t * createprops,nvlist_t * zplprops,boolean_t * is_ci)3617 zfs_fill_zplprops_impl(objset_t *os, uint64_t zplver,
3618     boolean_t fuids_ok, boolean_t sa_ok, nvlist_t *createprops,
3619     nvlist_t *zplprops, boolean_t *is_ci)
3620 {
3621 	uint64_t sense = ZFS_PROP_UNDEFINED;
3622 	uint64_t norm = ZFS_PROP_UNDEFINED;
3623 	uint64_t u8 = ZFS_PROP_UNDEFINED;
3624 	uint64_t duq = ZFS_PROP_UNDEFINED, duoq = ZFS_PROP_UNDEFINED;
3625 	uint64_t dgq = ZFS_PROP_UNDEFINED, dgoq = ZFS_PROP_UNDEFINED;
3626 	uint64_t dpq = ZFS_PROP_UNDEFINED, dpoq = ZFS_PROP_UNDEFINED;
3627 	int error;
3628 
3629 	ASSERT(zplprops != NULL);
3630 
3631 	/* parent dataset must be a filesystem */
3632 	if (os != NULL && os->os_phys->os_type != DMU_OST_ZFS)
3633 		return (SET_ERROR(ZFS_ERR_WRONG_PARENT));
3634 
3635 	/*
3636 	 * Pull out creator prop choices, if any.
3637 	 */
3638 	if (createprops) {
3639 		(void) nvlist_lookup_uint64(createprops,
3640 		    zfs_prop_to_name(ZFS_PROP_VERSION), &zplver);
3641 		(void) nvlist_lookup_uint64(createprops,
3642 		    zfs_prop_to_name(ZFS_PROP_NORMALIZE), &norm);
3643 		(void) nvlist_remove_all(createprops,
3644 		    zfs_prop_to_name(ZFS_PROP_NORMALIZE));
3645 		(void) nvlist_lookup_uint64(createprops,
3646 		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), &u8);
3647 		(void) nvlist_remove_all(createprops,
3648 		    zfs_prop_to_name(ZFS_PROP_UTF8ONLY));
3649 		(void) nvlist_lookup_uint64(createprops,
3650 		    zfs_prop_to_name(ZFS_PROP_CASE), &sense);
3651 		(void) nvlist_remove_all(createprops,
3652 		    zfs_prop_to_name(ZFS_PROP_CASE));
3653 		(void) nvlist_lookup_uint64(createprops,
3654 		    zfs_prop_to_name(ZFS_PROP_DEFAULTUSERQUOTA), &duq);
3655 		(void) nvlist_remove_all(createprops,
3656 		    zfs_prop_to_name(ZFS_PROP_DEFAULTUSERQUOTA));
3657 		(void) nvlist_lookup_uint64(createprops,
3658 		    zfs_prop_to_name(ZFS_PROP_DEFAULTGROUPQUOTA), &dgq);
3659 		(void) nvlist_remove_all(createprops,
3660 		    zfs_prop_to_name(ZFS_PROP_DEFAULTGROUPQUOTA));
3661 		(void) nvlist_lookup_uint64(createprops,
3662 		    zfs_prop_to_name(ZFS_PROP_DEFAULTPROJECTQUOTA), &dpq);
3663 		(void) nvlist_remove_all(createprops,
3664 		    zfs_prop_to_name(ZFS_PROP_DEFAULTPROJECTQUOTA));
3665 		(void) nvlist_lookup_uint64(createprops,
3666 		    zfs_prop_to_name(ZFS_PROP_DEFAULTUSEROBJQUOTA), &duoq);
3667 		(void) nvlist_remove_all(createprops,
3668 		    zfs_prop_to_name(ZFS_PROP_DEFAULTUSEROBJQUOTA));
3669 		(void) nvlist_lookup_uint64(createprops,
3670 		    zfs_prop_to_name(ZFS_PROP_DEFAULTGROUPOBJQUOTA), &dgoq);
3671 		(void) nvlist_remove_all(createprops,
3672 		    zfs_prop_to_name(ZFS_PROP_DEFAULTGROUPOBJQUOTA));
3673 		(void) nvlist_lookup_uint64(createprops,
3674 		    zfs_prop_to_name(ZFS_PROP_DEFAULTPROJECTOBJQUOTA), &dpoq);
3675 		(void) nvlist_remove_all(createprops,
3676 		    zfs_prop_to_name(ZFS_PROP_DEFAULTPROJECTOBJQUOTA));
3677 	}
3678 
3679 	/*
3680 	 * If the zpl version requested is whacky or the file system
3681 	 * or pool is version is too "young" to support normalization
3682 	 * and the creator tried to set a value for one of the props,
3683 	 * error out.
3684 	 */
3685 	if ((zplver < ZPL_VERSION_INITIAL || zplver > ZPL_VERSION) ||
3686 	    (zplver >= ZPL_VERSION_FUID && !fuids_ok) ||
3687 	    (zplver >= ZPL_VERSION_SA && !sa_ok) ||
3688 	    (zplver < ZPL_VERSION_NORMALIZATION &&
3689 	    (norm != ZFS_PROP_UNDEFINED || u8 != ZFS_PROP_UNDEFINED ||
3690 	    sense != ZFS_PROP_UNDEFINED)))
3691 		return (SET_ERROR(ENOTSUP));
3692 
3693 	/*
3694 	 * Put the version in the zplprops
3695 	 */
3696 	VERIFY0(nvlist_add_uint64(zplprops,
3697 	    zfs_prop_to_name(ZFS_PROP_VERSION), zplver));
3698 
3699 	if (norm == ZFS_PROP_UNDEFINED &&
3700 	    (error = zfs_get_zplprop(os, ZFS_PROP_NORMALIZE, &norm)) != 0)
3701 		return (error);
3702 	VERIFY0(nvlist_add_uint64(zplprops,
3703 	    zfs_prop_to_name(ZFS_PROP_NORMALIZE), norm));
3704 
3705 	/*
3706 	 * If we're normalizing, names must always be valid UTF-8 strings.
3707 	 */
3708 	if (norm)
3709 		u8 = 1;
3710 	if (u8 == ZFS_PROP_UNDEFINED &&
3711 	    (error = zfs_get_zplprop(os, ZFS_PROP_UTF8ONLY, &u8)) != 0)
3712 		return (error);
3713 	VERIFY0(nvlist_add_uint64(zplprops,
3714 	    zfs_prop_to_name(ZFS_PROP_UTF8ONLY), u8));
3715 
3716 	if (sense == ZFS_PROP_UNDEFINED &&
3717 	    (error = zfs_get_zplprop(os, ZFS_PROP_CASE, &sense)) != 0)
3718 		return (error);
3719 	VERIFY0(nvlist_add_uint64(zplprops,
3720 	    zfs_prop_to_name(ZFS_PROP_CASE), sense));
3721 
3722 	if (duq == ZFS_PROP_UNDEFINED &&
3723 	    (error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTUSERQUOTA, &duq)) != 0)
3724 		return (error);
3725 	VERIFY0(nvlist_add_uint64(zplprops,
3726 	    zfs_prop_to_name(ZFS_PROP_DEFAULTUSERQUOTA), duq));
3727 
3728 	if (dgq == ZFS_PROP_UNDEFINED &&
3729 	    (error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTGROUPQUOTA,
3730 	    &dgq)) != 0)
3731 		return (error);
3732 	VERIFY0(nvlist_add_uint64(zplprops,
3733 	    zfs_prop_to_name(ZFS_PROP_DEFAULTGROUPQUOTA), dgq));
3734 
3735 	if (dpq == ZFS_PROP_UNDEFINED &&
3736 	    (error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTPROJECTQUOTA,
3737 	    &dpq)) != 0)
3738 		return (error);
3739 	VERIFY0(nvlist_add_uint64(zplprops,
3740 	    zfs_prop_to_name(ZFS_PROP_DEFAULTPROJECTQUOTA), dpq));
3741 
3742 	if (duoq == ZFS_PROP_UNDEFINED &&
3743 	    (error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTUSEROBJQUOTA,
3744 	    &duoq)) != 0)
3745 		return (error);
3746 	VERIFY0(nvlist_add_uint64(zplprops,
3747 	    zfs_prop_to_name(ZFS_PROP_DEFAULTUSEROBJQUOTA), duoq));
3748 
3749 	if (dgoq == ZFS_PROP_UNDEFINED &&
3750 	    (error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTGROUPOBJQUOTA,
3751 	    &dgoq)) != 0)
3752 		return (error);
3753 	VERIFY0(nvlist_add_uint64(zplprops,
3754 	    zfs_prop_to_name(ZFS_PROP_DEFAULTGROUPOBJQUOTA), dgoq));
3755 
3756 	if (dpoq == ZFS_PROP_UNDEFINED &&
3757 	    (error = zfs_get_zplprop(os, ZFS_PROP_DEFAULTPROJECTOBJQUOTA,
3758 	    &dpoq)) != 0)
3759 		return (error);
3760 	VERIFY0(nvlist_add_uint64(zplprops,
3761 	    zfs_prop_to_name(ZFS_PROP_DEFAULTPROJECTOBJQUOTA), dpoq));
3762 
3763 	if (is_ci)
3764 		*is_ci = (sense == ZFS_CASE_INSENSITIVE);
3765 
3766 	return (0);
3767 }
3768 
3769 static int
zfs_fill_zplprops(const char * dataset,nvlist_t * createprops,nvlist_t * zplprops,boolean_t * is_ci)3770 zfs_fill_zplprops(const char *dataset, nvlist_t *createprops,
3771     nvlist_t *zplprops, boolean_t *is_ci)
3772 {
3773 	boolean_t fuids_ok, sa_ok;
3774 	uint64_t zplver = ZPL_VERSION;
3775 	objset_t *os = NULL;
3776 	char parentname[ZFS_MAX_DATASET_NAME_LEN];
3777 	spa_t *spa;
3778 	uint64_t spa_vers;
3779 	int error;
3780 
3781 	zfs_get_parent(dataset, parentname, sizeof (parentname));
3782 
3783 	if ((error = spa_open(dataset, &spa, FTAG)) != 0)
3784 		return (error);
3785 
3786 	spa_vers = spa_version(spa);
3787 	spa_close(spa, FTAG);
3788 
3789 	zplver = zfs_zpl_version_map(spa_vers);
3790 	fuids_ok = (zplver >= ZPL_VERSION_FUID);
3791 	sa_ok = (zplver >= ZPL_VERSION_SA);
3792 
3793 	/*
3794 	 * Open parent object set so we can inherit zplprop values.
3795 	 */
3796 	if ((error = dmu_objset_hold(parentname, FTAG, &os)) != 0)
3797 		return (error);
3798 
3799 	error = zfs_fill_zplprops_impl(os, zplver, fuids_ok, sa_ok, createprops,
3800 	    zplprops, is_ci);
3801 	dmu_objset_rele(os, FTAG);
3802 	return (error);
3803 }
3804 
3805 static int
zfs_fill_zplprops_root(uint64_t spa_vers,nvlist_t * createprops,nvlist_t * zplprops,boolean_t * is_ci)3806 zfs_fill_zplprops_root(uint64_t spa_vers, nvlist_t *createprops,
3807     nvlist_t *zplprops, boolean_t *is_ci)
3808 {
3809 	boolean_t fuids_ok;
3810 	boolean_t sa_ok;
3811 	uint64_t zplver = ZPL_VERSION;
3812 	int error;
3813 
3814 	zplver = zfs_zpl_version_map(spa_vers);
3815 	fuids_ok = (zplver >= ZPL_VERSION_FUID);
3816 	sa_ok = (zplver >= ZPL_VERSION_SA);
3817 
3818 	error = zfs_fill_zplprops_impl(NULL, zplver, fuids_ok, sa_ok,
3819 	    createprops, zplprops, is_ci);
3820 	return (error);
3821 }
3822 
3823 /*
3824  * innvl: {
3825  *     "type" -> dmu_objset_type_t (int32)
3826  *     (optional) "props" -> { prop -> value }
3827  *     (optional) "hidden_args" -> { "wkeydata" -> value }
3828  *         raw uint8_t array of encryption wrapping key data (32 bytes)
3829  * }
3830  *
3831  * outnvl: propname -> error code (int32)
3832  */
3833 
3834 static const zfs_ioc_key_t zfs_keys_create[] = {
3835 	{"type",	DATA_TYPE_INT32,	0},
3836 	{"props",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
3837 	{"hidden_args",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
3838 };
3839 
3840 static int
zfs_ioc_create(const char * fsname,nvlist_t * innvl,nvlist_t * outnvl)3841 zfs_ioc_create(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3842 {
3843 	int error = 0;
3844 	zfs_creat_t zct = { 0 };
3845 	nvlist_t *nvprops = NULL;
3846 	nvlist_t *hidden_args = NULL;
3847 	void (*cbfunc)(objset_t *os, void *arg, cred_t *cr, dmu_tx_t *tx);
3848 	dmu_objset_type_t type;
3849 	boolean_t is_insensitive = B_FALSE;
3850 	dsl_crypto_params_t *dcp = NULL;
3851 
3852 	type = (dmu_objset_type_t)fnvlist_lookup_int32(innvl, "type");
3853 	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3854 	(void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
3855 
3856 	switch (type) {
3857 	case DMU_OST_ZFS:
3858 		cbfunc = zfs_create_cb;
3859 		break;
3860 
3861 	case DMU_OST_ZVOL:
3862 		cbfunc = zvol_create_cb;
3863 		break;
3864 
3865 	default:
3866 		cbfunc = NULL;
3867 		break;
3868 	}
3869 	if (strchr(fsname, '@') ||
3870 	    strchr(fsname, '%'))
3871 		return (SET_ERROR(EINVAL));
3872 
3873 	zct.zct_props = nvprops;
3874 
3875 	if (cbfunc == NULL)
3876 		return (SET_ERROR(EINVAL));
3877 
3878 	if (type == DMU_OST_ZVOL) {
3879 		uint64_t volsize, volblocksize;
3880 
3881 		if (nvprops == NULL)
3882 			return (SET_ERROR(EINVAL));
3883 		if (nvlist_lookup_uint64(nvprops,
3884 		    zfs_prop_to_name(ZFS_PROP_VOLSIZE), &volsize) != 0)
3885 			return (SET_ERROR(EINVAL));
3886 
3887 		if ((error = nvlist_lookup_uint64(nvprops,
3888 		    zfs_prop_to_name(ZFS_PROP_VOLBLOCKSIZE),
3889 		    &volblocksize)) != 0 && error != ENOENT)
3890 			return (SET_ERROR(EINVAL));
3891 
3892 		if (error != 0)
3893 			volblocksize = zfs_prop_default_numeric(
3894 			    ZFS_PROP_VOLBLOCKSIZE);
3895 
3896 		if ((error = zvol_check_volblocksize(fsname,
3897 		    volblocksize)) != 0 ||
3898 		    (error = zvol_check_volsize(volsize,
3899 		    volblocksize)) != 0)
3900 			return (error);
3901 	} else if (type == DMU_OST_ZFS) {
3902 		int error;
3903 
3904 		/*
3905 		 * We have to have normalization and
3906 		 * case-folding flags correct when we do the
3907 		 * file system creation, so go figure them out
3908 		 * now.
3909 		 */
3910 		VERIFY0(nvlist_alloc(&zct.zct_zplprops,
3911 		    NV_UNIQUE_NAME, KM_SLEEP));
3912 		error = zfs_fill_zplprops(fsname, nvprops,
3913 		    zct.zct_zplprops, &is_insensitive);
3914 		if (error != 0) {
3915 			nvlist_free(zct.zct_zplprops);
3916 			return (error);
3917 		}
3918 	}
3919 
3920 	error = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, nvprops,
3921 	    hidden_args, &dcp);
3922 	if (error != 0) {
3923 		nvlist_free(zct.zct_zplprops);
3924 		return (error);
3925 	}
3926 
3927 	error = dmu_objset_create(fsname, type,
3928 	    is_insensitive ? DS_FLAG_CI_DATASET : 0, dcp, cbfunc, &zct);
3929 
3930 	nvlist_free(zct.zct_zplprops);
3931 	dsl_crypto_params_free(dcp, !!error);
3932 
3933 	/*
3934 	 * It would be nice to do this atomically.
3935 	 */
3936 	if (error == 0) {
3937 		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
3938 		    nvprops, outnvl);
3939 		if (error != 0) {
3940 			spa_t *spa;
3941 			int error2;
3942 
3943 			/*
3944 			 * Volumes will return EBUSY and cannot be destroyed
3945 			 * until all asynchronous minor handling (e.g. from
3946 			 * setting the volmode property) has completed. Wait for
3947 			 * the spa_zvol_taskq to drain then retry.
3948 			 */
3949 			error2 = dsl_destroy_head(fsname);
3950 			while ((error2 == EBUSY) && (type == DMU_OST_ZVOL)) {
3951 				error2 = spa_open(fsname, &spa, FTAG);
3952 				if (error2 == 0) {
3953 					taskq_wait(spa->spa_zvol_taskq);
3954 					spa_close(spa, FTAG);
3955 				}
3956 				error2 = dsl_destroy_head(fsname);
3957 			}
3958 		}
3959 	}
3960 	return (error);
3961 }
3962 
3963 /*
3964  * innvl: {
3965  *     "origin" -> name of origin snapshot
3966  *     (optional) "props" -> { prop -> value }
3967  *     (optional) "hidden_args" -> { "wkeydata" -> value }
3968  *         raw uint8_t array of encryption wrapping key data (32 bytes)
3969  * }
3970  *
3971  * outputs:
3972  * outnvl: propname -> error code (int32)
3973  */
3974 static const zfs_ioc_key_t zfs_keys_clone[] = {
3975 	{"origin",	DATA_TYPE_STRING,	0},
3976 	{"props",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
3977 	{"hidden_args",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
3978 };
3979 
3980 static int
zfs_ioc_clone(const char * fsname,nvlist_t * innvl,nvlist_t * outnvl)3981 zfs_ioc_clone(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
3982 {
3983 	int error = 0;
3984 	nvlist_t *nvprops = NULL;
3985 	const char *origin_name;
3986 
3987 	origin_name = fnvlist_lookup_string(innvl, "origin");
3988 	(void) nvlist_lookup_nvlist(innvl, "props", &nvprops);
3989 
3990 	if (strchr(fsname, '@') ||
3991 	    strchr(fsname, '%'))
3992 		return (SET_ERROR(EINVAL));
3993 
3994 	if (dataset_namecheck(origin_name, NULL, NULL) != 0)
3995 		return (SET_ERROR(EINVAL));
3996 
3997 	error = dsl_dataset_clone(fsname, origin_name);
3998 
3999 	/*
4000 	 * It would be nice to do this atomically.
4001 	 */
4002 	if (error == 0) {
4003 		error = zfs_set_prop_nvlist(fsname, ZPROP_SRC_LOCAL,
4004 		    nvprops, outnvl);
4005 		if (error != 0)
4006 			(void) dsl_destroy_head(fsname);
4007 	}
4008 	return (error);
4009 }
4010 
4011 static const zfs_ioc_key_t zfs_keys_remap[] = {
4012 	/* no nvl keys */
4013 };
4014 
4015 static int
zfs_ioc_remap(const char * fsname,nvlist_t * innvl,nvlist_t * outnvl)4016 zfs_ioc_remap(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
4017 {
4018 	/* This IOCTL is no longer supported. */
4019 	(void) fsname, (void) innvl, (void) outnvl;
4020 	return (0);
4021 }
4022 
4023 /*
4024  * innvl: {
4025  *     "snaps" -> { snapshot1, snapshot2 }
4026  *     (optional) "props" -> { prop -> value (string) }
4027  * }
4028  *
4029  * outnvl: snapshot -> error code (int32)
4030  */
4031 static const zfs_ioc_key_t zfs_keys_snapshot[] = {
4032 	{"snaps",	DATA_TYPE_NVLIST,	0},
4033 	{"props",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
4034 };
4035 
4036 static int
zfs_ioc_snapshot(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)4037 zfs_ioc_snapshot(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4038 {
4039 	nvlist_t *snaps;
4040 	nvlist_t *props = NULL;
4041 	int error, poollen;
4042 	nvpair_t *pair;
4043 
4044 	(void) nvlist_lookup_nvlist(innvl, "props", &props);
4045 	if (!nvlist_empty(props) &&
4046 	    zfs_earlier_version(poolname, SPA_VERSION_SNAP_PROPS))
4047 		return (SET_ERROR(ENOTSUP));
4048 	if ((error = zfs_check_userprops(props)) != 0)
4049 		return (error);
4050 
4051 	snaps = fnvlist_lookup_nvlist(innvl, "snaps");
4052 	poollen = strlen(poolname);
4053 	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
4054 	    pair = nvlist_next_nvpair(snaps, pair)) {
4055 		const char *name = nvpair_name(pair);
4056 		char *cp = strchr(name, '@');
4057 
4058 		/*
4059 		 * The snap name must contain an @, and the part after it must
4060 		 * contain only valid characters.
4061 		 */
4062 		if (cp == NULL ||
4063 		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
4064 			return (SET_ERROR(EINVAL));
4065 
4066 		/*
4067 		 * The snap must be in the specified pool.
4068 		 */
4069 		if (strncmp(name, poolname, poollen) != 0 ||
4070 		    (name[poollen] != '/' && name[poollen] != '@'))
4071 			return (SET_ERROR(EXDEV));
4072 
4073 		/*
4074 		 * Check for permission to set the properties on the fs.
4075 		 */
4076 		if (!nvlist_empty(props)) {
4077 			*cp = '\0';
4078 			zone_admin_result_t zone_result;
4079 			zone_result = zone_dataset_admin_check(name,
4080 			    ZONE_OP_SETPROP, NULL);
4081 			if (zone_result == ZONE_ADMIN_DENIED) {
4082 				*cp = '@';
4083 				return (SET_ERROR(EPERM));
4084 			}
4085 			if (zone_result == ZONE_ADMIN_ALLOWED) {
4086 				error = zfs_secpolicy_zoned_uid_deleg(name,
4087 				    ZFS_DELEG_PERM_USERPROP, CRED());
4088 			} else {
4089 				error = zfs_secpolicy_write_perms(name,
4090 				    ZFS_DELEG_PERM_USERPROP, CRED());
4091 			}
4092 			*cp = '@';
4093 			if (error != 0)
4094 				return (error);
4095 		}
4096 
4097 		/* This must be the only snap of this fs. */
4098 		for (nvpair_t *pair2 = nvlist_next_nvpair(snaps, pair);
4099 		    pair2 != NULL; pair2 = nvlist_next_nvpair(snaps, pair2)) {
4100 			if (strncmp(name, nvpair_name(pair2), cp - name + 1)
4101 			    == 0) {
4102 				return (SET_ERROR(EXDEV));
4103 			}
4104 		}
4105 	}
4106 
4107 	error = dsl_dataset_snapshot(snaps, props, outnvl);
4108 
4109 	return (error);
4110 }
4111 
4112 /*
4113  * innvl: "message" -> string
4114  */
4115 static const zfs_ioc_key_t zfs_keys_log_history[] = {
4116 	{"message",	DATA_TYPE_STRING,	0},
4117 };
4118 
4119 static int
zfs_ioc_log_history(const char * unused,nvlist_t * innvl,nvlist_t * outnvl)4120 zfs_ioc_log_history(const char *unused, nvlist_t *innvl, nvlist_t *outnvl)
4121 {
4122 	(void) unused, (void) outnvl;
4123 	const char *message;
4124 	char *poolname;
4125 	spa_t *spa;
4126 	int error;
4127 
4128 	/*
4129 	 * The poolname in the ioctl is not set, we get it from the TSD,
4130 	 * which was set at the end of the last successful ioctl that allows
4131 	 * logging.  The secpolicy func already checked that it is set.
4132 	 * Only one log ioctl is allowed after each successful ioctl, so
4133 	 * we clear the TSD here.
4134 	 */
4135 	poolname = tsd_get(zfs_allow_log_key);
4136 	if (poolname == NULL)
4137 		return (SET_ERROR(EINVAL));
4138 	(void) tsd_set(zfs_allow_log_key, NULL);
4139 	error = spa_open(poolname, &spa, FTAG);
4140 	kmem_strfree(poolname);
4141 	if (error != 0)
4142 		return (error);
4143 
4144 	message = fnvlist_lookup_string(innvl, "message");
4145 
4146 	if (spa_version(spa) < SPA_VERSION_ZPOOL_HISTORY) {
4147 		spa_close(spa, FTAG);
4148 		return (SET_ERROR(ENOTSUP));
4149 	}
4150 
4151 	error = spa_history_log(spa, message);
4152 	spa_close(spa, FTAG);
4153 	return (error);
4154 }
4155 
4156 /*
4157  * This ioctl is used to set the bootenv configuration on the current
4158  * pool. This configuration is stored in the second padding area of the label,
4159  * and it is used by the bootloader(s) to store the bootloader and/or system
4160  * specific data.
4161  * The data is stored as nvlist data stream, and is protected by
4162  * an embedded checksum.
4163  * The version can have two possible values:
4164  * VB_RAW: nvlist should have key GRUB_ENVMAP, value DATA_TYPE_STRING.
4165  * VB_NVLIST: nvlist with arbitrary <key, value> pairs.
4166  */
4167 static const zfs_ioc_key_t zfs_keys_set_bootenv[] = {
4168 	{"version",	DATA_TYPE_UINT64,	0},
4169 	{"<keys>",	DATA_TYPE_ANY, ZK_OPTIONAL | ZK_WILDCARDLIST},
4170 };
4171 
4172 static int
zfs_ioc_set_bootenv(const char * name,nvlist_t * innvl,nvlist_t * outnvl)4173 zfs_ioc_set_bootenv(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
4174 {
4175 	int error;
4176 	spa_t *spa;
4177 
4178 	if ((error = spa_open(name, &spa, FTAG)) != 0)
4179 		return (error);
4180 	spa_vdev_state_enter(spa, SCL_ALL);
4181 	error = vdev_label_write_bootenv(spa->spa_root_vdev, innvl);
4182 	(void) spa_vdev_state_exit(spa, NULL, 0);
4183 	spa_close(spa, FTAG);
4184 	return (error);
4185 }
4186 
4187 static const zfs_ioc_key_t zfs_keys_get_bootenv[] = {
4188 	/* no nvl keys */
4189 };
4190 
4191 static int
zfs_ioc_get_bootenv(const char * name,nvlist_t * innvl,nvlist_t * outnvl)4192 zfs_ioc_get_bootenv(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
4193 {
4194 	spa_t *spa;
4195 	int error;
4196 
4197 	if ((error = spa_open(name, &spa, FTAG)) != 0)
4198 		return (error);
4199 	spa_vdev_state_enter(spa, SCL_ALL);
4200 	error = vdev_label_read_bootenv(spa->spa_root_vdev, outnvl);
4201 	(void) spa_vdev_state_exit(spa, NULL, 0);
4202 	spa_close(spa, FTAG);
4203 	return (error);
4204 }
4205 
4206 /*
4207  * The dp_config_rwlock must not be held when calling this, because the
4208  * unmount may need to write out data.
4209  *
4210  * This function is best-effort.  Callers must deal gracefully if it
4211  * remains mounted (or is remounted after this call).
4212  *
 * Nothing is returned.  If the argument is not a snapshot name the call is
 * a no-op; otherwise a forced unmount is attempted and its result discarded.
4215  */
4216 void
zfs_unmount_snap(const char * snapname)4217 zfs_unmount_snap(const char *snapname)
4218 {
4219 	if (strchr(snapname, '@') == NULL)
4220 		return;
4221 
4222 	(void) zfsctl_snapshot_unmount(snapname, MNT_FORCE);
4223 }
4224 
/*
 * Callback-shaped wrapper around zfs_unmount_snap(): arg is ignored and
 * the return value is always 0.
 */
static int
zfs_unmount_snap_cb(const char *snapname, void *arg)
{
	(void) arg;

	zfs_unmount_snap(snapname);

	return (0);
}
4232 
4233 /*
4234  * When a clone is destroyed, its origin may also need to be destroyed,
4235  * in which case it must be unmounted.  This routine will do that unmount
4236  * if necessary.
4237  */
4238 void
zfs_destroy_unmount_origin(const char * fsname)4239 zfs_destroy_unmount_origin(const char *fsname)
4240 {
4241 	int error;
4242 	objset_t *os;
4243 	dsl_dataset_t *ds;
4244 
4245 	error = dmu_objset_hold(fsname, FTAG, &os);
4246 	if (error != 0)
4247 		return;
4248 	ds = dmu_objset_ds(os);
4249 	if (dsl_dir_is_clone(ds->ds_dir) && DS_IS_DEFER_DESTROY(ds->ds_prev)) {
4250 		char originname[ZFS_MAX_DATASET_NAME_LEN];
4251 		dsl_dataset_name(ds->ds_prev, originname);
4252 		dmu_objset_rele(os, FTAG);
4253 		zfs_unmount_snap(originname);
4254 	} else {
4255 		dmu_objset_rele(os, FTAG);
4256 	}
4257 }
4258 
4259 /*
4260  * innvl: {
4261  *     "snaps" -> { snapshot1, snapshot2 }
4262  *     (optional boolean) "defer"
4263  * }
4264  *
4265  * outnvl: snapshot -> error code (int32)
4266  */
4267 static const zfs_ioc_key_t zfs_keys_destroy_snaps[] = {
4268 	{"snaps",	DATA_TYPE_NVLIST,	0},
4269 	{"defer",	DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
4270 };
4271 
4272 static int
zfs_ioc_destroy_snaps(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)4273 zfs_ioc_destroy_snaps(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4274 {
4275 	int poollen;
4276 	nvlist_t *snaps;
4277 	nvpair_t *pair;
4278 	boolean_t defer;
4279 	spa_t *spa;
4280 
4281 	snaps = fnvlist_lookup_nvlist(innvl, "snaps");
4282 	defer = nvlist_exists(innvl, "defer");
4283 
4284 	poollen = strlen(poolname);
4285 	for (pair = nvlist_next_nvpair(snaps, NULL); pair != NULL;
4286 	    pair = nvlist_next_nvpair(snaps, pair)) {
4287 		const char *name = nvpair_name(pair);
4288 
4289 		/*
4290 		 * The snap must be in the specified pool to prevent the
4291 		 * invalid removal of zvol minors below.
4292 		 */
4293 		if (strncmp(name, poolname, poollen) != 0 ||
4294 		    (name[poollen] != '/' && name[poollen] != '@'))
4295 			return (SET_ERROR(EXDEV));
4296 
4297 		zfs_unmount_snap(nvpair_name(pair));
4298 		if (spa_open(name, &spa, FTAG) == 0) {
4299 			zvol_remove_minors(spa, name, B_TRUE);
4300 			spa_close(spa, FTAG);
4301 		}
4302 	}
4303 
4304 	return (dsl_destroy_snapshots_nvl(snaps, defer, outnvl));
4305 }
4306 
4307 /*
4308  * Create bookmarks. The bookmark names are of the form <fs>#<bmark>.
4309  * All bookmarks and snapshots must be in the same pool.
4310  * dsl_bookmark_create_nvl_validate describes the nvlist schema in more detail.
4311  *
4312  * innvl: {
4313  *     new_bookmark1 -> existing_snapshot,
4314  *     new_bookmark2 -> existing_bookmark,
4315  * }
4316  *
4317  * outnvl: bookmark -> error code (int32)
4318  *
4319  */
4320 static const zfs_ioc_key_t zfs_keys_bookmark[] = {
4321 	{"<bookmark>...",	DATA_TYPE_STRING,	ZK_WILDCARDLIST},
4322 };
4323 
4324 static int
zfs_ioc_bookmark(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)4325 zfs_ioc_bookmark(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4326 {
4327 	(void) poolname;
4328 	return (dsl_bookmark_create(innvl, outnvl));
4329 }
4330 
4331 /*
4332  * innvl: {
4333  *     property 1, property 2, ...
4334  * }
4335  *
4336  * outnvl: {
4337  *     bookmark name 1 -> { property 1, property 2, ... },
4338  *     bookmark name 2 -> { property 1, property 2, ... }
4339  * }
4340  *
4341  */
4342 static const zfs_ioc_key_t zfs_keys_get_bookmarks[] = {
4343 	{"<property>...", DATA_TYPE_BOOLEAN, ZK_WILDCARDLIST | ZK_OPTIONAL},
4344 };
4345 
4346 static int
zfs_ioc_get_bookmarks(const char * fsname,nvlist_t * innvl,nvlist_t * outnvl)4347 zfs_ioc_get_bookmarks(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
4348 {
4349 	return (dsl_get_bookmarks(fsname, innvl, outnvl));
4350 }
4351 
4352 /*
4353  * innvl is not used.
4354  *
4355  * outnvl: {
4356  *     property 1, property 2, ...
4357  * }
4358  *
4359  */
4360 static const zfs_ioc_key_t zfs_keys_get_bookmark_props[] = {
4361 	/* no nvl keys */
4362 };
4363 
4364 static int
zfs_ioc_get_bookmark_props(const char * bookmark,nvlist_t * innvl,nvlist_t * outnvl)4365 zfs_ioc_get_bookmark_props(const char *bookmark, nvlist_t *innvl,
4366     nvlist_t *outnvl)
4367 {
4368 	(void) innvl;
4369 	char fsname[ZFS_MAX_DATASET_NAME_LEN];
4370 	char *bmname;
4371 
4372 	bmname = strchr(bookmark, '#');
4373 	if (bmname == NULL)
4374 		return (SET_ERROR(EINVAL));
4375 	bmname++;
4376 
4377 	(void) strlcpy(fsname, bookmark, sizeof (fsname));
4378 	*(strchr(fsname, '#')) = '\0';
4379 
4380 	return (dsl_get_bookmark_props(fsname, bmname, outnvl));
4381 }
4382 
4383 /*
4384  * innvl: {
4385  *     bookmark name 1, bookmark name 2
4386  * }
4387  *
4388  * outnvl: bookmark -> error code (int32)
4389  *
4390  */
4391 static const zfs_ioc_key_t zfs_keys_destroy_bookmarks[] = {
4392 	{"<bookmark>...",	DATA_TYPE_BOOLEAN,	ZK_WILDCARDLIST},
4393 };
4394 
4395 static int
zfs_ioc_destroy_bookmarks(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)4396 zfs_ioc_destroy_bookmarks(const char *poolname, nvlist_t *innvl,
4397     nvlist_t *outnvl)
4398 {
4399 	int error, poollen;
4400 
4401 	poollen = strlen(poolname);
4402 	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
4403 	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
4404 		const char *name = nvpair_name(pair);
4405 		const char *cp = strchr(name, '#');
4406 
4407 		/*
4408 		 * The bookmark name must contain an #, and the part after it
4409 		 * must contain only valid characters.
4410 		 */
4411 		if (cp == NULL ||
4412 		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
4413 			return (SET_ERROR(EINVAL));
4414 
4415 		/*
4416 		 * The bookmark must be in the specified pool.
4417 		 */
4418 		if (strncmp(name, poolname, poollen) != 0 ||
4419 		    (name[poollen] != '/' && name[poollen] != '#'))
4420 			return (SET_ERROR(EXDEV));
4421 	}
4422 
4423 	error = dsl_bookmark_destroy(innvl, outnvl);
4424 	return (error);
4425 }
4426 
4427 static const zfs_ioc_key_t zfs_keys_channel_program[] = {
4428 	{"program",	DATA_TYPE_STRING,		0},
4429 	{"arg",		DATA_TYPE_ANY,			0},
4430 	{"sync",	DATA_TYPE_BOOLEAN_VALUE,	ZK_OPTIONAL},
4431 	{"instrlimit",	DATA_TYPE_UINT64,		ZK_OPTIONAL},
4432 	{"memlimit",	DATA_TYPE_UINT64,		ZK_OPTIONAL},
4433 };
4434 
4435 static int
zfs_ioc_channel_program(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)4436 zfs_ioc_channel_program(const char *poolname, nvlist_t *innvl,
4437     nvlist_t *outnvl)
4438 {
4439 	const char *program;
4440 	uint64_t instrlimit, memlimit;
4441 	boolean_t sync_flag;
4442 	nvpair_t *nvarg = NULL;
4443 
4444 	program = fnvlist_lookup_string(innvl, ZCP_ARG_PROGRAM);
4445 	if (0 != nvlist_lookup_boolean_value(innvl, ZCP_ARG_SYNC, &sync_flag)) {
4446 		sync_flag = B_TRUE;
4447 	}
4448 	if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_INSTRLIMIT, &instrlimit)) {
4449 		instrlimit = ZCP_DEFAULT_INSTRLIMIT;
4450 	}
4451 	if (0 != nvlist_lookup_uint64(innvl, ZCP_ARG_MEMLIMIT, &memlimit)) {
4452 		memlimit = ZCP_DEFAULT_MEMLIMIT;
4453 	}
4454 	nvarg = fnvlist_lookup_nvpair(innvl, ZCP_ARG_ARGLIST);
4455 
4456 	if (instrlimit == 0 || instrlimit > zfs_lua_max_instrlimit)
4457 		return (SET_ERROR(EINVAL));
4458 	if (memlimit == 0 || memlimit > zfs_lua_max_memlimit)
4459 		return (SET_ERROR(EINVAL));
4460 
4461 	return (zcp_eval(poolname, program, sync_flag, instrlimit, memlimit,
4462 	    nvarg, outnvl));
4463 }
4464 
4465 /*
4466  * innvl: unused
4467  * outnvl: empty
4468  */
4469 static const zfs_ioc_key_t zfs_keys_pool_checkpoint[] = {
4470 	/* no nvl keys */
4471 };
4472 
4473 static int
zfs_ioc_pool_checkpoint(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)4474 zfs_ioc_pool_checkpoint(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4475 {
4476 	(void) innvl, (void) outnvl;
4477 	return (spa_checkpoint(poolname));
4478 }
4479 
4480 /*
4481  * innvl: unused
4482  * outnvl: empty
4483  */
4484 static const zfs_ioc_key_t zfs_keys_pool_discard_checkpoint[] = {
4485 	/* no nvl keys */
4486 };
4487 
4488 static int
zfs_ioc_pool_discard_checkpoint(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)4489 zfs_ioc_pool_discard_checkpoint(const char *poolname, nvlist_t *innvl,
4490     nvlist_t *outnvl)
4491 {
4492 	(void) innvl, (void) outnvl;
4493 	return (spa_checkpoint_discard(poolname));
4494 }
4495 
4496 /*
4497  * Loads specific types of data for the given pool
4498  *
4499  * innvl: {
4500  *     "prefetch_type" -> int32_t
4501  * }
4502  *
4503  * outnvl: empty
4504  */
4505 static const zfs_ioc_key_t zfs_keys_pool_prefetch[] = {
4506 	{ZPOOL_PREFETCH_TYPE,	DATA_TYPE_INT32,	0},
4507 };
4508 
4509 static int
zfs_ioc_pool_prefetch(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)4510 zfs_ioc_pool_prefetch(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4511 {
4512 	(void) outnvl;
4513 
4514 	int error;
4515 	spa_t *spa;
4516 	int32_t type;
4517 
4518 	if (nvlist_lookup_int32(innvl, ZPOOL_PREFETCH_TYPE, &type) != 0)
4519 		return (EINVAL);
4520 
4521 	if (type != ZPOOL_PREFETCH_DDT && type != ZPOOL_PREFETCH_BRT)
4522 		return (EINVAL);
4523 
4524 	error = spa_open(poolname, &spa, FTAG);
4525 	if (error != 0)
4526 		return (error);
4527 
4528 	hrtime_t start_time = gethrtime();
4529 
4530 	if (type == ZPOOL_PREFETCH_DDT) {
4531 		ddt_prefetch_all(spa);
4532 		zfs_dbgmsg("pool '%s': loaded ddt into ARC in %llu ms",
4533 		    spa->spa_name,
4534 		    (u_longlong_t)NSEC2MSEC(gethrtime() - start_time));
4535 	} else {
4536 		brt_prefetch_all(spa);
4537 		zfs_dbgmsg("pool '%s': loaded brt into ARC in %llu ms",
4538 		    spa->spa_name,
4539 		    (u_longlong_t)NSEC2MSEC(gethrtime() - start_time));
4540 	}
4541 
4542 	spa_close(spa, FTAG);
4543 
4544 	return (error);
4545 }
4546 
4547 /*
4548  * inputs:
4549  * zc_name		name of dataset to destroy
4550  * zc_defer_destroy	mark for deferred destroy
4551  *
4552  * outputs:		none
4553  */
4554 static int
zfs_ioc_destroy(zfs_cmd_t * zc)4555 zfs_ioc_destroy(zfs_cmd_t *zc)
4556 {
4557 	objset_t *os;
4558 	dmu_objset_type_t ost;
4559 	int err;
4560 
4561 	err = dmu_objset_hold(zc->zc_name, FTAG, &os);
4562 	if (err != 0)
4563 		return (err);
4564 	ost = dmu_objset_type(os);
4565 	dmu_objset_rele(os, FTAG);
4566 
4567 	if (ost == DMU_OST_ZFS)
4568 		zfs_unmount_snap(zc->zc_name);
4569 
4570 	if (strchr(zc->zc_name, '@')) {
4571 		err = dsl_destroy_snapshot(zc->zc_name, zc->zc_defer_destroy);
4572 	} else {
4573 		/*
4574 		 * Save zoned_uid before destroying so we can clean up
4575 		 * kernel-side zone tracking after a successful destroy.
4576 		 */
4577 		uint64_t zoned_uid = 0;
4578 		(void) dsl_prop_get(zc->zc_name, "zoned_uid",
4579 		    8, 1, &zoned_uid, NULL);
4580 
4581 		err = dsl_destroy_head(zc->zc_name);
4582 		if (err == EEXIST) {
4583 			/*
4584 			 * It is possible that the given DS may have
4585 			 * hidden child (%recv) datasets - "leftovers"
4586 			 * resulting from the previously interrupted
4587 			 * 'zfs receive'.
4588 			 *
4589 			 * 6 extra bytes for /%recv
4590 			 */
4591 			char namebuf[ZFS_MAX_DATASET_NAME_LEN + 6];
4592 
4593 			if (snprintf(namebuf, sizeof (namebuf), "%s/%s",
4594 			    zc->zc_name, recv_clone_name) >=
4595 			    sizeof (namebuf))
4596 				return (SET_ERROR(EINVAL));
4597 
4598 			/*
4599 			 * Try to remove the hidden child (%recv) and after
4600 			 * that try to remove the target dataset.
4601 			 * If the hidden child (%recv) does not exist
4602 			 * the original error (EEXIST) will be returned
4603 			 */
4604 			err = dsl_destroy_head(namebuf);
4605 			if (err == 0)
4606 				err = dsl_destroy_head(zc->zc_name);
4607 			else if (err == ENOENT)
4608 				err = SET_ERROR(EEXIST);
4609 		}
4610 
4611 		if (err == 0 && zoned_uid != 0) {
4612 			(void) zone_dataset_detach_uid(kcred,
4613 			    zc->zc_name, (uid_t)zoned_uid);
4614 		}
4615 	}
4616 
4617 	return (err);
4618 }
4619 
4620 /*
4621  * innvl: {
 *     "initialize_command" ->
 *         POOL_INITIALIZE_{CANCEL|START|SUSPEND|UNINIT} (uint64)
4623  *     "initialize_vdevs": { -> guids to initialize (nvlist)
4624  *         "vdev_path_1": vdev_guid_1, (uint64),
4625  *         "vdev_path_2": vdev_guid_2, (uint64),
4626  *         ...
4627  *     },
4628  * }
4629  *
4630  * outnvl: {
4631  *     "initialize_vdevs": { -> initialization errors (nvlist)
4632  *         "vdev_path_1": errno, see function body for possible errnos (uint64)
4633  *         "vdev_path_2": errno, ... (uint64)
4634  *         ...
4635  *     }
4636  * }
4637  *
 * EINVAL is returned for an unknown command or if any of the provided vdev
 * guids has been specified with a type other than uint64.
4640  */
4641 static const zfs_ioc_key_t zfs_keys_pool_initialize[] = {
4642 	{ZPOOL_INITIALIZE_COMMAND,	DATA_TYPE_UINT64,	0},
4643 	{ZPOOL_INITIALIZE_VDEVS,	DATA_TYPE_NVLIST,	0}
4644 };
4645 
4646 static int
zfs_ioc_pool_initialize(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)4647 zfs_ioc_pool_initialize(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4648 {
4649 	uint64_t cmd_type;
4650 	if (nvlist_lookup_uint64(innvl, ZPOOL_INITIALIZE_COMMAND,
4651 	    &cmd_type) != 0) {
4652 		return (SET_ERROR(EINVAL));
4653 	}
4654 
4655 	if (!(cmd_type == POOL_INITIALIZE_CANCEL ||
4656 	    cmd_type == POOL_INITIALIZE_START ||
4657 	    cmd_type == POOL_INITIALIZE_SUSPEND ||
4658 	    cmd_type == POOL_INITIALIZE_UNINIT)) {
4659 		return (SET_ERROR(EINVAL));
4660 	}
4661 
4662 	nvlist_t *vdev_guids;
4663 	if (nvlist_lookup_nvlist(innvl, ZPOOL_INITIALIZE_VDEVS,
4664 	    &vdev_guids) != 0) {
4665 		return (SET_ERROR(EINVAL));
4666 	}
4667 
4668 	for (nvpair_t *pair = nvlist_next_nvpair(vdev_guids, NULL);
4669 	    pair != NULL; pair = nvlist_next_nvpair(vdev_guids, pair)) {
4670 		uint64_t vdev_guid;
4671 		if (nvpair_value_uint64(pair, &vdev_guid) != 0) {
4672 			return (SET_ERROR(EINVAL));
4673 		}
4674 	}
4675 
4676 	spa_t *spa;
4677 	int error = spa_open(poolname, &spa, FTAG);
4678 	if (error != 0)
4679 		return (error);
4680 
4681 	nvlist_t *vdev_errlist = fnvlist_alloc();
4682 	int total_errors = spa_vdev_initialize(spa, vdev_guids, cmd_type,
4683 	    vdev_errlist);
4684 
4685 	if (fnvlist_size(vdev_errlist) > 0) {
4686 		fnvlist_add_nvlist(outnvl, ZPOOL_INITIALIZE_VDEVS,
4687 		    vdev_errlist);
4688 	}
4689 	fnvlist_free(vdev_errlist);
4690 
4691 	spa_close(spa, FTAG);
4692 	return (total_errors > 0 ? SET_ERROR(EINVAL) : 0);
4693 }
4694 
4695 /*
4696  * innvl: {
4697  *     "trim_command" -> POOL_TRIM_{CANCEL|START|SUSPEND} (uint64)
4698  *     "trim_vdevs": { -> guids to TRIM (nvlist)
4699  *         "vdev_path_1": vdev_guid_1, (uint64),
4700  *         "vdev_path_2": vdev_guid_2, (uint64),
4701  *         ...
4702  *     },
4703  *     "trim_rate" -> Target TRIM rate in bytes/sec.
4704  *     "trim_secure" -> Set to request a secure TRIM.
4705  * }
4706  *
4707  * outnvl: {
4708  *     "trim_vdevs": { -> TRIM errors (nvlist)
4709  *         "vdev_path_1": errno, see function body for possible errnos (uint64)
4710  *         "vdev_path_2": errno, ... (uint64)
4711  *         ...
4712  *     }
4713  * }
4714  *
 * EINVAL is returned for an unknown command or if any of the provided vdev
 * guids has been specified with a type other than uint64.
4717  */
4718 static const zfs_ioc_key_t zfs_keys_pool_trim[] = {
4719 	{ZPOOL_TRIM_COMMAND,	DATA_TYPE_UINT64,		0},
4720 	{ZPOOL_TRIM_VDEVS,	DATA_TYPE_NVLIST,		0},
4721 	{ZPOOL_TRIM_RATE,	DATA_TYPE_UINT64,		ZK_OPTIONAL},
4722 	{ZPOOL_TRIM_SECURE,	DATA_TYPE_BOOLEAN_VALUE,	ZK_OPTIONAL},
4723 };
4724 
4725 static int
zfs_ioc_pool_trim(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)4726 zfs_ioc_pool_trim(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4727 {
4728 	uint64_t cmd_type;
4729 	if (nvlist_lookup_uint64(innvl, ZPOOL_TRIM_COMMAND, &cmd_type) != 0)
4730 		return (SET_ERROR(EINVAL));
4731 
4732 	if (!(cmd_type == POOL_TRIM_CANCEL ||
4733 	    cmd_type == POOL_TRIM_START ||
4734 	    cmd_type == POOL_TRIM_SUSPEND)) {
4735 		return (SET_ERROR(EINVAL));
4736 	}
4737 
4738 	nvlist_t *vdev_guids;
4739 	if (nvlist_lookup_nvlist(innvl, ZPOOL_TRIM_VDEVS, &vdev_guids) != 0)
4740 		return (SET_ERROR(EINVAL));
4741 
4742 	for (nvpair_t *pair = nvlist_next_nvpair(vdev_guids, NULL);
4743 	    pair != NULL; pair = nvlist_next_nvpair(vdev_guids, pair)) {
4744 		uint64_t vdev_guid;
4745 		if (nvpair_value_uint64(pair, &vdev_guid) != 0) {
4746 			return (SET_ERROR(EINVAL));
4747 		}
4748 	}
4749 
4750 	/* Optional, defaults to maximum rate when not provided */
4751 	uint64_t rate;
4752 	if (nvlist_lookup_uint64(innvl, ZPOOL_TRIM_RATE, &rate) != 0)
4753 		rate = 0;
4754 
4755 	/* Optional, defaults to standard TRIM when not provided */
4756 	boolean_t secure;
4757 	if (nvlist_lookup_boolean_value(innvl, ZPOOL_TRIM_SECURE,
4758 	    &secure) != 0) {
4759 		secure = B_FALSE;
4760 	}
4761 
4762 	spa_t *spa;
4763 	int error = spa_open(poolname, &spa, FTAG);
4764 	if (error != 0)
4765 		return (error);
4766 
4767 	nvlist_t *vdev_errlist = fnvlist_alloc();
4768 	int total_errors = spa_vdev_trim(spa, vdev_guids, cmd_type,
4769 	    rate, !!zfs_trim_metaslab_skip, secure, vdev_errlist);
4770 
4771 	if (fnvlist_size(vdev_errlist) > 0)
4772 		fnvlist_add_nvlist(outnvl, ZPOOL_TRIM_VDEVS, vdev_errlist);
4773 
4774 	fnvlist_free(vdev_errlist);
4775 
4776 	spa_close(spa, FTAG);
4777 	return (total_errors > 0 ? SET_ERROR(EINVAL) : 0);
4778 }
4779 
4780 #define	DDT_PRUNE_UNIT		"ddt_prune_unit"
4781 #define	DDT_PRUNE_AMOUNT	"ddt_prune_amount"
4782 
4783 /*
4784  * innvl: {
 *     "ddt_prune_unit" -> int32_t
 *     "ddt_prune_amount" -> uint64_t
 * }
 *
 * outnvl: unused
4790  */
4791 static const zfs_ioc_key_t zfs_keys_ddt_prune[] = {
4792 	{DDT_PRUNE_UNIT,	DATA_TYPE_INT32,	0},
4793 	{DDT_PRUNE_AMOUNT,	DATA_TYPE_UINT64,	0},
4794 };
4795 
4796 static int
zfs_ioc_ddt_prune(const char * poolname,nvlist_t * innvl,nvlist_t * outnvl)4797 zfs_ioc_ddt_prune(const char *poolname, nvlist_t *innvl, nvlist_t *outnvl)
4798 {
4799 	int32_t unit;
4800 	uint64_t amount;
4801 
4802 	if (nvlist_lookup_int32(innvl, DDT_PRUNE_UNIT, &unit) != 0 ||
4803 	    nvlist_lookup_uint64(innvl, DDT_PRUNE_AMOUNT, &amount) != 0) {
4804 		return (EINVAL);
4805 	}
4806 
4807 	spa_t *spa;
4808 	int error = spa_open(poolname, &spa, FTAG);
4809 	if (error != 0)
4810 		return (error);
4811 
4812 	if (!spa_feature_is_enabled(spa, SPA_FEATURE_FAST_DEDUP)) {
4813 		spa_close(spa, FTAG);
4814 		return (SET_ERROR(ENOTSUP));
4815 	}
4816 
4817 	error = ddt_prune_unique_entries(spa, (zpool_ddt_prune_unit_t)unit,
4818 	    amount);
4819 
4820 	spa_close(spa, FTAG);
4821 
4822 	return (error);
4823 }
4824 
4825 /*
4826  * This ioctl waits for activity of a particular type to complete. If there is
4827  * no activity of that type in progress, it returns immediately, and the
4828  * returned value "waited" is false. If there is activity in progress, and no
4829  * tag is passed in, the ioctl blocks until all activity of that type is
4830  * complete, and then returns with "waited" set to true.
4831  *
4832  * If a tag is provided, it identifies a particular instance of an activity to
4833  * wait for. Currently, this is only valid for use with 'initialize', because
4834  * that is the only activity for which there can be multiple instances running
4835  * concurrently. In the case of 'initialize', the tag corresponds to the guid of
4836  * the vdev on which to wait.
4837  *
4838  * If a thread waiting in the ioctl receives a signal, the call will return
4839  * immediately, and the return value will be EINTR.
4840  *
4841  * innvl: {
4842  *     "wait_activity" -> int32_t
4843  *     (optional) "wait_tag" -> uint64_t
4844  * }
4845  *
4846  * outnvl: "waited" -> boolean_t
4847  */
4848 static const zfs_ioc_key_t zfs_keys_pool_wait[] = {
4849 	{ZPOOL_WAIT_ACTIVITY,	DATA_TYPE_INT32,		0},
4850 	{ZPOOL_WAIT_TAG,	DATA_TYPE_UINT64,		ZK_OPTIONAL},
4851 };
4852 
4853 static int
zfs_ioc_wait(const char * name,nvlist_t * innvl,nvlist_t * outnvl)4854 zfs_ioc_wait(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
4855 {
4856 	int32_t activity;
4857 	uint64_t tag;
4858 	boolean_t waited;
4859 	int error;
4860 
4861 	if (nvlist_lookup_int32(innvl, ZPOOL_WAIT_ACTIVITY, &activity) != 0)
4862 		return (EINVAL);
4863 
4864 	if (nvlist_lookup_uint64(innvl, ZPOOL_WAIT_TAG, &tag) == 0)
4865 		error = spa_wait_tag(name, activity, tag, &waited);
4866 	else
4867 		error = spa_wait(name, activity, &waited);
4868 
4869 	if (error == 0)
4870 		fnvlist_add_boolean_value(outnvl, ZPOOL_WAIT_WAITED, waited);
4871 
4872 	return (error);
4873 }
4874 
4875 /*
4876  * This ioctl waits for activity of a particular type to complete. If there is
4877  * no activity of that type in progress, it returns immediately, and the
 * returned value "waited" is false. If there is activity in progress, the
 * ioctl blocks until all activity of that type is complete, and then
 * returns with "waited" set to true.
4881  *
4882  * If a thread waiting in the ioctl receives a signal, the call will return
4883  * immediately, and the return value will be EINTR.
4884  *
4885  * innvl: {
4886  *     "wait_activity" -> int32_t
4887  * }
4888  *
4889  * outnvl: "waited" -> boolean_t
4890  */
4891 static const zfs_ioc_key_t zfs_keys_fs_wait[] = {
4892 	{ZFS_WAIT_ACTIVITY,	DATA_TYPE_INT32,		0},
4893 };
4894 
4895 static int
zfs_ioc_wait_fs(const char * name,nvlist_t * innvl,nvlist_t * outnvl)4896 zfs_ioc_wait_fs(const char *name, nvlist_t *innvl, nvlist_t *outnvl)
4897 {
4898 	int32_t activity;
4899 	boolean_t waited = B_FALSE;
4900 	int error;
4901 	dsl_pool_t *dp;
4902 	dsl_dir_t *dd;
4903 	dsl_dataset_t *ds;
4904 
4905 	if (nvlist_lookup_int32(innvl, ZFS_WAIT_ACTIVITY, &activity) != 0)
4906 		return (SET_ERROR(EINVAL));
4907 
4908 	if (activity >= ZFS_WAIT_NUM_ACTIVITIES || activity < 0)
4909 		return (SET_ERROR(EINVAL));
4910 
4911 	if ((error = dsl_pool_hold(name, FTAG, &dp)) != 0)
4912 		return (error);
4913 
4914 	if ((error = dsl_dataset_hold(dp, name, FTAG, &ds)) != 0) {
4915 		dsl_pool_rele(dp, FTAG);
4916 		return (error);
4917 	}
4918 
4919 	dd = ds->ds_dir;
4920 	mutex_enter(&dd->dd_activity_lock);
4921 	dd->dd_activity_waiters++;
4922 
4923 	/*
4924 	 * We get a long-hold here so that the dsl_dataset_t and dsl_dir_t
4925 	 * aren't evicted while we're waiting. Normally this is prevented by
4926 	 * holding the pool, but we can't do that while we're waiting since
4927 	 * that would prevent TXGs from syncing out. Some of the functionality
4928 	 * of long-holds (e.g. preventing deletion) is unnecessary for this
4929 	 * case, since we would cancel the waiters before proceeding with a
4930 	 * deletion. An alternative mechanism for keeping the dataset around
4931 	 * could be developed but this is simpler.
4932 	 */
4933 	dsl_dataset_long_hold(ds, FTAG);
4934 	dsl_pool_rele(dp, FTAG);
4935 
4936 	error = dsl_dir_wait(dd, ds, activity, &waited);
4937 
4938 	dsl_dataset_long_rele(ds, FTAG);
4939 	dd->dd_activity_waiters--;
4940 	if (dd->dd_activity_waiters == 0)
4941 		cv_signal(&dd->dd_activity_cv);
4942 	mutex_exit(&dd->dd_activity_lock);
4943 
4944 	dsl_dataset_rele(ds, FTAG);
4945 
4946 	if (error == 0)
4947 		fnvlist_add_boolean_value(outnvl, ZFS_WAIT_WAITED, waited);
4948 
4949 	return (error);
4950 }
4951 
4952 /*
4953  * fsname is name of dataset to rollback (to most recent snapshot)
4954  *
 * innvl: (optional) "target" -> name of expected target snapshot (string)
 *
 * outnvl: "target" -> name of most recent snapshot
4959  */
4960 static const zfs_ioc_key_t zfs_keys_rollback[] = {
4961 	{"target",	DATA_TYPE_STRING,	ZK_OPTIONAL},
4962 };
4963 
4964 static int
zfs_ioc_rollback(const char * fsname,nvlist_t * innvl,nvlist_t * outnvl)4965 zfs_ioc_rollback(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
4966 {
4967 	zfsvfs_t *zfsvfs;
4968 	zvol_state_handle_t *zv;
4969 	const char *target = NULL;
4970 	int error;
4971 
4972 	(void) nvlist_lookup_string(innvl, "target", &target);
4973 	if (target != NULL) {
4974 		const char *cp = strchr(target, '@');
4975 
4976 		/*
4977 		 * The snap name must contain an @, and the part after it must
4978 		 * contain only valid characters.
4979 		 */
4980 		if (cp == NULL ||
4981 		    zfs_component_namecheck(cp + 1, NULL, NULL) != 0)
4982 			return (SET_ERROR(EINVAL));
4983 	}
4984 
4985 	if (getzfsvfs(fsname, &zfsvfs) == 0) {
4986 		dsl_dataset_t *ds;
4987 
4988 		ds = dmu_objset_ds(zfsvfs->z_os);
4989 		error = zfs_suspend_fs(zfsvfs);
4990 		if (error == 0) {
4991 			int resume_err;
4992 
4993 			error = dsl_dataset_rollback(fsname, target, zfsvfs,
4994 			    outnvl);
4995 			resume_err = zfs_resume_fs(zfsvfs, ds);
4996 			error = error ? error : resume_err;
4997 		}
4998 		zfs_vfs_rele(zfsvfs);
4999 	} else if (zvol_suspend(fsname, &zv) == 0) {
5000 		error = dsl_dataset_rollback(fsname, target, zvol_tag(zv),
5001 		    outnvl);
5002 		zvol_resume(zv);
5003 	} else {
5004 		error = dsl_dataset_rollback(fsname, target, NULL, outnvl);
5005 	}
5006 	return (error);
5007 }
5008 
/*
 * Build "<fsname>@<arg>" (arg is the snapshot's short name), unmount
 * that snapshot via zfs_unmount_snap(), and free the temporary name.
 * Always returns 0.
 */
static int
recursive_unmount(const char *fsname, void *arg)
{
	const char *shortname = arg;
	char *snap = kmem_asprintf("%s@%s", fsname, shortname);

	zfs_unmount_snap(snap);
	kmem_strfree(snap);

	return (0);
}
5021 
5022 /*
5023  *
5024  * snapname is the snapshot to redact.
5025  * innvl: {
5026  *     "bookname" -> (string)
5027  *         shortname of the redaction bookmark to generate
5028  *     "snapnv" -> (nvlist, values ignored)
5029  *         snapshots to redact snapname with respect to
5030  * }
5031  *
5032  * outnvl is unused
5033  */
5034 
/*
 * Input nvlist keys looked up by zfs_ioc_redact().  The third column is a
 * flags word; both entries leave it 0 (presumably meaning "required", since
 * optional keys elsewhere in this file are tagged ZK_OPTIONAL -- confirm
 * against the zfs_ioc_key_t definition).
 */
static const zfs_ioc_key_t zfs_keys_redact[] = {
	{"bookname",		DATA_TYPE_STRING,	0},
	{"snapnv",		DATA_TYPE_NVLIST,	0},
};
5039 
5040 static int
zfs_ioc_redact(const char * snapname,nvlist_t * innvl,nvlist_t * outnvl)5041 zfs_ioc_redact(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
5042 {
5043 	(void) outnvl;
5044 	nvlist_t *redactnvl = NULL;
5045 	const char *redactbook = NULL;
5046 
5047 	if (nvlist_lookup_nvlist(innvl, "snapnv", &redactnvl) != 0)
5048 		return (SET_ERROR(EINVAL));
5049 	if (fnvlist_num_pairs(redactnvl) == 0)
5050 		return (SET_ERROR(ENXIO));
5051 	if (nvlist_lookup_string(innvl, "bookname", &redactbook) != 0)
5052 		return (SET_ERROR(EINVAL));
5053 
5054 	return (dmu_redact_snap(snapname, redactnvl, redactbook));
5055 }
5056 
5057 /*
5058  * inputs:
5059  * zc_name	old name of dataset
5060  * zc_value	new name of dataset
5061  * zc_cookie	recursive flag (only valid for snapshots)
5062  *
5063  * outputs:	none
5064  */
5065 static int
zfs_ioc_rename(zfs_cmd_t * zc)5066 zfs_ioc_rename(zfs_cmd_t *zc)
5067 {
5068 	objset_t *os;
5069 	dmu_objset_type_t ost;
5070 	boolean_t recursive = zc->zc_cookie & 1;
5071 	boolean_t nounmount = !!(zc->zc_cookie & 2);
5072 	char *at;
5073 	int err;
5074 
5075 	/* "zfs rename" from and to ...%recv datasets should both fail */
5076 	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
5077 	zc->zc_value[sizeof (zc->zc_value) - 1] = '\0';
5078 	if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
5079 	    dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
5080 	    strchr(zc->zc_name, '%') || strchr(zc->zc_value, '%'))
5081 		return (SET_ERROR(EINVAL));
5082 
5083 	err = dmu_objset_hold(zc->zc_name, FTAG, &os);
5084 	if (err != 0)
5085 		return (err);
5086 	ost = dmu_objset_type(os);
5087 	dmu_objset_rele(os, FTAG);
5088 
5089 	at = strchr(zc->zc_name, '@');
5090 	if (at != NULL) {
5091 		/* snaps must be in same fs */
5092 		int error;
5093 
5094 		if (strncmp(zc->zc_name, zc->zc_value, at - zc->zc_name + 1))
5095 			return (SET_ERROR(EXDEV));
5096 		*at = '\0';
5097 		if (ost == DMU_OST_ZFS && !nounmount) {
5098 			error = dmu_objset_find(zc->zc_name,
5099 			    recursive_unmount, at + 1,
5100 			    recursive ? DS_FIND_CHILDREN : 0);
5101 			if (error != 0) {
5102 				*at = '@';
5103 				return (error);
5104 			}
5105 		}
5106 		error = dsl_dataset_rename_snapshot(zc->zc_name,
5107 		    at + 1, strchr(zc->zc_value, '@') + 1, recursive);
5108 		*at = '@';
5109 
5110 		return (error);
5111 	} else {
5112 		/*
5113 		 * For dataset renames, update kernel-side zone tracking
5114 		 * if the dataset has a zoned_uid delegation.  Read the
5115 		 * property before rename, then detach old / attach new.
5116 		 */
5117 		uint64_t zoned_uid = 0;
5118 		(void) dsl_prop_get(zc->zc_name, "zoned_uid",
5119 		    8, 1, &zoned_uid, NULL);
5120 
5121 		err = dsl_dir_rename(zc->zc_name, zc->zc_value);
5122 
5123 		if (err == 0 && zoned_uid != 0) {
5124 			(void) zone_dataset_detach_uid(kcred,
5125 			    zc->zc_name, (uid_t)zoned_uid);
5126 			(void) zone_dataset_attach_uid(kcred,
5127 			    zc->zc_value, (uid_t)zoned_uid);
5128 		}
5129 		return (err);
5130 	}
5131 }
5132 
5133 static int
zfs_check_settable(const char * dsname,nvpair_t * pair,cred_t * cr)5134 zfs_check_settable(const char *dsname, nvpair_t *pair, cred_t *cr)
5135 {
5136 	const char *propname = nvpair_name(pair);
5137 	boolean_t issnap = (strchr(dsname, '@') != NULL);
5138 	zfs_prop_t prop = zfs_name_to_prop(propname);
5139 	uint64_t intval, compval;
5140 	int err;
5141 
5142 	if (prop == ZPROP_USERPROP) {
5143 		if (zfs_prop_user(propname)) {
5144 			zone_admin_result_t zone_result;
5145 			zone_result = zone_dataset_admin_check(dsname,
5146 			    ZONE_OP_SETPROP, NULL);
5147 			if (zone_result == ZONE_ADMIN_ALLOWED)
5148 				return (zfs_secpolicy_zoned_uid_deleg(dsname,
5149 				    ZFS_DELEG_PERM_USERPROP, cr));
5150 			if (zone_result == ZONE_ADMIN_DENIED)
5151 				return (SET_ERROR(EPERM));
5152 			if ((err = zfs_secpolicy_write_perms(dsname,
5153 			    ZFS_DELEG_PERM_USERPROP, cr)))
5154 				return (err);
5155 			return (0);
5156 		}
5157 
5158 		if (!issnap && zfs_prop_userquota(propname)) {
5159 			const char *perm = NULL;
5160 			const char *uq_prefix =
5161 			    zfs_userquota_prop_prefixes[ZFS_PROP_USERQUOTA];
5162 			const char *gq_prefix =
5163 			    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPQUOTA];
5164 			const char *uiq_prefix =
5165 			    zfs_userquota_prop_prefixes[ZFS_PROP_USEROBJQUOTA];
5166 			const char *giq_prefix =
5167 			    zfs_userquota_prop_prefixes[ZFS_PROP_GROUPOBJQUOTA];
5168 			const char *pq_prefix =
5169 			    zfs_userquota_prop_prefixes[ZFS_PROP_PROJECTQUOTA];
5170 			const char *piq_prefix = zfs_userquota_prop_prefixes[\
5171 			    ZFS_PROP_PROJECTOBJQUOTA];
5172 
5173 			if (strncmp(propname, uq_prefix,
5174 			    strlen(uq_prefix)) == 0) {
5175 				perm = ZFS_DELEG_PERM_USERQUOTA;
5176 			} else if (strncmp(propname, uiq_prefix,
5177 			    strlen(uiq_prefix)) == 0) {
5178 				perm = ZFS_DELEG_PERM_USEROBJQUOTA;
5179 			} else if (strncmp(propname, gq_prefix,
5180 			    strlen(gq_prefix)) == 0) {
5181 				perm = ZFS_DELEG_PERM_GROUPQUOTA;
5182 			} else if (strncmp(propname, giq_prefix,
5183 			    strlen(giq_prefix)) == 0) {
5184 				perm = ZFS_DELEG_PERM_GROUPOBJQUOTA;
5185 			} else if (strncmp(propname, pq_prefix,
5186 			    strlen(pq_prefix)) == 0) {
5187 				perm = ZFS_DELEG_PERM_PROJECTQUOTA;
5188 			} else if (strncmp(propname, piq_prefix,
5189 			    strlen(piq_prefix)) == 0) {
5190 				perm = ZFS_DELEG_PERM_PROJECTOBJQUOTA;
5191 			} else {
5192 				/* {USER|GROUP|PROJECT}USED are read-only */
5193 				return (SET_ERROR(EINVAL));
5194 			}
5195 
5196 			zone_admin_result_t zone_result;
5197 			zone_result = zone_dataset_admin_check(dsname,
5198 			    ZONE_OP_SETPROP, NULL);
5199 			if (zone_result == ZONE_ADMIN_ALLOWED)
5200 				return (zfs_secpolicy_zoned_uid_deleg(dsname,
5201 				    perm, cr));
5202 			if (zone_result == ZONE_ADMIN_DENIED)
5203 				return (SET_ERROR(EPERM));
5204 			if ((err = zfs_secpolicy_write_perms(dsname, perm, cr)))
5205 				return (err);
5206 			return (0);
5207 		}
5208 
5209 		return (SET_ERROR(EINVAL));
5210 	}
5211 
5212 	if (issnap)
5213 		return (SET_ERROR(EINVAL));
5214 
5215 	if (nvpair_type(pair) == DATA_TYPE_NVLIST) {
5216 		/*
5217 		 * dsl_prop_get_all_impl() returns properties in this
5218 		 * format.
5219 		 */
5220 		nvlist_t *attrs;
5221 		VERIFY0(nvpair_value_nvlist(pair, &attrs));
5222 		VERIFY0(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &pair));
5223 	}
5224 
5225 	/*
5226 	 * Check that this value is valid for this pool version
5227 	 */
5228 	switch (prop) {
5229 	case ZFS_PROP_COMPRESSION:
5230 		/*
5231 		 * If the user specified gzip compression, make sure
5232 		 * the SPA supports it. We ignore any errors here since
5233 		 * we'll catch them later.
5234 		 */
5235 		if (nvpair_value_uint64(pair, &intval) == 0) {
5236 			compval = ZIO_COMPRESS_ALGO(intval);
5237 			if (compval >= ZIO_COMPRESS_GZIP_1 &&
5238 			    compval <= ZIO_COMPRESS_GZIP_9 &&
5239 			    zfs_earlier_version(dsname,
5240 			    SPA_VERSION_GZIP_COMPRESSION)) {
5241 				return (SET_ERROR(ENOTSUP));
5242 			}
5243 
5244 			if (compval == ZIO_COMPRESS_ZLE &&
5245 			    zfs_earlier_version(dsname,
5246 			    SPA_VERSION_ZLE_COMPRESSION))
5247 				return (SET_ERROR(ENOTSUP));
5248 
5249 			if (compval == ZIO_COMPRESS_LZ4) {
5250 				spa_t *spa;
5251 
5252 				if ((err = spa_open(dsname, &spa, FTAG)) != 0)
5253 					return (err);
5254 
5255 				if (!spa_feature_is_enabled(spa,
5256 				    SPA_FEATURE_LZ4_COMPRESS)) {
5257 					spa_close(spa, FTAG);
5258 					return (SET_ERROR(ENOTSUP));
5259 				}
5260 				spa_close(spa, FTAG);
5261 			}
5262 
5263 			if (compval == ZIO_COMPRESS_ZSTD) {
5264 				spa_t *spa;
5265 
5266 				if ((err = spa_open(dsname, &spa, FTAG)) != 0)
5267 					return (err);
5268 
5269 				if (!spa_feature_is_enabled(spa,
5270 				    SPA_FEATURE_ZSTD_COMPRESS)) {
5271 					spa_close(spa, FTAG);
5272 					return (SET_ERROR(ENOTSUP));
5273 				}
5274 				spa_close(spa, FTAG);
5275 			}
5276 		}
5277 		break;
5278 
5279 	case ZFS_PROP_COPIES:
5280 		if (zfs_earlier_version(dsname, SPA_VERSION_DITTO_BLOCKS))
5281 			return (SET_ERROR(ENOTSUP));
5282 		break;
5283 
5284 	case ZFS_PROP_VOLBLOCKSIZE:
5285 	case ZFS_PROP_RECORDSIZE:
5286 		/* Record sizes above 128k need the feature to be enabled */
5287 		if (nvpair_value_uint64(pair, &intval) == 0 &&
5288 		    intval > SPA_OLD_MAXBLOCKSIZE) {
5289 			spa_t *spa;
5290 
5291 			/*
5292 			 * We don't allow setting the property above 1MB,
5293 			 * unless the tunable has been changed.
5294 			 */
5295 			if (intval > zfs_max_recordsize ||
5296 			    intval > SPA_MAXBLOCKSIZE)
5297 				return (SET_ERROR(ERANGE));
5298 
5299 			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
5300 				return (err);
5301 
5302 			if (!spa_feature_is_enabled(spa,
5303 			    SPA_FEATURE_LARGE_BLOCKS)) {
5304 				spa_close(spa, FTAG);
5305 				return (SET_ERROR(ENOTSUP));
5306 			}
5307 			spa_close(spa, FTAG);
5308 		}
5309 		break;
5310 
5311 	case ZFS_PROP_DNODESIZE:
5312 		/* Dnode sizes above 512 need the feature to be enabled */
5313 		if (nvpair_value_uint64(pair, &intval) == 0 &&
5314 		    intval != ZFS_DNSIZE_LEGACY) {
5315 			spa_t *spa;
5316 
5317 			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
5318 				return (err);
5319 
5320 			if (!spa_feature_is_enabled(spa,
5321 			    SPA_FEATURE_LARGE_DNODE)) {
5322 				spa_close(spa, FTAG);
5323 				return (SET_ERROR(ENOTSUP));
5324 			}
5325 			spa_close(spa, FTAG);
5326 		}
5327 		break;
5328 
5329 	case ZFS_PROP_SHARESMB:
5330 		if (zpl_earlier_version(dsname, ZPL_VERSION_FUID))
5331 			return (SET_ERROR(ENOTSUP));
5332 		break;
5333 
5334 	case ZFS_PROP_ACLINHERIT:
5335 		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
5336 		    nvpair_value_uint64(pair, &intval) == 0) {
5337 			if (intval == ZFS_ACL_PASSTHROUGH_X &&
5338 			    zfs_earlier_version(dsname,
5339 			    SPA_VERSION_PASSTHROUGH_X))
5340 				return (SET_ERROR(ENOTSUP));
5341 		}
5342 		break;
5343 	case ZFS_PROP_CHECKSUM:
5344 	case ZFS_PROP_DEDUP:
5345 	{
5346 		spa_feature_t feature;
5347 		spa_t *spa;
5348 		int err;
5349 
5350 		/* dedup feature version checks */
5351 		if (prop == ZFS_PROP_DEDUP &&
5352 		    zfs_earlier_version(dsname, SPA_VERSION_DEDUP))
5353 			return (SET_ERROR(ENOTSUP));
5354 
5355 		if (nvpair_type(pair) == DATA_TYPE_UINT64 &&
5356 		    nvpair_value_uint64(pair, &intval) == 0) {
5357 			/* check prop value is enabled in features */
5358 			feature = zio_checksum_to_feature(
5359 			    intval & ZIO_CHECKSUM_MASK);
5360 			if (feature == SPA_FEATURE_NONE)
5361 				break;
5362 
5363 			if ((err = spa_open(dsname, &spa, FTAG)) != 0)
5364 				return (err);
5365 
5366 			if (!spa_feature_is_enabled(spa, feature)) {
5367 				spa_close(spa, FTAG);
5368 				return (SET_ERROR(ENOTSUP));
5369 			}
5370 			spa_close(spa, FTAG);
5371 		}
5372 		break;
5373 	}
5374 
5375 	default:
5376 		break;
5377 	}
5378 
5379 	return (zfs_secpolicy_setprop(dsname, prop, pair, CRED()));
5380 }
5381 
5382 /*
5383  * Removes properties from the given props list that fail permission checks
5384  * needed to clear them and to restore them in case of a receive error. For each
5385  * property, make sure we have both set and inherit permissions.
5386  *
5387  * Returns the first error encountered if any permission checks fail. If the
5388  * caller provides a non-NULL errlist, it also gives the complete list of names
5389  * of all the properties that failed a permission check along with the
5390  * corresponding error numbers. The caller is responsible for freeing the
5391  * returned errlist.
5392  *
5393  * If every property checks out successfully, zero is returned and the list
5394  * pointed at by errlist is NULL.
5395  */
5396 static int
zfs_check_clearable(const char * dataset,nvlist_t * props,nvlist_t ** errlist)5397 zfs_check_clearable(const char *dataset, nvlist_t *props, nvlist_t **errlist)
5398 {
5399 	zfs_cmd_t *zc;
5400 	nvpair_t *pair, *next_pair;
5401 	nvlist_t *errors;
5402 	int err, rv = 0;
5403 
5404 	if (props == NULL)
5405 		return (0);
5406 
5407 	VERIFY0(nvlist_alloc(&errors, NV_UNIQUE_NAME, KM_SLEEP));
5408 
5409 	zc = kmem_alloc(sizeof (zfs_cmd_t), KM_SLEEP);
5410 	(void) strlcpy(zc->zc_name, dataset, sizeof (zc->zc_name));
5411 	pair = nvlist_next_nvpair(props, NULL);
5412 	while (pair != NULL) {
5413 		next_pair = nvlist_next_nvpair(props, pair);
5414 
5415 		(void) strlcpy(zc->zc_value, nvpair_name(pair),
5416 		    sizeof (zc->zc_value));
5417 		if ((err = zfs_check_settable(dataset, pair, CRED())) != 0 ||
5418 		    (err = zfs_secpolicy_inherit_prop(zc, NULL, CRED())) != 0) {
5419 			VERIFY0(nvlist_remove_nvpair(props, pair));
5420 			VERIFY0(nvlist_add_int32(errors, zc->zc_value, err));
5421 		}
5422 		pair = next_pair;
5423 	}
5424 	kmem_free(zc, sizeof (zfs_cmd_t));
5425 
5426 	if ((pair = nvlist_next_nvpair(errors, NULL)) == NULL) {
5427 		nvlist_free(errors);
5428 		errors = NULL;
5429 	} else {
5430 		VERIFY0(nvpair_value_int32(pair, &rv));
5431 	}
5432 
5433 	if (errlist == NULL)
5434 		nvlist_free(errors);
5435 	else
5436 		*errlist = errors;
5437 
5438 	return (rv);
5439 }
5440 
5441 static boolean_t
propval_equals(nvpair_t * p1,nvpair_t * p2)5442 propval_equals(nvpair_t *p1, nvpair_t *p2)
5443 {
5444 	if (nvpair_type(p1) == DATA_TYPE_NVLIST) {
5445 		/* dsl_prop_get_all_impl() format */
5446 		nvlist_t *attrs;
5447 		VERIFY0(nvpair_value_nvlist(p1, &attrs));
5448 		VERIFY0(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &p1));
5449 	}
5450 
5451 	if (nvpair_type(p2) == DATA_TYPE_NVLIST) {
5452 		nvlist_t *attrs;
5453 		VERIFY0(nvpair_value_nvlist(p2, &attrs));
5454 		VERIFY0(nvlist_lookup_nvpair(attrs, ZPROP_VALUE, &p2));
5455 	}
5456 
5457 	if (nvpair_type(p1) != nvpair_type(p2))
5458 		return (B_FALSE);
5459 
5460 	if (nvpair_type(p1) == DATA_TYPE_STRING) {
5461 		const char *valstr1, *valstr2;
5462 
5463 		VERIFY0(nvpair_value_string(p1, &valstr1));
5464 		VERIFY0(nvpair_value_string(p2, &valstr2));
5465 		return (strcmp(valstr1, valstr2) == 0);
5466 	} else {
5467 		uint64_t intval1, intval2;
5468 
5469 		VERIFY0(nvpair_value_uint64(p1, &intval1));
5470 		VERIFY0(nvpair_value_uint64(p2, &intval2));
5471 		return (intval1 == intval2);
5472 	}
5473 }
5474 
5475 /*
5476  * Remove properties from props if they are not going to change (as determined
5477  * by comparison with origprops). Remove them from origprops as well, since we
5478  * do not need to clear or restore properties that won't change.
5479  */
5480 static void
props_reduce(nvlist_t * props,nvlist_t * origprops)5481 props_reduce(nvlist_t *props, nvlist_t *origprops)
5482 {
5483 	nvpair_t *pair, *next_pair;
5484 
5485 	if (origprops == NULL)
5486 		return; /* all props need to be received */
5487 
5488 	pair = nvlist_next_nvpair(props, NULL);
5489 	while (pair != NULL) {
5490 		const char *propname = nvpair_name(pair);
5491 		nvpair_t *match;
5492 
5493 		next_pair = nvlist_next_nvpair(props, pair);
5494 
5495 		if ((nvlist_lookup_nvpair(origprops, propname,
5496 		    &match) != 0) || !propval_equals(pair, match))
5497 			goto next; /* need to set received value */
5498 
5499 		/* don't clear the existing received value */
5500 		(void) nvlist_remove_nvpair(origprops, match);
5501 		/* don't bother receiving the property */
5502 		(void) nvlist_remove_nvpair(props, pair);
5503 next:
5504 		pair = next_pair;
5505 	}
5506 }
5507 
5508 /*
5509  * Extract properties that cannot be set PRIOR to the receipt of a dataset.
5510  * For example, refquota cannot be set until after the receipt of a dataset,
5511  * because in replication streams, an older/earlier snapshot may exceed the
5512  * refquota.  We want to receive the older/earlier snapshot, but setting
5513  * refquota pre-receipt will set the dsl's ACTUAL quota, which will prevent
5514  * the older/earlier snapshot from being received (with EDQUOT).
5515  *
5516  * The ZFS test "zfs_receive_011_pos" demonstrates such a scenario.
5517  *
5518  * libzfs will need to be judicious handling errors encountered by props
5519  * extracted by this function.
5520  */
5521 static nvlist_t *
extract_delay_props(nvlist_t * props)5522 extract_delay_props(nvlist_t *props)
5523 {
5524 	nvlist_t *delayprops;
5525 	nvpair_t *nvp, *tmp;
5526 	static const zfs_prop_t delayable[] = {
5527 		ZFS_PROP_REFQUOTA,
5528 		ZFS_PROP_KEYLOCATION,
5529 		/*
5530 		 * Setting ZFS_PROP_SHARESMB requires the objset type to be
5531 		 * known, which is not possible prior to receipt of raw sends.
5532 		 */
5533 		ZFS_PROP_SHARESMB,
5534 		0
5535 	};
5536 	int i;
5537 
5538 	VERIFY0(nvlist_alloc(&delayprops, NV_UNIQUE_NAME, KM_SLEEP));
5539 
5540 	for (nvp = nvlist_next_nvpair(props, NULL); nvp != NULL;
5541 	    nvp = nvlist_next_nvpair(props, nvp)) {
5542 		/*
5543 		 * strcmp() is safe because zfs_prop_to_name() always returns
5544 		 * a bounded string.
5545 		 */
5546 		for (i = 0; delayable[i] != 0; i++) {
5547 			if (strcmp(zfs_prop_to_name(delayable[i]),
5548 			    nvpair_name(nvp)) == 0) {
5549 				break;
5550 			}
5551 		}
5552 		if (delayable[i] != 0) {
5553 			tmp = nvlist_prev_nvpair(props, nvp);
5554 			VERIFY0(nvlist_add_nvpair(delayprops, nvp));
5555 			VERIFY0(nvlist_remove_nvpair(props, nvp));
5556 			nvp = tmp;
5557 		}
5558 	}
5559 
5560 	if (nvlist_empty(delayprops)) {
5561 		nvlist_free(delayprops);
5562 		delayprops = NULL;
5563 	}
5564 	return (delayprops);
5565 }
5566 
5567 static void
zfs_allow_log_destroy(void * arg)5568 zfs_allow_log_destroy(void *arg)
5569 {
5570 	char *poolname = arg;
5571 
5572 	if (poolname != NULL)
5573 		kmem_strfree(poolname);
5574 }
5575 
#ifdef	ZFS_DEBUG
/*
 * Debug-build error injection for zfs_ioc_recv_impl(): when this flag is
 * set, the next call that gets past dmu_recv_begin() clears the flag and
 * forces its result to error 1, exercising the property-restore path.
 */
static boolean_t zfs_ioc_recv_inject_err;
#endif
5579 
/*
 * Common implementation of the receive ioctls: begin the receive, apply
 * received and local properties, consume the stream, finish the receive and,
 * if anything failed, try to put the original properties back.
 *
 * tofs, tosnap, origin, force, heal, resumable, hidden_args and begin_record
 * are handed to dmu_recv_begin() as-is.
 *
 * recvprops	received properties to apply (may be NULL); modified in place
 * localprops	-o overrides (non-boolean pairs) and -x exclusions (boolean
 *		pairs), may be NULL
 * input_fd	file descriptor the stream is read from
 * read_bytes	out: how far the file offset advanced during the receive
 * errflags	out: ZPROP_ERR_NOCLEAR / ZPROP_ERR_NORESTORE bits
 * errors	out: see below
 *
 * nvlist 'errors' is always allocated. It will contain descriptions of
 * encountered errors, if any. It's the callers responsibility to free.
 *
 * Returns the receive error if there was one; otherwise the error (if any)
 * from marking the dataset as having received properties.
 */
static int
zfs_ioc_recv_impl(char *tofs, char *tosnap, const char *origin,
    nvlist_t *recvprops, nvlist_t *localprops, nvlist_t *hidden_args,
    boolean_t force, boolean_t heal, boolean_t resumable, int input_fd,
    dmu_replay_record_t *begin_record, uint64_t *read_bytes,
    uint64_t *errflags, nvlist_t **errors)
{
	dmu_recv_cookie_t drc;
	int error = 0;
	int props_error = 0;
	offset_t off, noff;
	nvlist_t *local_delayprops = NULL;
	nvlist_t *recv_delayprops = NULL;
	nvlist_t *inherited_delayprops = NULL;
	nvlist_t *origprops = NULL; /* existing properties */
	nvlist_t *origrecvd = NULL; /* existing received properties */
	boolean_t first_recvd_props = B_FALSE;
	boolean_t tofs_was_redacted;
	zfs_file_t *input_fp;

	/*
	 * Initialize every output up front so they are valid on all return
	 * paths, including the early EBADF return below.
	 */
	*read_bytes = 0;
	*errflags = 0;
	*errors = fnvlist_alloc();
	off = 0;

	if ((input_fp = zfs_file_get(input_fd)) == NULL)
		return (SET_ERROR(EBADF));

	/* noff remembers the starting offset; off advances with the stream. */
	noff = off = zfs_file_off(input_fp);
	error = dmu_recv_begin(tofs, tosnap, begin_record, force, heal,
	    resumable, localprops, hidden_args, origin, &drc, input_fp,
	    &off);
	if (error != 0)
		goto out;
	tofs_was_redacted = dsl_get_redacted(drc.drc_ds);

	/*
	 * Set properties before we receive the stream so that they are applied
	 * to the new data. Note that we must call dmu_recv_stream() if
	 * dmu_recv_begin() succeeds.
	 */
	if (recvprops != NULL && !drc.drc_newfs) {
		if (spa_version(dsl_dataset_get_spa(drc.drc_ds)) >=
		    SPA_VERSION_RECVD_PROPS &&
		    !dsl_prop_get_hasrecvd(tofs))
			first_recvd_props = B_TRUE;

		/*
		 * If new received properties are supplied, they are to
		 * completely replace the existing received properties,
		 * so stash away the existing ones.
		 */
		if (dsl_prop_get_received(tofs, &origrecvd) == 0) {
			nvlist_t *errlist = NULL;
			/*
			 * Don't bother writing a property if its value won't
			 * change (and avoid the unnecessary security checks).
			 *
			 * The first receive after SPA_VERSION_RECVD_PROPS is a
			 * special case where we blow away all local properties
			 * regardless.
			 */
			if (!first_recvd_props)
				props_reduce(recvprops, origrecvd);
			/*
			 * Props we are not permitted to clear are dropped from
			 * origrecvd and reported through *errors.
			 */
			if (zfs_check_clearable(tofs, origrecvd, &errlist) != 0)
				(void) nvlist_merge(*errors, errlist, 0);
			nvlist_free(errlist);

			if (clear_received_props(tofs, origrecvd,
			    first_recvd_props ? NULL : recvprops) != 0)
				*errflags |= ZPROP_ERR_NOCLEAR;
		} else {
			*errflags |= ZPROP_ERR_NOCLEAR;
		}
	}

	/*
	 * Stash away existing properties so we can restore them on error unless
	 * we're doing the first receive after SPA_VERSION_RECVD_PROPS, in which
	 * case "origrecvd" will take care of that.
	 */
	if (localprops != NULL && !drc.drc_newfs && !first_recvd_props) {
		objset_t *os;
		if (dmu_objset_hold(tofs, FTAG, &os) == 0) {
			if (dsl_prop_get_all(os, &origprops) != 0) {
				*errflags |= ZPROP_ERR_NOCLEAR;
			}
			dmu_objset_rele(os, FTAG);
		} else {
			*errflags |= ZPROP_ERR_NOCLEAR;
		}
	}

	if (recvprops != NULL) {
		/*
		 * props_error is kept separate from 'error': it is only
		 * returned at the end if nothing else failed.
		 */
		props_error = dsl_prop_set_hasrecvd(tofs);

		if (props_error == 0) {
			/* Delayable props are held back until after the recv */
			recv_delayprops = extract_delay_props(recvprops);
			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
			    recvprops, *errors);
		}
	}

	if (localprops != NULL) {
		/* oprops: "-o prop=value" overrides; xprops: "-x prop" */
		nvlist_t *oprops = fnvlist_alloc();
		nvlist_t *xprops = fnvlist_alloc();
		nvpair_t *nvp = NULL;

		while ((nvp = nvlist_next_nvpair(localprops, nvp)) != NULL) {
			if (nvpair_type(nvp) == DATA_TYPE_BOOLEAN) {
				/* -x property */
				const char *name = nvpair_name(nvp);
				zfs_prop_t prop = zfs_name_to_prop(name);
				/*
				 * Silently skip exclusions that cannot be
				 * inherited: non-inheritable native props and
				 * names that are not valid user props.
				 */
				if (prop != ZPROP_USERPROP) {
					if (!zfs_prop_inheritable(prop))
						continue;
				} else if (!zfs_prop_user(name))
					continue;
				fnvlist_add_boolean(xprops, name);
			} else {
				/* -o property=value */
				fnvlist_add_nvpair(oprops, nvp);
			}
		}

		local_delayprops = extract_delay_props(oprops);
		(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
		    oprops, *errors);
		inherited_delayprops = extract_delay_props(xprops);
		(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED,
		    xprops, *errors);

		nvlist_free(oprops);
		nvlist_free(xprops);
	}

	error = dmu_recv_stream(&drc, &off);

	if (error == 0) {
		zfsvfs_t *zfsvfs = NULL;
		zvol_state_handle_t *zv = NULL;

		/*
		 * Finish the receive.  Which variant is used depends on
		 * whether tofs resolves to a zfsvfs, to a zvol that can be
		 * suspended, or to neither.
		 */
		if (getzfsvfs(tofs, &zfsvfs) == 0) {
			/* online recv */
			dsl_dataset_t *ds;
			int end_err;
			boolean_t stream_is_redacted = DMU_GET_FEATUREFLAGS(
			    begin_record->drr_u.drr_begin.
			    drr_versioninfo) & DMU_BACKUP_FEATURE_REDACTED;

			ds = dmu_objset_ds(zfsvfs->z_os);
			error = zfs_suspend_fs(zfsvfs);
			/*
			 * If the suspend fails, then the recv_end will
			 * likely also fail, and clean up after itself.
			 */
			end_err = dmu_recv_end(&drc, zfsvfs);
			/*
			 * If the dataset was not redacted, but we received a
			 * redacted stream onto it, we need to unmount the
			 * dataset.  Otherwise, resume the filesystem.
			 */
			if (error == 0 && !drc.drc_newfs &&
			    stream_is_redacted && !tofs_was_redacted) {
				error = zfs_end_fs(zfsvfs, ds);
			} else if (error == 0) {
				error = zfs_resume_fs(zfsvfs, ds);
			}
			/* A suspend/resume/end error wins over end_err. */
			error = error ? error : end_err;
			zfs_vfs_rele(zfsvfs);
		} else if (zvol_suspend(tofs, &zv) == 0) {
			error = dmu_recv_end(&drc, zvol_tag(zv));
			zvol_resume(zv);
		} else {
			error = dmu_recv_end(&drc, NULL);
		}

		/* Set delayed properties now, after we're done receiving. */
		if (recv_delayprops != NULL && error == 0) {
			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_RECEIVED,
			    recv_delayprops, *errors);
		}
		if (local_delayprops != NULL && error == 0) {
			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL,
			    local_delayprops, *errors);
		}
		if (inherited_delayprops != NULL && error == 0) {
			(void) zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED,
			    inherited_delayprops, *errors);
		}
	}

	/*
	 * Merge delayed props back in with initial props, in case
	 * we're DEBUG and zfs_ioc_recv_inject_err is set (which means
	 * we have to make sure clear_received_props() includes
	 * the delayed properties).
	 *
	 * Since zfs_ioc_recv_inject_err is only in DEBUG kernels,
	 * using ASSERT() will be just like a VERIFY.
	 */
	if (recv_delayprops != NULL) {
		ASSERT0(nvlist_merge(recvprops, recv_delayprops, 0));
		nvlist_free(recv_delayprops);
	}
	if (local_delayprops != NULL) {
		ASSERT0(nvlist_merge(localprops, local_delayprops, 0));
		nvlist_free(local_delayprops);
	}
	if (inherited_delayprops != NULL) {
		ASSERT0(nvlist_merge(localprops, inherited_delayprops, 0));
		nvlist_free(inherited_delayprops);
	}
	*read_bytes = off - noff;

#ifdef	ZFS_DEBUG
	/* One-shot injected failure, to exercise the restore code below. */
	if (zfs_ioc_recv_inject_err) {
		zfs_ioc_recv_inject_err = B_FALSE;
		error = 1;
	}
#endif

	/*
	 * On error, restore the original props.
	 */
	if (error != 0 && recvprops != NULL && !drc.drc_newfs) {
		if (clear_received_props(tofs, recvprops, NULL) != 0) {
			/*
			 * We failed to clear the received properties.
			 * Since we may have left a $recvd value on the
			 * system, we can't clear the $hasrecvd flag.
			 */
			*errflags |= ZPROP_ERR_NORESTORE;
		} else if (first_recvd_props) {
			dsl_prop_unset_hasrecvd(tofs);
		}

		if (origrecvd == NULL && !drc.drc_newfs) {
			/* We failed to stash the original properties. */
			*errflags |= ZPROP_ERR_NORESTORE;
		}

		/*
		 * dsl_props_set() will not convert RECEIVED to LOCAL on or
		 * after SPA_VERSION_RECVD_PROPS, so we need to specify LOCAL
		 * explicitly if we're restoring local properties cleared in the
		 * first new-style receive.
		 */
		if (origrecvd != NULL &&
		    zfs_set_prop_nvlist(tofs, (first_recvd_props ?
		    ZPROP_SRC_LOCAL : ZPROP_SRC_RECEIVED),
		    origrecvd, NULL) != 0) {
			/*
			 * We stashed the original properties but failed to
			 * restore them.
			 */
			*errflags |= ZPROP_ERR_NORESTORE;
		}
	}
	if (error != 0 && localprops != NULL && !drc.drc_newfs &&
	    !first_recvd_props) {
		nvlist_t *setprops;
		nvlist_t *inheritprops;
		nvpair_t *nvp;

		if (origprops == NULL) {
			/* We failed to stash the original properties. */
			*errflags |= ZPROP_ERR_NORESTORE;
			goto out;
		}

		/*
		 * Restore original props: for every property named in
		 * localprops, decide from the stashed copy whether it has to
		 * be set again locally or reverted to inheriting.
		 */
		setprops = fnvlist_alloc();
		inheritprops = fnvlist_alloc();
		nvp = NULL;
		while ((nvp = nvlist_next_nvpair(localprops, nvp)) != NULL) {
			const char *name = nvpair_name(nvp);
			const char *source;
			nvlist_t *attrs;

			if (!nvlist_exists(origprops, name)) {
				/*
				 * Property was not present or was explicitly
				 * inherited before the receive, restore this.
				 */
				fnvlist_add_boolean(inheritprops, name);
				continue;
			}
			attrs = fnvlist_lookup_nvlist(origprops, name);
			source = fnvlist_lookup_string(attrs, ZPROP_SOURCE);

			/* Skip received properties */
			if (strcmp(source, ZPROP_SOURCE_VAL_RECVD) == 0)
				continue;

			if (strcmp(source, tofs) == 0) {
				/* Property was locally set */
				fnvlist_add_nvlist(setprops, name, attrs);
			} else {
				/* Property was implicitly inherited */
				fnvlist_add_boolean(inheritprops, name);
			}
		}

		if (zfs_set_prop_nvlist(tofs, ZPROP_SRC_LOCAL, setprops,
		    NULL) != 0)
			*errflags |= ZPROP_ERR_NORESTORE;
		if (zfs_set_prop_nvlist(tofs, ZPROP_SRC_INHERITED, inheritprops,
		    NULL) != 0)
			*errflags |= ZPROP_ERR_NORESTORE;

		nvlist_free(setprops);
		nvlist_free(inheritprops);
	}
out:
	zfs_file_put(input_fp);
	nvlist_free(origrecvd);
	nvlist_free(origprops);

	/* A property-bookkeeping failure only surfaces if the recv was OK. */
	if (error == 0)
		error = props_error;

	return (error);
}
5908 
5909 /*
5910  * inputs:
5911  * zc_name		name of containing filesystem (unused)
5912  * zc_nvlist_src{_size}	nvlist of properties to apply
5913  * zc_nvlist_conf{_size}	nvlist of properties to exclude
5914  *			(DATA_TYPE_BOOLEAN) and override (everything else)
5915  * zc_value		name of snapshot to create
5916  * zc_string		name of clone origin (if DRR_FLAG_CLONE)
5917  * zc_cookie		file descriptor to recv from
5918  * zc_begin_record	the BEGIN record of the stream (not byteswapped)
5919  * zc_guid		force flag
5920  *
5921  * outputs:
5922  * zc_cookie		number of bytes read
5923  * zc_obj		zprop_errflags_t
5924  * zc_nvlist_dst{_size} error for each unapplied received property
5925  */
5926 static int
zfs_ioc_recv(zfs_cmd_t * zc)5927 zfs_ioc_recv(zfs_cmd_t *zc)
5928 {
5929 	dmu_replay_record_t begin_record;
5930 	nvlist_t *errors = NULL;
5931 	nvlist_t *recvdprops = NULL;
5932 	nvlist_t *localprops = NULL;
5933 	const char *origin = NULL;
5934 	char *tosnap;
5935 	char tofs[ZFS_MAX_DATASET_NAME_LEN];
5936 	int error = 0;
5937 
5938 	if (dataset_namecheck(zc->zc_value, NULL, NULL) != 0 ||
5939 	    strchr(zc->zc_value, '@') == NULL ||
5940 	    strchr(zc->zc_value, '%') != NULL) {
5941 		return (SET_ERROR(EINVAL));
5942 	}
5943 
5944 	(void) strlcpy(tofs, zc->zc_value, sizeof (tofs));
5945 	tosnap = strchr(tofs, '@');
5946 	*tosnap++ = '\0';
5947 
5948 	if (zc->zc_nvlist_src != 0 &&
5949 	    (error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
5950 	    zc->zc_iflags, &recvdprops)) != 0) {
5951 		goto out;
5952 	}
5953 
5954 	if (zc->zc_nvlist_conf != 0 &&
5955 	    (error = get_nvlist(zc->zc_nvlist_conf, zc->zc_nvlist_conf_size,
5956 	    zc->zc_iflags, &localprops)) != 0) {
5957 		goto out;
5958 	}
5959 
5960 	if (zc->zc_string[0])
5961 		origin = zc->zc_string;
5962 
5963 	begin_record.drr_type = DRR_BEGIN;
5964 	begin_record.drr_payloadlen = 0;
5965 	begin_record.drr_u.drr_begin = zc->zc_begin_record;
5966 
5967 	error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvdprops, localprops,
5968 	    NULL, zc->zc_guid, B_FALSE, B_FALSE, zc->zc_cookie, &begin_record,
5969 	    &zc->zc_cookie, &zc->zc_obj, &errors);
5970 
5971 	/*
5972 	 * Now that all props, initial and delayed, are set, report the prop
5973 	 * errors to the caller.
5974 	 */
5975 	if (zc->zc_nvlist_dst_size != 0 && errors != NULL &&
5976 	    (nvlist_smush(errors, zc->zc_nvlist_dst_size) != 0 ||
5977 	    put_nvlist(zc, errors) != 0)) {
5978 		/*
5979 		 * Caller made zc->zc_nvlist_dst less than the minimum expected
5980 		 * size or supplied an invalid address.
5981 		 */
5982 		error = SET_ERROR(EINVAL);
5983 	}
5984 
5985 out:
5986 	nvlist_free(errors);
5987 	nvlist_free(recvdprops);
5988 	nvlist_free(localprops);
5989 
5990 	return (error);
5991 }
5992 
5993 /*
5994  * innvl: {
5995  *     "snapname" -> full name of the snapshot to create
5996  *     (optional) "props" -> received properties to set (nvlist)
5997  *     (optional) "localprops" -> override and exclude properties (nvlist)
5998  *     (optional) "origin" -> name of clone origin (DRR_FLAG_CLONE)
5999  *     "begin_record" -> non-byteswapped dmu_replay_record_t
6000  *     "input_fd" -> file descriptor to read stream from (int32)
6001  *     (optional) "force" -> force flag (value ignored)
6002  *     (optional) "heal" -> use send stream to heal data corruption
6003  *     (optional) "resumable" -> resumable flag (value ignored)
6004  *     (optional) "cleanup_fd" -> unused
6005  *     (optional) "action_handle" -> unused
6006  *     (optional) "hidden_args" -> { "wkeydata" -> value }
6007  * }
6008  *
6009  * outnvl: {
6010  *     "read_bytes" -> number of bytes read
6011  *     "error_flags" -> zprop_errflags_t
6012  *     "errors" -> error for each unapplied received property (nvlist)
6013  * }
6014  */
6015 static const zfs_ioc_key_t zfs_keys_recv_new[] = {
6016 	{"snapname",		DATA_TYPE_STRING,	0},
6017 	{"props",		DATA_TYPE_NVLIST,	ZK_OPTIONAL},
6018 	{"localprops",		DATA_TYPE_NVLIST,	ZK_OPTIONAL},
6019 	{"origin",		DATA_TYPE_STRING,	ZK_OPTIONAL},
6020 	{"begin_record",	DATA_TYPE_BYTE_ARRAY,	0},
6021 	{"input_fd",		DATA_TYPE_INT32,	0},
6022 	{"force",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
6023 	{"heal",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
6024 	{"resumable",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
6025 	{"cleanup_fd",		DATA_TYPE_INT32,	ZK_OPTIONAL},
6026 	{"action_handle",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
6027 	{"hidden_args",		DATA_TYPE_NVLIST,	ZK_OPTIONAL},
6028 };
6029 
6030 static int
zfs_ioc_recv_new(const char * fsname,nvlist_t * innvl,nvlist_t * outnvl)6031 zfs_ioc_recv_new(const char *fsname, nvlist_t *innvl, nvlist_t *outnvl)
6032 {
6033 	dmu_replay_record_t *begin_record;
6034 	uint_t begin_record_size;
6035 	nvlist_t *errors = NULL;
6036 	nvlist_t *recvprops = NULL;
6037 	nvlist_t *localprops = NULL;
6038 	nvlist_t *hidden_args = NULL;
6039 	const char *snapname;
6040 	const char *origin = NULL;
6041 	char *tosnap;
6042 	char tofs[ZFS_MAX_DATASET_NAME_LEN];
6043 	boolean_t force;
6044 	boolean_t heal;
6045 	boolean_t resumable;
6046 	uint64_t read_bytes = 0;
6047 	uint64_t errflags = 0;
6048 	int input_fd = -1;
6049 	int error;
6050 
6051 	snapname = fnvlist_lookup_string(innvl, "snapname");
6052 
6053 	if (dataset_namecheck(snapname, NULL, NULL) != 0 ||
6054 	    strchr(snapname, '@') == NULL ||
6055 	    strchr(snapname, '%') != NULL) {
6056 		return (SET_ERROR(EINVAL));
6057 	}
6058 
6059 	(void) strlcpy(tofs, snapname, sizeof (tofs));
6060 	tosnap = strchr(tofs, '@');
6061 	*tosnap++ = '\0';
6062 
6063 	error = nvlist_lookup_string(innvl, "origin", &origin);
6064 	if (error && error != ENOENT)
6065 		return (error);
6066 
6067 	error = nvlist_lookup_byte_array(innvl, "begin_record",
6068 	    (uchar_t **)&begin_record, &begin_record_size);
6069 	if (error != 0 || begin_record_size != sizeof (*begin_record))
6070 		return (SET_ERROR(EINVAL));
6071 
6072 	input_fd = fnvlist_lookup_int32(innvl, "input_fd");
6073 
6074 	force = nvlist_exists(innvl, "force");
6075 	heal = nvlist_exists(innvl, "heal");
6076 	resumable = nvlist_exists(innvl, "resumable");
6077 
6078 	/* we still use "props" here for backwards compatibility */
6079 	error = nvlist_lookup_nvlist(innvl, "props", &recvprops);
6080 	if (error && error != ENOENT)
6081 		goto out;
6082 
6083 	error = nvlist_lookup_nvlist(innvl, "localprops", &localprops);
6084 	if (error && error != ENOENT)
6085 		goto out;
6086 
6087 	error = nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
6088 	if (error && error != ENOENT)
6089 		goto out;
6090 
6091 	error = zfs_ioc_recv_impl(tofs, tosnap, origin, recvprops, localprops,
6092 	    hidden_args, force, heal, resumable, input_fd, begin_record,
6093 	    &read_bytes, &errflags, &errors);
6094 
6095 	fnvlist_add_uint64(outnvl, "read_bytes", read_bytes);
6096 	fnvlist_add_uint64(outnvl, "error_flags", errflags);
6097 	fnvlist_add_nvlist(outnvl, "errors", errors);
6098 
6099 out:
6100 	nvlist_free(errors);
6101 	nvlist_free(recvprops);
6102 	nvlist_free(localprops);
6103 	nvlist_free(hidden_args);
6104 
6105 	return (error);
6106 }
6107 
6108 /*
6109  * When stack space is limited, we write replication stream data to the target
6110  * on a separate taskq thread, to make sure there's enough stack space.
6111  */
6112 #ifndef HAVE_LARGE_STACKS
6113 #define	USE_SEND_TASKQ	1
6114 #endif
6115 
6116 typedef struct dump_bytes_io {
6117 	zfs_file_t	*dbi_fp;
6118 	caddr_t		dbi_buf;
6119 	int		dbi_len;
6120 	int		dbi_err;
6121 } dump_bytes_io_t;
6122 
6123 static void
dump_bytes_cb(void * arg)6124 dump_bytes_cb(void *arg)
6125 {
6126 	dump_bytes_io_t *dbi = (dump_bytes_io_t *)arg;
6127 	zfs_file_t *fp;
6128 	caddr_t buf;
6129 
6130 	fp = dbi->dbi_fp;
6131 	buf = dbi->dbi_buf;
6132 
6133 	dbi->dbi_err = zfs_file_write(fp, buf, dbi->dbi_len, NULL);
6134 }
6135 
6136 typedef struct dump_bytes_arg {
6137 	zfs_file_t	*dba_fp;
6138 #ifdef USE_SEND_TASKQ
6139 	taskq_t		*dba_tq;
6140 	taskq_ent_t	dba_tqent;
6141 #endif
6142 } dump_bytes_arg_t;
6143 
6144 static int
dump_bytes(objset_t * os,void * buf,int len,void * arg)6145 dump_bytes(objset_t *os, void *buf, int len, void *arg)
6146 {
6147 	dump_bytes_arg_t *dba = (dump_bytes_arg_t *)arg;
6148 	dump_bytes_io_t dbi;
6149 
6150 	dbi.dbi_fp = dba->dba_fp;
6151 	dbi.dbi_buf = buf;
6152 	dbi.dbi_len = len;
6153 
6154 #ifdef USE_SEND_TASKQ
6155 	taskq_dispatch_ent(dba->dba_tq, dump_bytes_cb, &dbi, TQ_SLEEP,
6156 	    &dba->dba_tqent);
6157 	taskq_wait(dba->dba_tq);
6158 #else
6159 	dump_bytes_cb(&dbi);
6160 #endif
6161 
6162 	return (dbi.dbi_err);
6163 }
6164 
6165 static int
dump_bytes_init(dump_bytes_arg_t * dba,int fd,dmu_send_outparams_t * out)6166 dump_bytes_init(dump_bytes_arg_t *dba, int fd, dmu_send_outparams_t *out)
6167 {
6168 	zfs_file_t *fp = zfs_file_get(fd);
6169 	if (fp == NULL)
6170 		return (SET_ERROR(EBADF));
6171 
6172 	dba->dba_fp = fp;
6173 #ifdef USE_SEND_TASKQ
6174 	dba->dba_tq = taskq_create("z_send", 1, defclsyspri, 0, 0, 0);
6175 	taskq_init_ent(&dba->dba_tqent);
6176 #endif
6177 
6178 	memset(out, 0, sizeof (dmu_send_outparams_t));
6179 	out->dso_outfunc = dump_bytes;
6180 	out->dso_arg = dba;
6181 	out->dso_dryrun = B_FALSE;
6182 
6183 	return (0);
6184 }
6185 
6186 static void
dump_bytes_fini(dump_bytes_arg_t * dba)6187 dump_bytes_fini(dump_bytes_arg_t *dba)
6188 {
6189 	zfs_file_put(dba->dba_fp);
6190 #ifdef USE_SEND_TASKQ
6191 	taskq_destroy(dba->dba_tq);
6192 #endif
6193 }
6194 
6195 /*
6196  * inputs:
6197  * zc_name	name of snapshot to send
6198  * zc_cookie	file descriptor to send stream to
6199  * zc_obj	fromorigin flag (mutually exclusive with zc_fromobj)
6200  * zc_sendobj	objsetid of snapshot to send
6201  * zc_fromobj	objsetid of incremental fromsnap (may be zero)
6202  * zc_guid	if set, estimate size of stream only.  zc_cookie is ignored.
6203  *		output size in zc_objset_type.
6204  * zc_flags	lzc_send_flags
6205  *
6206  * outputs:
6207  * zc_objset_type	estimated size, if zc_guid is set
6208  *
6209  * NOTE: This is no longer the preferred interface, any new functionality
6210  *	  should be added to zfs_ioc_send_new() instead.
6211  */
6212 static int
zfs_ioc_send(zfs_cmd_t * zc)6213 zfs_ioc_send(zfs_cmd_t *zc)
6214 {
6215 	int error;
6216 	offset_t off;
6217 	boolean_t estimate = (zc->zc_guid != 0);
6218 	boolean_t embedok = (zc->zc_flags & 0x1);
6219 	boolean_t large_block_ok = (zc->zc_flags & 0x2);
6220 	boolean_t compressok = (zc->zc_flags & 0x4);
6221 	boolean_t rawok = (zc->zc_flags & 0x8);
6222 	boolean_t savedok = (zc->zc_flags & 0x10);
6223 
6224 	if (zc->zc_obj != 0) {
6225 		dsl_pool_t *dp;
6226 		dsl_dataset_t *tosnap;
6227 
6228 		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
6229 		if (error != 0)
6230 			return (error);
6231 
6232 		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj, FTAG, &tosnap);
6233 		if (error != 0) {
6234 			dsl_pool_rele(dp, FTAG);
6235 			return (error);
6236 		}
6237 
6238 		if (dsl_dir_is_clone(tosnap->ds_dir))
6239 			zc->zc_fromobj =
6240 			    dsl_dir_phys(tosnap->ds_dir)->dd_origin_obj;
6241 		dsl_dataset_rele(tosnap, FTAG);
6242 		dsl_pool_rele(dp, FTAG);
6243 	}
6244 
6245 	if (estimate) {
6246 		dsl_pool_t *dp;
6247 		dsl_dataset_t *tosnap;
6248 		dsl_dataset_t *fromsnap = NULL;
6249 
6250 		error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
6251 		if (error != 0)
6252 			return (error);
6253 
6254 		error = dsl_dataset_hold_obj(dp, zc->zc_sendobj,
6255 		    FTAG, &tosnap);
6256 		if (error != 0) {
6257 			dsl_pool_rele(dp, FTAG);
6258 			return (error);
6259 		}
6260 
6261 		if (zc->zc_fromobj != 0) {
6262 			error = dsl_dataset_hold_obj(dp, zc->zc_fromobj,
6263 			    FTAG, &fromsnap);
6264 			if (error != 0) {
6265 				dsl_dataset_rele(tosnap, FTAG);
6266 				dsl_pool_rele(dp, FTAG);
6267 				return (error);
6268 			}
6269 		}
6270 
6271 		error = dmu_send_estimate_fast(tosnap, fromsnap, NULL,
6272 		    compressok || rawok, savedok, &zc->zc_objset_type);
6273 
6274 		if (fromsnap != NULL)
6275 			dsl_dataset_rele(fromsnap, FTAG);
6276 		dsl_dataset_rele(tosnap, FTAG);
6277 		dsl_pool_rele(dp, FTAG);
6278 	} else {
6279 		dump_bytes_arg_t dba;
6280 		dmu_send_outparams_t out;
6281 		error = dump_bytes_init(&dba, zc->zc_cookie, &out);
6282 		if (error)
6283 			return (error);
6284 
6285 		off = zfs_file_off(dba.dba_fp);
6286 		error = dmu_send_obj(zc->zc_name, zc->zc_sendobj,
6287 		    zc->zc_fromobj, embedok, large_block_ok, compressok,
6288 		    rawok, savedok, zc->zc_cookie, &off, &out);
6289 
6290 		dump_bytes_fini(&dba);
6291 	}
6292 	return (error);
6293 }
6294 
6295 /*
6296  * inputs:
6297  * zc_name		name of snapshot on which to report progress
6298  * zc_cookie		file descriptor of send stream
6299  *
6300  * outputs:
6301  * zc_cookie		number of bytes written in send stream thus far
6302  * zc_objset_type	logical size of data traversed by send thus far
6303  */
6304 static int
zfs_ioc_send_progress(zfs_cmd_t * zc)6305 zfs_ioc_send_progress(zfs_cmd_t *zc)
6306 {
6307 	dsl_pool_t *dp;
6308 	dsl_dataset_t *ds;
6309 	dmu_sendstatus_t *dsp = NULL;
6310 	int error;
6311 
6312 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
6313 	if (error != 0)
6314 		return (error);
6315 
6316 	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
6317 	if (error != 0) {
6318 		dsl_pool_rele(dp, FTAG);
6319 		return (error);
6320 	}
6321 
6322 	mutex_enter(&ds->ds_sendstream_lock);
6323 
6324 	/*
6325 	 * Iterate over all the send streams currently active on this dataset.
6326 	 * If there's one which matches the specified file descriptor _and_ the
6327 	 * stream was started by the current process, return the progress of
6328 	 * that stream.
6329 	 */
6330 
6331 	for (dsp = list_head(&ds->ds_sendstreams); dsp != NULL;
6332 	    dsp = list_next(&ds->ds_sendstreams, dsp)) {
6333 		if (dsp->dss_outfd == zc->zc_cookie &&
6334 		    zfs_proc_is_caller(dsp->dss_proc))
6335 			break;
6336 	}
6337 
6338 	if (dsp != NULL) {
6339 		zc->zc_cookie = atomic_cas_64((volatile uint64_t *)dsp->dss_off,
6340 		    0, 0);
6341 		/* This is the closest thing we have to atomic_read_64. */
6342 		zc->zc_objset_type = atomic_cas_64(&dsp->dss_blocks, 0, 0);
6343 	} else {
6344 		error = SET_ERROR(ENOENT);
6345 	}
6346 
6347 	mutex_exit(&ds->ds_sendstream_lock);
6348 	dsl_dataset_rele(ds, FTAG);
6349 	dsl_pool_rele(dp, FTAG);
6350 	return (error);
6351 }
6352 
6353 static int
zfs_ioc_inject_fault(zfs_cmd_t * zc)6354 zfs_ioc_inject_fault(zfs_cmd_t *zc)
6355 {
6356 	int id, error;
6357 
6358 	error = zio_inject_fault(zc->zc_name, (int)zc->zc_guid, &id,
6359 	    &zc->zc_inject_record);
6360 
6361 	if (error == 0)
6362 		zc->zc_guid = (uint64_t)id;
6363 
6364 	return (error);
6365 }
6366 
6367 static int
zfs_ioc_clear_fault(zfs_cmd_t * zc)6368 zfs_ioc_clear_fault(zfs_cmd_t *zc)
6369 {
6370 	return (zio_clear_fault((int)zc->zc_guid));
6371 }
6372 
6373 static int
zfs_ioc_inject_list_next(zfs_cmd_t * zc)6374 zfs_ioc_inject_list_next(zfs_cmd_t *zc)
6375 {
6376 	int id = (int)zc->zc_guid;
6377 	int error;
6378 
6379 	error = zio_inject_list_next(&id, zc->zc_name, sizeof (zc->zc_name),
6380 	    &zc->zc_inject_record);
6381 
6382 	zc->zc_guid = id;
6383 
6384 	return (error);
6385 }
6386 
6387 static int
zfs_ioc_error_log(zfs_cmd_t * zc)6388 zfs_ioc_error_log(zfs_cmd_t *zc)
6389 {
6390 	spa_t *spa;
6391 	int error;
6392 
6393 	if ((error = spa_open(zc->zc_name, &spa, FTAG)) != 0)
6394 		return (error);
6395 
6396 	error = spa_get_errlog(spa, (void *)(uintptr_t)zc->zc_nvlist_dst,
6397 	    &zc->zc_nvlist_dst_size);
6398 
6399 	spa_close(spa, FTAG);
6400 
6401 	return (error);
6402 }
6403 
6404 static int
zfs_ioc_clear(zfs_cmd_t * zc)6405 zfs_ioc_clear(zfs_cmd_t *zc)
6406 {
6407 	spa_t *spa;
6408 	vdev_t *vd;
6409 	int error;
6410 
6411 	/*
6412 	 * On zpool clear we also fix up missing slogs
6413 	 */
6414 	spa_namespace_enter(FTAG);
6415 	spa = spa_lookup(zc->zc_name);
6416 	if (spa == NULL) {
6417 		spa_namespace_exit(FTAG);
6418 		return (SET_ERROR(EIO));
6419 	}
6420 	if (spa_get_log_state(spa) == SPA_LOG_MISSING) {
6421 		/* we need to let spa_open/spa_load clear the chains */
6422 		spa_set_log_state(spa, SPA_LOG_CLEAR);
6423 	}
6424 	spa->spa_last_open_failed = 0;
6425 	spa_namespace_exit(FTAG);
6426 
6427 	if (zc->zc_cookie & ZPOOL_NO_REWIND) {
6428 		error = spa_open(zc->zc_name, &spa, FTAG);
6429 	} else {
6430 		nvlist_t *policy;
6431 		nvlist_t *config = NULL;
6432 
6433 		if (zc->zc_nvlist_src == 0)
6434 			return (SET_ERROR(EINVAL));
6435 
6436 		if ((error = get_nvlist(zc->zc_nvlist_src,
6437 		    zc->zc_nvlist_src_size, zc->zc_iflags, &policy)) == 0) {
6438 			error = spa_open_rewind(zc->zc_name, &spa, FTAG,
6439 			    policy, &config);
6440 			if (config != NULL) {
6441 				int err;
6442 
6443 				if ((err = put_nvlist(zc, config)) != 0)
6444 					error = err;
6445 				nvlist_free(config);
6446 			}
6447 			nvlist_free(policy);
6448 		}
6449 	}
6450 
6451 	if (error != 0)
6452 		return (error);
6453 
6454 	/*
6455 	 * If multihost is enabled, resuming I/O is unsafe as another
6456 	 * host may have imported the pool. Check for remote activity.
6457 	 */
6458 	if (spa_multihost(spa) && spa_suspended(spa) &&
6459 	    spa_mmp_remote_host_activity(spa)) {
6460 		spa_close(spa, FTAG);
6461 		return (SET_ERROR(EREMOTEIO));
6462 	}
6463 
6464 	spa_vdev_state_enter(spa, SCL_NONE);
6465 
6466 	if (zc->zc_guid == 0) {
6467 		vd = NULL;
6468 	} else {
6469 		vd = spa_lookup_by_guid(spa, zc->zc_guid, B_TRUE);
6470 		if (vd == NULL) {
6471 			error = SET_ERROR(ENODEV);
6472 			(void) spa_vdev_state_exit(spa, NULL, error);
6473 			spa_close(spa, FTAG);
6474 			return (error);
6475 		}
6476 	}
6477 
6478 	vdev_clear(spa, vd);
6479 
6480 	(void) spa_vdev_state_exit(spa, spa_suspended(spa) ?
6481 	    NULL : spa->spa_root_vdev, 0);
6482 
6483 	/*
6484 	 * Resume any suspended I/Os.
6485 	 */
6486 	if (zio_resume(spa) != 0)
6487 		error = SET_ERROR(EIO);
6488 
6489 	spa_close(spa, FTAG);
6490 
6491 	return (error);
6492 }
6493 
6494 /*
6495  * Reopen all the vdevs associated with the pool.
6496  *
6497  * innvl: {
6498  *  "scrub_restart" -> when true and scrub is running, allow to restart
6499  *              scrub as the side effect of the reopen (boolean).
6500  * }
6501  *
6502  * outnvl is unused
6503  */
6504 static const zfs_ioc_key_t zfs_keys_pool_reopen[] = {
6505 	{"scrub_restart",	DATA_TYPE_BOOLEAN_VALUE,	ZK_OPTIONAL},
6506 };
6507 
6508 static int
zfs_ioc_pool_reopen(const char * pool,nvlist_t * innvl,nvlist_t * outnvl)6509 zfs_ioc_pool_reopen(const char *pool, nvlist_t *innvl, nvlist_t *outnvl)
6510 {
6511 	(void) outnvl;
6512 	spa_t *spa;
6513 	int error;
6514 	boolean_t rc, scrub_restart = B_TRUE;
6515 
6516 	if (innvl) {
6517 		error = nvlist_lookup_boolean_value(innvl,
6518 		    "scrub_restart", &rc);
6519 		if (error == 0)
6520 			scrub_restart = rc;
6521 	}
6522 
6523 	error = spa_open(pool, &spa, FTAG);
6524 	if (error != 0)
6525 		return (error);
6526 
6527 	spa_vdev_state_enter(spa, SCL_NONE);
6528 
6529 	/*
6530 	 * If the scrub_restart flag is B_FALSE and a scrub is already
6531 	 * in progress then set spa_scrub_reopen flag to B_TRUE so that
6532 	 * we don't restart the scrub as a side effect of the reopen.
6533 	 * Otherwise, let vdev_open() decided if a resilver is required.
6534 	 */
6535 
6536 	spa->spa_scrub_reopen = (!scrub_restart &&
6537 	    dsl_scan_scrubbing(spa->spa_dsl_pool));
6538 	vdev_reopen(spa->spa_root_vdev);
6539 	spa->spa_scrub_reopen = B_FALSE;
6540 
6541 	(void) spa_vdev_state_exit(spa, NULL, 0);
6542 	spa_close(spa, FTAG);
6543 	return (0);
6544 }
6545 
6546 /*
6547  * inputs:
6548  * zc_name	name of filesystem
6549  *
6550  * outputs:
6551  * zc_string	name of conflicting snapshot, if there is one
6552  */
6553 static int
zfs_ioc_promote(zfs_cmd_t * zc)6554 zfs_ioc_promote(zfs_cmd_t *zc)
6555 {
6556 	dsl_pool_t *dp;
6557 	dsl_dataset_t *ds, *ods;
6558 	char origin[ZFS_MAX_DATASET_NAME_LEN];
6559 	char *cp;
6560 	int error;
6561 
6562 	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
6563 	if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0 ||
6564 	    strchr(zc->zc_name, '%'))
6565 		return (SET_ERROR(EINVAL));
6566 
6567 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
6568 	if (error != 0)
6569 		return (error);
6570 
6571 	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &ds);
6572 	if (error != 0) {
6573 		dsl_pool_rele(dp, FTAG);
6574 		return (error);
6575 	}
6576 
6577 	if (!dsl_dir_is_clone(ds->ds_dir)) {
6578 		dsl_dataset_rele(ds, FTAG);
6579 		dsl_pool_rele(dp, FTAG);
6580 		return (SET_ERROR(EINVAL));
6581 	}
6582 
6583 	error = dsl_dataset_hold_obj(dp,
6584 	    dsl_dir_phys(ds->ds_dir)->dd_origin_obj, FTAG, &ods);
6585 	if (error != 0) {
6586 		dsl_dataset_rele(ds, FTAG);
6587 		dsl_pool_rele(dp, FTAG);
6588 		return (error);
6589 	}
6590 
6591 	dsl_dataset_name(ods, origin);
6592 	dsl_dataset_rele(ods, FTAG);
6593 	dsl_dataset_rele(ds, FTAG);
6594 	dsl_pool_rele(dp, FTAG);
6595 
6596 	/*
6597 	 * We don't need to unmount *all* the origin fs's snapshots, but
6598 	 * it's easier.
6599 	 */
6600 	cp = strchr(origin, '@');
6601 	if (cp)
6602 		*cp = '\0';
6603 	(void) dmu_objset_find(origin,
6604 	    zfs_unmount_snap_cb, NULL, DS_FIND_SNAPSHOTS);
6605 	return (dsl_dataset_promote(zc->zc_name, zc->zc_string));
6606 }
6607 
6608 /*
6609  * Retrieve a single {user|group|project}{used|quota}@... property.
6610  *
6611  * inputs:
6612  * zc_name	name of filesystem
6613  * zc_objset_type zfs_userquota_prop_t
6614  * zc_value	domain name (eg. "S-1-234-567-89")
6615  * zc_guid	RID/UID/GID
6616  *
6617  * outputs:
6618  * zc_cookie	property value
6619  */
6620 static int
zfs_ioc_userspace_one(zfs_cmd_t * zc)6621 zfs_ioc_userspace_one(zfs_cmd_t *zc)
6622 {
6623 	zfsvfs_t *zfsvfs;
6624 	int error;
6625 
6626 	if (zc->zc_objset_type >= ZFS_NUM_USERQUOTA_PROPS)
6627 		return (SET_ERROR(EINVAL));
6628 
6629 	error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
6630 	if (error != 0)
6631 		return (error);
6632 
6633 	error = zfs_userspace_one(zfsvfs,
6634 	    zc->zc_objset_type, zc->zc_value, zc->zc_guid, &zc->zc_cookie);
6635 	zfsvfs_rele(zfsvfs, FTAG);
6636 
6637 	return (error);
6638 }
6639 
6640 /*
6641  * inputs:
6642  * zc_name		name of filesystem
6643  * zc_cookie		zap cursor
6644  * zc_objset_type	zfs_userquota_prop_t
6645  * zc_nvlist_dst[_size] buffer to fill (not really an nvlist)
6646  *
6647  * outputs:
6648  * zc_nvlist_dst[_size]	data buffer (array of zfs_useracct_t)
6649  * zc_cookie	zap cursor
6650  */
6651 static int
zfs_ioc_userspace_many(zfs_cmd_t * zc)6652 zfs_ioc_userspace_many(zfs_cmd_t *zc)
6653 {
6654 	zfsvfs_t *zfsvfs;
6655 	int bufsize = zc->zc_nvlist_dst_size;
6656 
6657 	if (bufsize <= 0)
6658 		return (SET_ERROR(ENOMEM));
6659 
6660 	int error = zfsvfs_hold(zc->zc_name, FTAG, &zfsvfs, B_FALSE);
6661 	if (error != 0)
6662 		return (error);
6663 
6664 	void *buf = vmem_alloc(bufsize, KM_SLEEP);
6665 
6666 	error = zfs_userspace_many(zfsvfs, zc->zc_objset_type, &zc->zc_cookie,
6667 	    buf, &zc->zc_nvlist_dst_size, &zc->zc_guid);
6668 
6669 	if (error == 0) {
6670 		error = xcopyout(buf,
6671 		    (void *)(uintptr_t)zc->zc_nvlist_dst,
6672 		    zc->zc_nvlist_dst_size);
6673 	}
6674 	vmem_free(buf, bufsize);
6675 	zfsvfs_rele(zfsvfs, FTAG);
6676 
6677 	return (error);
6678 }
6679 
6680 /*
6681  * inputs:
6682  * zc_name		name of filesystem
6683  *
6684  * outputs:
6685  * none
6686  */
6687 static int
zfs_ioc_userspace_upgrade(zfs_cmd_t * zc)6688 zfs_ioc_userspace_upgrade(zfs_cmd_t *zc)
6689 {
6690 	int error = 0;
6691 	zfsvfs_t *zfsvfs;
6692 
6693 	if (getzfsvfs(zc->zc_name, &zfsvfs) == 0) {
6694 		if (!dmu_objset_userused_enabled(zfsvfs->z_os)) {
6695 			/*
6696 			 * If userused is not enabled, it may be because the
6697 			 * objset needs to be closed & reopened (to grow the
6698 			 * objset_phys_t).  Suspend/resume the fs will do that.
6699 			 */
6700 			dsl_dataset_t *ds, *newds;
6701 
6702 			ds = dmu_objset_ds(zfsvfs->z_os);
6703 			error = zfs_suspend_fs(zfsvfs);
6704 			if (error == 0) {
6705 				dmu_objset_refresh_ownership(ds, &newds,
6706 				    B_TRUE, zfsvfs);
6707 				error = zfs_resume_fs(zfsvfs, newds);
6708 			}
6709 		}
6710 		if (error == 0) {
6711 			mutex_enter(&zfsvfs->z_os->os_upgrade_lock);
6712 			if (zfsvfs->z_os->os_upgrade_id == 0) {
6713 				/* clear potential error code and retry */
6714 				zfsvfs->z_os->os_upgrade_status = 0;
6715 				mutex_exit(&zfsvfs->z_os->os_upgrade_lock);
6716 
6717 				dsl_pool_config_enter(
6718 				    dmu_objset_pool(zfsvfs->z_os), FTAG);
6719 				dmu_objset_userspace_upgrade(zfsvfs->z_os);
6720 				dsl_pool_config_exit(
6721 				    dmu_objset_pool(zfsvfs->z_os), FTAG);
6722 			} else {
6723 				mutex_exit(&zfsvfs->z_os->os_upgrade_lock);
6724 			}
6725 
6726 			taskq_wait_id(zfsvfs->z_os->os_spa->spa_upgrade_taskq,
6727 			    zfsvfs->z_os->os_upgrade_id);
6728 			error = zfsvfs->z_os->os_upgrade_status;
6729 		}
6730 		zfs_vfs_rele(zfsvfs);
6731 	} else {
6732 		objset_t *os;
6733 
6734 		/* XXX kind of reading contents without owning */
6735 		error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
6736 		if (error != 0)
6737 			return (error);
6738 
6739 		mutex_enter(&os->os_upgrade_lock);
6740 		if (os->os_upgrade_id == 0) {
6741 			/* clear potential error code and retry */
6742 			os->os_upgrade_status = 0;
6743 			mutex_exit(&os->os_upgrade_lock);
6744 
6745 			dmu_objset_userspace_upgrade(os);
6746 		} else {
6747 			mutex_exit(&os->os_upgrade_lock);
6748 		}
6749 
6750 		dsl_pool_rele(dmu_objset_pool(os), FTAG);
6751 
6752 		taskq_wait_id(os->os_spa->spa_upgrade_taskq, os->os_upgrade_id);
6753 		error = os->os_upgrade_status;
6754 
6755 		dsl_dataset_rele_flags(dmu_objset_ds(os), DS_HOLD_FLAG_DECRYPT,
6756 		    FTAG);
6757 	}
6758 	return (error);
6759 }
6760 
6761 /*
6762  * inputs:
6763  * zc_name		name of filesystem
6764  *
6765  * outputs:
6766  * none
6767  */
6768 static int
zfs_ioc_id_quota_upgrade(zfs_cmd_t * zc)6769 zfs_ioc_id_quota_upgrade(zfs_cmd_t *zc)
6770 {
6771 	objset_t *os;
6772 	int error;
6773 
6774 	error = dmu_objset_hold_flags(zc->zc_name, B_TRUE, FTAG, &os);
6775 	if (error != 0)
6776 		return (error);
6777 
6778 	if (dmu_objset_userobjspace_upgradable(os) ||
6779 	    dmu_objset_projectquota_upgradable(os)) {
6780 		mutex_enter(&os->os_upgrade_lock);
6781 		if (os->os_upgrade_id == 0) {
6782 			/* clear potential error code and retry */
6783 			os->os_upgrade_status = 0;
6784 			mutex_exit(&os->os_upgrade_lock);
6785 
6786 			dmu_objset_id_quota_upgrade(os);
6787 		} else {
6788 			mutex_exit(&os->os_upgrade_lock);
6789 		}
6790 
6791 		dsl_pool_rele(dmu_objset_pool(os), FTAG);
6792 
6793 		taskq_wait_id(os->os_spa->spa_upgrade_taskq, os->os_upgrade_id);
6794 		error = os->os_upgrade_status;
6795 	} else {
6796 		dsl_pool_rele(dmu_objset_pool(os), FTAG);
6797 	}
6798 
6799 	dsl_dataset_rele_flags(dmu_objset_ds(os), DS_HOLD_FLAG_DECRYPT, FTAG);
6800 
6801 	return (error);
6802 }
6803 
6804 static int
zfs_ioc_share(zfs_cmd_t * zc)6805 zfs_ioc_share(zfs_cmd_t *zc)
6806 {
6807 	return (SET_ERROR(ENOSYS));
6808 }
6809 
6810 /*
6811  * inputs:
6812  * zc_name		name of containing filesystem
6813  * zc_obj		object # beyond which we want next in-use object #
6814  *
6815  * outputs:
6816  * zc_obj		next in-use object #
6817  */
6818 static int
zfs_ioc_next_obj(zfs_cmd_t * zc)6819 zfs_ioc_next_obj(zfs_cmd_t *zc)
6820 {
6821 	objset_t *os = NULL;
6822 	int error;
6823 
6824 	error = dmu_objset_hold(zc->zc_name, FTAG, &os);
6825 	if (error != 0)
6826 		return (error);
6827 
6828 	error = dmu_object_next(os, &zc->zc_obj, B_FALSE, 0);
6829 
6830 	dmu_objset_rele(os, FTAG);
6831 	return (error);
6832 }
6833 
6834 /*
6835  * inputs:
6836  * zc_name		name of filesystem
6837  * zc_value		prefix name for snapshot
6838  * zc_cleanup_fd	cleanup-on-exit file descriptor for calling process
6839  *
6840  * outputs:
6841  * zc_value		short name of new snapshot
6842  */
6843 static int
zfs_ioc_tmp_snapshot(zfs_cmd_t * zc)6844 zfs_ioc_tmp_snapshot(zfs_cmd_t *zc)
6845 {
6846 	char *snap_name;
6847 	char *hold_name;
6848 	minor_t minor;
6849 
6850 	zfs_file_t *fp = zfs_onexit_fd_hold(zc->zc_cleanup_fd, &minor);
6851 	if (fp == NULL)
6852 		return (SET_ERROR(EBADF));
6853 
6854 	snap_name = kmem_asprintf("%s-%016llx", zc->zc_value,
6855 	    (u_longlong_t)ddi_get_lbolt64());
6856 	hold_name = kmem_asprintf("%%%s", zc->zc_value);
6857 
6858 	int error = dsl_dataset_snapshot_tmp(zc->zc_name, snap_name, minor,
6859 	    hold_name);
6860 	if (error == 0)
6861 		(void) strlcpy(zc->zc_value, snap_name,
6862 		    sizeof (zc->zc_value));
6863 	kmem_strfree(snap_name);
6864 	kmem_strfree(hold_name);
6865 	zfs_onexit_fd_rele(fp);
6866 	return (error);
6867 }
6868 
6869 /*
6870  * inputs:
6871  * zc_name		name of "to" snapshot
6872  * zc_value		name of "from" snapshot
6873  * zc_cookie		file descriptor to write diff data on
6874  *
6875  * outputs:
6876  * dmu_diff_record_t's to the file descriptor
6877  */
6878 static int
zfs_ioc_diff(zfs_cmd_t * zc)6879 zfs_ioc_diff(zfs_cmd_t *zc)
6880 {
6881 	zfs_file_t *fp;
6882 	offset_t off;
6883 	int error;
6884 
6885 	if ((fp = zfs_file_get(zc->zc_cookie)) == NULL)
6886 		return (SET_ERROR(EBADF));
6887 
6888 	off = zfs_file_off(fp);
6889 	error = dmu_diff(zc->zc_name, zc->zc_value, fp, &off);
6890 
6891 	zfs_file_put(fp);
6892 
6893 	return (error);
6894 }
6895 
6896 static int
zfs_ioc_smb_acl(zfs_cmd_t * zc)6897 zfs_ioc_smb_acl(zfs_cmd_t *zc)
6898 {
6899 	return (SET_ERROR(ENOTSUP));
6900 }
6901 
6902 /*
6903  * innvl: {
6904  *     "holds" -> { snapname -> holdname (string), ... }
6905  *     (optional) "cleanup_fd" -> fd (int32)
6906  * }
6907  *
6908  * outnvl: {
6909  *     snapname -> error value (int32)
6910  *     ...
6911  * }
6912  */
6913 static const zfs_ioc_key_t zfs_keys_hold[] = {
6914 	{"holds",		DATA_TYPE_NVLIST,	0},
6915 	{"cleanup_fd",		DATA_TYPE_INT32,	ZK_OPTIONAL},
6916 };
6917 
6918 static int
zfs_ioc_hold(const char * pool,nvlist_t * args,nvlist_t * errlist)6919 zfs_ioc_hold(const char *pool, nvlist_t *args, nvlist_t *errlist)
6920 {
6921 	(void) pool;
6922 	nvpair_t *pair;
6923 	nvlist_t *holds;
6924 	int cleanup_fd = -1;
6925 	int error;
6926 	minor_t minor = 0;
6927 	zfs_file_t *fp = NULL;
6928 
6929 	holds = fnvlist_lookup_nvlist(args, "holds");
6930 
6931 	/* make sure the user didn't pass us any invalid (empty) tags */
6932 	for (pair = nvlist_next_nvpair(holds, NULL); pair != NULL;
6933 	    pair = nvlist_next_nvpair(holds, pair)) {
6934 		const char *htag;
6935 
6936 		error = nvpair_value_string(pair, &htag);
6937 		if (error != 0)
6938 			return (SET_ERROR(error));
6939 
6940 		if (strlen(htag) == 0)
6941 			return (SET_ERROR(EINVAL));
6942 	}
6943 
6944 	if (nvlist_lookup_int32(args, "cleanup_fd", &cleanup_fd) == 0) {
6945 		fp = zfs_onexit_fd_hold(cleanup_fd, &minor);
6946 		if (fp == NULL)
6947 			return (SET_ERROR(EBADF));
6948 	}
6949 
6950 	error = dsl_dataset_user_hold(holds, minor, errlist);
6951 	if (fp != NULL) {
6952 		ASSERT3U(minor, !=, 0);
6953 		zfs_onexit_fd_rele(fp);
6954 	}
6955 	return (SET_ERROR(error));
6956 }
6957 
6958 /*
6959  * innvl is not used.
6960  *
6961  * outnvl: {
6962  *    holdname -> time added (uint64 seconds since epoch)
6963  *    ...
6964  * }
6965  */
6966 static const zfs_ioc_key_t zfs_keys_get_holds[] = {
6967 	/* no nvl keys */
6968 };
6969 
6970 static int
zfs_ioc_get_holds(const char * snapname,nvlist_t * args,nvlist_t * outnvl)6971 zfs_ioc_get_holds(const char *snapname, nvlist_t *args, nvlist_t *outnvl)
6972 {
6973 	(void) args;
6974 	return (dsl_dataset_get_holds(snapname, outnvl));
6975 }
6976 
6977 /*
6978  * innvl: {
6979  *     snapname -> { holdname, ... }
6980  *     ...
6981  * }
6982  *
6983  * outnvl: {
6984  *     snapname -> error value (int32)
6985  *     ...
6986  * }
6987  */
6988 static const zfs_ioc_key_t zfs_keys_release[] = {
6989 	{"<snapname>...",	DATA_TYPE_NVLIST,	ZK_WILDCARDLIST},
6990 };
6991 
6992 static int
zfs_ioc_release(const char * pool,nvlist_t * holds,nvlist_t * errlist)6993 zfs_ioc_release(const char *pool, nvlist_t *holds, nvlist_t *errlist)
6994 {
6995 	(void) pool;
6996 	return (dsl_dataset_user_release(holds, errlist));
6997 }
6998 
6999 /*
7000  * inputs:
7001  * zc_guid		flags (ZEVENT_NONBLOCK)
7002  * zc_cleanup_fd	zevent file descriptor
7003  *
7004  * outputs:
7005  * zc_nvlist_dst	next nvlist event
7006  * zc_cookie		dropped events since last get
7007  */
7008 static int
zfs_ioc_events_next(zfs_cmd_t * zc)7009 zfs_ioc_events_next(zfs_cmd_t *zc)
7010 {
7011 	zfs_zevent_t *ze;
7012 	nvlist_t *event = NULL;
7013 	minor_t minor;
7014 	uint64_t dropped = 0;
7015 	int error;
7016 
7017 	zfs_file_t *fp = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
7018 	if (fp == NULL)
7019 		return (SET_ERROR(EBADF));
7020 
7021 	do {
7022 		error = zfs_zevent_next(ze, &event,
7023 		    &zc->zc_nvlist_dst_size, &dropped);
7024 		if (event != NULL) {
7025 			zc->zc_cookie = dropped;
7026 			error = put_nvlist(zc, event);
7027 			nvlist_free(event);
7028 		}
7029 
7030 		if (zc->zc_guid & ZEVENT_NONBLOCK)
7031 			break;
7032 
7033 		if ((error == 0) || (error != ENOENT))
7034 			break;
7035 
7036 		error = zfs_zevent_wait(ze);
7037 		if (error != 0)
7038 			break;
7039 	} while (1);
7040 
7041 	zfs_zevent_fd_rele(fp);
7042 
7043 	return (error);
7044 }
7045 
7046 /*
7047  * outputs:
7048  * zc_cookie		cleared events count
7049  */
7050 static int
zfs_ioc_events_clear(zfs_cmd_t * zc)7051 zfs_ioc_events_clear(zfs_cmd_t *zc)
7052 {
7053 	uint_t count;
7054 
7055 	zfs_zevent_drain_all(&count);
7056 	zc->zc_cookie = count;
7057 
7058 	return (0);
7059 }
7060 
7061 /*
7062  * inputs:
7063  * zc_guid		eid | ZEVENT_SEEK_START | ZEVENT_SEEK_END
7064  * zc_cleanup		zevent file descriptor
7065  */
7066 static int
zfs_ioc_events_seek(zfs_cmd_t * zc)7067 zfs_ioc_events_seek(zfs_cmd_t *zc)
7068 {
7069 	zfs_zevent_t *ze;
7070 	minor_t minor;
7071 	int error;
7072 
7073 	zfs_file_t *fp = zfs_zevent_fd_hold(zc->zc_cleanup_fd, &minor, &ze);
7074 	if (fp == NULL)
7075 		return (SET_ERROR(EBADF));
7076 
7077 	error = zfs_zevent_seek(ze, zc->zc_guid);
7078 	zfs_zevent_fd_rele(fp);
7079 
7080 	return (error);
7081 }
7082 
7083 /*
7084  * inputs:
7085  * zc_name		name of later filesystem or snapshot
7086  * zc_value		full name of old snapshot or bookmark
7087  *
7088  * outputs:
7089  * zc_cookie		space in bytes
7090  * zc_objset_type	compressed space in bytes
7091  * zc_perm_action	uncompressed space in bytes
7092  */
7093 static int
zfs_ioc_space_written(zfs_cmd_t * zc)7094 zfs_ioc_space_written(zfs_cmd_t *zc)
7095 {
7096 	int error;
7097 	dsl_pool_t *dp;
7098 	dsl_dataset_t *new;
7099 
7100 	error = dsl_pool_hold(zc->zc_name, FTAG, &dp);
7101 	if (error != 0)
7102 		return (error);
7103 	error = dsl_dataset_hold(dp, zc->zc_name, FTAG, &new);
7104 	if (error != 0) {
7105 		dsl_pool_rele(dp, FTAG);
7106 		return (error);
7107 	}
7108 	if (strchr(zc->zc_value, '#') != NULL) {
7109 		zfs_bookmark_phys_t bmp;
7110 		error = dsl_bookmark_lookup(dp, zc->zc_value,
7111 		    new, &bmp);
7112 		if (error == 0) {
7113 			error = dsl_dataset_space_written_bookmark(&bmp, new,
7114 			    &zc->zc_cookie,
7115 			    &zc->zc_objset_type, &zc->zc_perm_action);
7116 		}
7117 	} else {
7118 		dsl_dataset_t *old;
7119 		error = dsl_dataset_hold(dp, zc->zc_value, FTAG, &old);
7120 
7121 		if (error == 0) {
7122 			error = dsl_dataset_space_written(old, new,
7123 			    &zc->zc_cookie,
7124 			    &zc->zc_objset_type, &zc->zc_perm_action);
7125 			dsl_dataset_rele(old, FTAG);
7126 		}
7127 	}
7128 	dsl_dataset_rele(new, FTAG);
7129 	dsl_pool_rele(dp, FTAG);
7130 	return (error);
7131 }
7132 
7133 /*
7134  * innvl: {
7135  *     "firstsnap" -> snapshot name
7136  * }
7137  *
7138  * outnvl: {
7139  *     "used" -> space in bytes
7140  *     "compressed" -> compressed space in bytes
7141  *     "uncompressed" -> uncompressed space in bytes
7142  * }
7143  */
/*
 * innvl keys for ZFS_IOC_SPACE_SNAPS (passed to zfs_ioctl_register() in
 * zfs_ioctl_init()).  "firstsnap" carries no ZK_OPTIONAL flag.
 */
static const zfs_ioc_key_t zfs_keys_space_snaps[] = {
	{"firstsnap",	DATA_TYPE_STRING,	0},
};
7147 
7148 static int
zfs_ioc_space_snaps(const char * lastsnap,nvlist_t * innvl,nvlist_t * outnvl)7149 zfs_ioc_space_snaps(const char *lastsnap, nvlist_t *innvl, nvlist_t *outnvl)
7150 {
7151 	int error;
7152 	dsl_pool_t *dp;
7153 	dsl_dataset_t *new, *old;
7154 	const char *firstsnap;
7155 	uint64_t used, comp, uncomp;
7156 
7157 	firstsnap = fnvlist_lookup_string(innvl, "firstsnap");
7158 
7159 	error = dsl_pool_hold(lastsnap, FTAG, &dp);
7160 	if (error != 0)
7161 		return (error);
7162 
7163 	error = dsl_dataset_hold(dp, lastsnap, FTAG, &new);
7164 	if (error == 0 && !new->ds_is_snapshot) {
7165 		dsl_dataset_rele(new, FTAG);
7166 		error = SET_ERROR(EINVAL);
7167 	}
7168 	if (error != 0) {
7169 		dsl_pool_rele(dp, FTAG);
7170 		return (error);
7171 	}
7172 	error = dsl_dataset_hold(dp, firstsnap, FTAG, &old);
7173 	if (error == 0 && !old->ds_is_snapshot) {
7174 		dsl_dataset_rele(old, FTAG);
7175 		error = SET_ERROR(EINVAL);
7176 	}
7177 	if (error != 0) {
7178 		dsl_dataset_rele(new, FTAG);
7179 		dsl_pool_rele(dp, FTAG);
7180 		return (error);
7181 	}
7182 
7183 	error = dsl_dataset_space_wouldfree(old, new, &used, &comp, &uncomp);
7184 	dsl_dataset_rele(old, FTAG);
7185 	dsl_dataset_rele(new, FTAG);
7186 	dsl_pool_rele(dp, FTAG);
7187 	fnvlist_add_uint64(outnvl, "used", used);
7188 	fnvlist_add_uint64(outnvl, "compressed", comp);
7189 	fnvlist_add_uint64(outnvl, "uncompressed", uncomp);
7190 	return (error);
7191 }
7192 
7193 /*
7194  * innvl: {
7195  *     "fd" -> file descriptor to write stream to (int32)
7196  *     (optional) "fromsnap" -> full snap name to send an incremental from
7197  *     (optional) "largeblockok" -> (value ignored)
7198  *         indicates that blocks > 128KB are permitted
7199  *     (optional) "embedok" -> (value ignored)
7200  *         presence indicates DRR_WRITE_EMBEDDED records are permitted
7201  *     (optional) "compressok" -> (value ignored)
7202  *         presence indicates compressed DRR_WRITE records are permitted
7203  *     (optional) "rawok" -> (value ignored)
7204  *         presence indicates raw encrypted records should be used.
7205  *     (optional) "savedok" -> (value ignored)
7206  *         presence indicates we should send a partially received snapshot
7207  *     (optional) "resume_object" and "resume_offset" -> (uint64)
7208  *         if present, resume send stream from specified object and offset.
7209  *     (optional) "redactbook" -> (string)
7210  *         if present, use this bookmark's redaction list to generate a redacted
7211  *         send stream
7212  * }
7213  *
7214  * outnvl is unused
7215  */
/*
 * innvl keys for ZFS_IOC_SEND_NEW (passed to zfs_ioctl_register() in
 * zfs_ioctl_init()).  Only "fd" lacks the ZK_OPTIONAL flag.
 */
static const zfs_ioc_key_t zfs_keys_send_new[] = {
	{"fd",			DATA_TYPE_INT32,	0},
	{"fromsnap",		DATA_TYPE_STRING,	ZK_OPTIONAL},
	{"largeblockok",	DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"embedok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"compressok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"rawok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"savedok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"resume_object",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
	{"resume_offset",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
	{"redactbook",		DATA_TYPE_STRING,	ZK_OPTIONAL},
};
7228 
7229 static int
zfs_ioc_send_new(const char * snapname,nvlist_t * innvl,nvlist_t * outnvl)7230 zfs_ioc_send_new(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
7231 {
7232 	(void) outnvl;
7233 	int error;
7234 	offset_t off;
7235 	const char *fromname = NULL;
7236 	int fd;
7237 	boolean_t largeblockok;
7238 	boolean_t embedok;
7239 	boolean_t compressok;
7240 	boolean_t rawok;
7241 	boolean_t savedok;
7242 	uint64_t resumeobj = 0;
7243 	uint64_t resumeoff = 0;
7244 	const char *redactbook = NULL;
7245 
7246 	fd = fnvlist_lookup_int32(innvl, "fd");
7247 
7248 	(void) nvlist_lookup_string(innvl, "fromsnap", &fromname);
7249 
7250 	largeblockok = nvlist_exists(innvl, "largeblockok");
7251 	embedok = nvlist_exists(innvl, "embedok");
7252 	compressok = nvlist_exists(innvl, "compressok");
7253 	rawok = nvlist_exists(innvl, "rawok");
7254 	savedok = nvlist_exists(innvl, "savedok");
7255 
7256 	(void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
7257 	(void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
7258 
7259 	(void) nvlist_lookup_string(innvl, "redactbook", &redactbook);
7260 
7261 	dump_bytes_arg_t dba;
7262 	dmu_send_outparams_t out;
7263 	error = dump_bytes_init(&dba, fd, &out);
7264 	if (error)
7265 		return (error);
7266 
7267 	off = zfs_file_off(dba.dba_fp);
7268 	error = dmu_send(snapname, fromname, embedok, largeblockok,
7269 	    compressok, rawok, savedok, resumeobj, resumeoff,
7270 	    redactbook, fd, &off, &out);
7271 
7272 	dump_bytes_fini(&dba);
7273 
7274 	return (error);
7275 }
7276 
7277 static int
send_space_sum(objset_t * os,void * buf,int len,void * arg)7278 send_space_sum(objset_t *os, void *buf, int len, void *arg)
7279 {
7280 	(void) os, (void) buf;
7281 	uint64_t *size = arg;
7282 
7283 	*size += len;
7284 	return (0);
7285 }
7286 
7287 /*
7288  * Determine approximately how large a zfs send stream will be -- the number
7289  * of bytes that will be written to the fd supplied to zfs_ioc_send_new().
7290  *
7291  * innvl: {
7292  *     (optional) "from" -> full snap or bookmark name to send an incremental
7293  *                          from
7294  *     (optional) "largeblockok" -> (value ignored)
7295  *         indicates that blocks > 128KB are permitted
7296  *     (optional) "embedok" -> (value ignored)
7297  *         presence indicates DRR_WRITE_EMBEDDED records are permitted
7298  *     (optional) "compressok" -> (value ignored)
7299  *         presence indicates compressed DRR_WRITE records are permitted
7300  *     (optional) "rawok" -> (value ignored)
7301  *         presence indicates raw encrypted records should be used.
7302  *     (optional) "resume_object" and "resume_offset" -> (uint64)
7303  *         if present, resume send stream from specified object and offset.
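 *     (optional) "fd" -> file descriptor to use as a cookie for progress
 *         tracking (int32)
 *     (optional) "redactbook" -> (string)
 *         if present, the estimate is produced by a dry-run dmu_send() that
 *         is passed this bookmark name
 *     (optional) "bytes" -> (uint64)
 *         byte count subtracted from the fast-path estimate
 * }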
7307  *
7308  * outnvl: {
7309  *     "space" -> bytes of space (uint64)
7310  * }
7311  */
/*
 * innvl keys for ZFS_IOC_SEND_SPACE (passed to zfs_ioctl_register() in
 * zfs_ioctl_init()).  Every key is ZK_OPTIONAL.
 *
 * NOTE(review): zfs_ioc_send_space() also tests for "savedok", which is not
 * listed here, and it never reads "fromsnap", which is.  Confirm against the
 * key-validation code whether that is intentional.
 */
static const zfs_ioc_key_t zfs_keys_send_space[] = {
	{"from",		DATA_TYPE_STRING,	ZK_OPTIONAL},
	{"fromsnap",		DATA_TYPE_STRING,	ZK_OPTIONAL},
	{"largeblockok",	DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"embedok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"compressok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"rawok",		DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
	{"fd",			DATA_TYPE_INT32,	ZK_OPTIONAL},
	{"redactbook",		DATA_TYPE_STRING,	ZK_OPTIONAL},
	{"resume_object",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
	{"resume_offset",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
	{"bytes",		DATA_TYPE_UINT64,	ZK_OPTIONAL},
};
7325 
7326 static int
zfs_ioc_send_space(const char * snapname,nvlist_t * innvl,nvlist_t * outnvl)7327 zfs_ioc_send_space(const char *snapname, nvlist_t *innvl, nvlist_t *outnvl)
7328 {
7329 	dsl_pool_t *dp;
7330 	dsl_dataset_t *tosnap;
7331 	dsl_dataset_t *fromsnap = NULL;
7332 	int error;
7333 	const char *fromname = NULL;
7334 	const char *redactlist_book = NULL;
7335 	boolean_t largeblockok;
7336 	boolean_t embedok;
7337 	boolean_t compressok;
7338 	boolean_t rawok;
7339 	boolean_t savedok;
7340 	uint64_t space = 0;
7341 	boolean_t full_estimate = B_FALSE;
7342 	uint64_t resumeobj = 0;
7343 	uint64_t resumeoff = 0;
7344 	uint64_t resume_bytes = 0;
7345 	int32_t fd = -1;
7346 	zfs_bookmark_phys_t zbm = {0};
7347 
7348 	error = dsl_pool_hold(snapname, FTAG, &dp);
7349 	if (error != 0)
7350 		return (error);
7351 
7352 	error = dsl_dataset_hold(dp, snapname, FTAG, &tosnap);
7353 	if (error != 0) {
7354 		dsl_pool_rele(dp, FTAG);
7355 		return (error);
7356 	}
7357 	(void) nvlist_lookup_int32(innvl, "fd", &fd);
7358 
7359 	largeblockok = nvlist_exists(innvl, "largeblockok");
7360 	embedok = nvlist_exists(innvl, "embedok");
7361 	compressok = nvlist_exists(innvl, "compressok");
7362 	rawok = nvlist_exists(innvl, "rawok");
7363 	savedok = nvlist_exists(innvl, "savedok");
7364 	boolean_t from = (nvlist_lookup_string(innvl, "from", &fromname) == 0);
7365 	boolean_t altbook = (nvlist_lookup_string(innvl, "redactbook",
7366 	    &redactlist_book) == 0);
7367 
7368 	(void) nvlist_lookup_uint64(innvl, "resume_object", &resumeobj);
7369 	(void) nvlist_lookup_uint64(innvl, "resume_offset", &resumeoff);
7370 	(void) nvlist_lookup_uint64(innvl, "bytes", &resume_bytes);
7371 
7372 	if (altbook) {
7373 		full_estimate = B_TRUE;
7374 	} else if (from) {
7375 		if (strchr(fromname, '#')) {
7376 			error = dsl_bookmark_lookup(dp, fromname, tosnap, &zbm);
7377 
7378 			/*
7379 			 * dsl_bookmark_lookup() will fail with EXDEV if
7380 			 * the from-bookmark and tosnap are at the same txg.
7381 			 * However, it's valid to do a send (and therefore,
7382 			 * a send estimate) from and to the same time point,
7383 			 * if the bookmark is redacted (the incremental send
7384 			 * can change what's redacted on the target).  In
7385 			 * this case, dsl_bookmark_lookup() fills in zbm
7386 			 * but returns EXDEV.  Ignore this error.
7387 			 */
7388 			if (error == EXDEV && zbm.zbm_redaction_obj != 0 &&
7389 			    zbm.zbm_guid ==
7390 			    dsl_dataset_phys(tosnap)->ds_guid)
7391 				error = 0;
7392 
7393 			if (error != 0) {
7394 				dsl_dataset_rele(tosnap, FTAG);
7395 				dsl_pool_rele(dp, FTAG);
7396 				return (error);
7397 			}
7398 			if (zbm.zbm_redaction_obj != 0 || !(zbm.zbm_flags &
7399 			    ZBM_FLAG_HAS_FBN)) {
7400 				full_estimate = B_TRUE;
7401 			}
7402 		} else if (strchr(fromname, '@')) {
7403 			error = dsl_dataset_hold(dp, fromname, FTAG, &fromsnap);
7404 			if (error != 0) {
7405 				dsl_dataset_rele(tosnap, FTAG);
7406 				dsl_pool_rele(dp, FTAG);
7407 				return (error);
7408 			}
7409 
7410 			if (!dsl_dataset_is_before(tosnap, fromsnap, 0)) {
7411 				full_estimate = B_TRUE;
7412 				dsl_dataset_rele(fromsnap, FTAG);
7413 			}
7414 		} else {
7415 			/*
7416 			 * from is not properly formatted as a snapshot or
7417 			 * bookmark
7418 			 */
7419 			dsl_dataset_rele(tosnap, FTAG);
7420 			dsl_pool_rele(dp, FTAG);
7421 			return (SET_ERROR(EINVAL));
7422 		}
7423 	}
7424 
7425 	if (full_estimate) {
7426 		dmu_send_outparams_t out = {0};
7427 		offset_t off = 0;
7428 		out.dso_outfunc = send_space_sum;
7429 		out.dso_arg = &space;
7430 		out.dso_dryrun = B_TRUE;
7431 		/*
7432 		 * We have to release these holds so dmu_send can take them.  It
7433 		 * will do all the error checking we need.
7434 		 */
7435 		dsl_dataset_rele(tosnap, FTAG);
7436 		dsl_pool_rele(dp, FTAG);
7437 		error = dmu_send(snapname, fromname, embedok, largeblockok,
7438 		    compressok, rawok, savedok, resumeobj, resumeoff,
7439 		    redactlist_book, fd, &off, &out);
7440 	} else {
7441 		error = dmu_send_estimate_fast(tosnap, fromsnap,
7442 		    (from && strchr(fromname, '#') != NULL ? &zbm : NULL),
7443 		    compressok || rawok, savedok, &space);
7444 		space -= resume_bytes;
7445 		if (fromsnap != NULL)
7446 			dsl_dataset_rele(fromsnap, FTAG);
7447 		dsl_dataset_rele(tosnap, FTAG);
7448 		dsl_pool_rele(dp, FTAG);
7449 	}
7450 
7451 	fnvlist_add_uint64(outnvl, "space", space);
7452 
7453 	return (error);
7454 }
7455 
7456 /*
7457  * Sync the currently open TXG to disk for the specified pool.
7458  * This is somewhat similar to 'zfs_sync()'.
7459  * For cases that do not result in error this ioctl will wait for
7460  * the currently open TXG to commit before returning back to the caller.
7461  *
7462  * innvl: {
7463  *  "force" -> when true, force uberblock update even if there is no dirty data.
7464  *             In addition this will cause the vdev configuration to be written
7465  *             out including updating the zpool cache file. (boolean_t)
7466  * }
7467  *
7468  * onvl is unused
7469  */
/*
 * innvl keys for ZFS_IOC_POOL_SYNC (passed to zfs_ioctl_register() in
 * zfs_ioctl_init()).  "force" carries no ZK_OPTIONAL flag.
 */
static const zfs_ioc_key_t zfs_keys_pool_sync[] = {
	{"force",	DATA_TYPE_BOOLEAN_VALUE,	0},
};
7473 
7474 static int
zfs_ioc_pool_sync(const char * pool,nvlist_t * innvl,nvlist_t * onvl)7475 zfs_ioc_pool_sync(const char *pool, nvlist_t *innvl, nvlist_t *onvl)
7476 {
7477 	(void) onvl;
7478 	int err;
7479 	boolean_t rc, force = B_FALSE;
7480 	spa_t *spa;
7481 
7482 	if ((err = spa_open(pool, &spa, FTAG)) != 0)
7483 		return (err);
7484 
7485 	if (innvl) {
7486 		err = nvlist_lookup_boolean_value(innvl, "force", &rc);
7487 		if (err == 0)
7488 			force = rc;
7489 	}
7490 
7491 	if (force) {
7492 		spa_config_enter(spa, SCL_CONFIG, FTAG, RW_WRITER);
7493 		vdev_config_dirty(spa->spa_root_vdev);
7494 		spa_config_exit(spa, SCL_CONFIG, FTAG);
7495 	}
7496 	txg_wait_synced(spa_get_dsl(spa), 0);
7497 
7498 	spa_close(spa, FTAG);
7499 
7500 	return (0);
7501 }
7502 
7503 /*
7504  * Load a user's wrapping key into the kernel.
7505  * innvl: {
7506  *     "hidden_args" -> { "wkeydata" -> value }
7507  *         raw uint8_t array of encryption wrapping key data (32 bytes)
7508  *     (optional) "noop" -> (value ignored)
 *         presence indicates key should only be verified, not loaded
7510  * }
7511  */
/*
 * innvl keys for ZFS_IOC_LOAD_KEY (passed to zfs_ioctl_register() in
 * zfs_ioctl_init()).  "hidden_args" carries no ZK_OPTIONAL flag.
 */
static const zfs_ioc_key_t zfs_keys_load_key[] = {
	{"hidden_args",	DATA_TYPE_NVLIST,	0},
	{"noop",	DATA_TYPE_BOOLEAN,	ZK_OPTIONAL},
};
7516 
7517 static int
zfs_ioc_load_key(const char * dsname,nvlist_t * innvl,nvlist_t * outnvl)7518 zfs_ioc_load_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
7519 {
7520 	(void) outnvl;
7521 	int ret;
7522 	dsl_crypto_params_t *dcp = NULL;
7523 	nvlist_t *hidden_args;
7524 	boolean_t noop = nvlist_exists(innvl, "noop");
7525 
7526 	if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
7527 		ret = SET_ERROR(EINVAL);
7528 		goto error;
7529 	}
7530 
7531 	hidden_args = fnvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS);
7532 
7533 	ret = dsl_crypto_params_create_nvlist(DCP_CMD_NONE, NULL,
7534 	    hidden_args, &dcp);
7535 	if (ret != 0)
7536 		goto error;
7537 
7538 	ret = spa_keystore_load_wkey(dsname, dcp, noop);
7539 	if (ret != 0)
7540 		goto error;
7541 
7542 	dsl_crypto_params_free(dcp, noop);
7543 
7544 	return (0);
7545 
7546 error:
7547 	dsl_crypto_params_free(dcp, B_TRUE);
7548 	return (ret);
7549 }
7550 
7551 /*
7552  * Unload a user's wrapping key from the kernel.
7553  * Both innvl and outnvl are unused.
7554  */
/*
 * innvl keys for ZFS_IOC_UNLOAD_KEY (passed to zfs_ioctl_register() in
 * zfs_ioctl_init()); the table is intentionally empty.
 */
static const zfs_ioc_key_t zfs_keys_unload_key[] = {
	/* no nvl keys */
};
7558 
7559 static int
zfs_ioc_unload_key(const char * dsname,nvlist_t * innvl,nvlist_t * outnvl)7560 zfs_ioc_unload_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
7561 {
7562 	(void) innvl, (void) outnvl;
7563 	int ret = 0;
7564 
7565 	if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
7566 		ret = (SET_ERROR(EINVAL));
7567 		goto out;
7568 	}
7569 
7570 	ret = spa_keystore_unload_wkey(dsname);
7571 	if (ret != 0)
7572 		goto out;
7573 
7574 out:
7575 	return (ret);
7576 }
7577 
7578 /*
7579  * Changes a user's wrapping key used to decrypt a dataset. The keyformat,
7580  * keylocation, pbkdf2salt, and pbkdf2iters properties can also be specified
7581  * here to change how the key is derived in userspace.
7582  *
 * innvl: {
 *    "crypt_cmd" (optional) -> command code (uint64)
 *         defaults to DCP_CMD_NONE when absent
 *    "hidden_args" (optional) -> { "wkeydata" -> value }
 *         raw uint8_t array of new encryption wrapping key data (32 bytes)
 *    "props" (optional) -> { prop -> value }
 * }
7588  *
7589  * outnvl is unused
7590  */
/*
 * innvl keys for ZFS_IOC_CHANGE_KEY (passed to zfs_ioctl_register() in
 * zfs_ioctl_init()).  Every key is ZK_OPTIONAL.
 */
static const zfs_ioc_key_t zfs_keys_change_key[] = {
	{"crypt_cmd",	DATA_TYPE_UINT64,	ZK_OPTIONAL},
	{"hidden_args",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
	{"props",	DATA_TYPE_NVLIST,	ZK_OPTIONAL},
};
7596 
7597 static int
zfs_ioc_change_key(const char * dsname,nvlist_t * innvl,nvlist_t * outnvl)7598 zfs_ioc_change_key(const char *dsname, nvlist_t *innvl, nvlist_t *outnvl)
7599 {
7600 	(void) outnvl;
7601 	int ret;
7602 	uint64_t cmd = DCP_CMD_NONE;
7603 	dsl_crypto_params_t *dcp = NULL;
7604 	nvlist_t *props = NULL, *hidden_args = NULL;
7605 
7606 	if (strchr(dsname, '@') != NULL || strchr(dsname, '%') != NULL) {
7607 		ret = (SET_ERROR(EINVAL));
7608 		goto error;
7609 	}
7610 
7611 	(void) nvlist_lookup_uint64(innvl, "crypt_cmd", &cmd);
7612 	(void) nvlist_lookup_nvlist(innvl, "props", &props);
7613 	(void) nvlist_lookup_nvlist(innvl, ZPOOL_HIDDEN_ARGS, &hidden_args);
7614 
7615 	ret = dsl_crypto_params_create_nvlist(cmd, props, hidden_args, &dcp);
7616 	if (ret != 0)
7617 		goto error;
7618 
7619 	/* The keylocation property is set from dcp->cp_keylocation. */
7620 	(void) nvlist_remove_all(props, zfs_prop_to_name(ZFS_PROP_KEYLOCATION));
7621 
7622 	if ((ret = zfs_check_userprops(props)) != 0)
7623 		goto error;
7624 
7625 	ret = spa_keystore_change_key(dsname, dcp, props);
7626 	if (ret != 0)
7627 		goto error;
7628 
7629 	dsl_crypto_params_free(dcp, B_FALSE);
7630 
7631 	return (0);
7632 
7633 error:
7634 	dsl_crypto_params_free(dcp, B_TRUE);
7635 	return (ret);
7636 }
7637 
/*
 * Per-ioctl registration records, indexed by (ioc - ZFS_IOC_FIRST).  Slots
 * are filled in by zfs_ioctl_register() and zfs_ioctl_register_legacy().
 */
static zfs_ioc_vec_t zfs_ioc_vec[ZFS_IOC_LAST - ZFS_IOC_FIRST];
7639 
7640 static void
zfs_ioctl_register_legacy(zfs_ioc_t ioc,zfs_ioc_legacy_func_t * func,zfs_secpolicy_func_t * secpolicy,zfs_ioc_namecheck_t namecheck,boolean_t log_history,zfs_ioc_poolcheck_t pool_check)7641 zfs_ioctl_register_legacy(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
7642     zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
7643     boolean_t log_history, zfs_ioc_poolcheck_t pool_check)
7644 {
7645 	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
7646 
7647 	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
7648 	ASSERT3U(ioc, <, ZFS_IOC_LAST);
7649 	ASSERT0P(vec->zvec_legacy_func);
7650 	ASSERT0P(vec->zvec_func);
7651 
7652 	vec->zvec_legacy_func = func;
7653 	vec->zvec_secpolicy = secpolicy;
7654 	vec->zvec_namecheck = namecheck;
7655 	vec->zvec_allow_log = log_history;
7656 	vec->zvec_pool_check = pool_check;
7657 }
7658 
7659 /*
7660  * See the block comment at the beginning of this file for details on
7661  * each argument to this function.
7662  */
7663 void
zfs_ioctl_register(const char * name,zfs_ioc_t ioc,zfs_ioc_func_t * func,zfs_secpolicy_func_t * secpolicy,zfs_ioc_namecheck_t namecheck,zfs_ioc_poolcheck_t pool_check,boolean_t smush_outnvlist,boolean_t allow_log,const zfs_ioc_key_t * nvl_keys,size_t num_keys)7664 zfs_ioctl_register(const char *name, zfs_ioc_t ioc, zfs_ioc_func_t *func,
7665     zfs_secpolicy_func_t *secpolicy, zfs_ioc_namecheck_t namecheck,
7666     zfs_ioc_poolcheck_t pool_check, boolean_t smush_outnvlist,
7667     boolean_t allow_log, const zfs_ioc_key_t *nvl_keys, size_t num_keys)
7668 {
7669 	zfs_ioc_vec_t *vec = &zfs_ioc_vec[ioc - ZFS_IOC_FIRST];
7670 
7671 	ASSERT3U(ioc, >=, ZFS_IOC_FIRST);
7672 	ASSERT3U(ioc, <, ZFS_IOC_LAST);
7673 	ASSERT0P(vec->zvec_legacy_func);
7674 	ASSERT0P(vec->zvec_func);
7675 
7676 	/* if we are logging, the name must be valid */
7677 	ASSERT(!allow_log || namecheck != NO_NAME);
7678 
7679 	vec->zvec_name = name;
7680 	vec->zvec_func = func;
7681 	vec->zvec_secpolicy = secpolicy;
7682 	vec->zvec_namecheck = namecheck;
7683 	vec->zvec_pool_check = pool_check;
7684 	vec->zvec_smush_outnvlist = smush_outnvlist;
7685 	vec->zvec_allow_log = allow_log;
7686 	vec->zvec_nvl_keys = nvl_keys;
7687 	vec->zvec_nvl_key_count = num_keys;
7688 }
7689 
/*
 * Register a legacy-signature ioctl whose name check is POOL_NAME.  All
 * other settings are forwarded unchanged to zfs_ioctl_register_legacy().
 */
static void
zfs_ioctl_register_pool(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
    zfs_secpolicy_func_t *secpolicy, boolean_t log_history,
    zfs_ioc_poolcheck_t pool_check)
{
	zfs_ioctl_register_legacy(ioc, func, secpolicy,
	    POOL_NAME, log_history, pool_check);
}
7698 
/*
 * Register a legacy-signature ioctl whose name check is DATASET_NAME and
 * whose log_history argument is B_FALSE.  The security policy and pool check
 * are supplied by the caller.
 */
void
zfs_ioctl_register_dataset_nolog(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
    zfs_secpolicy_func_t *secpolicy, zfs_ioc_poolcheck_t pool_check)
{
	zfs_ioctl_register_legacy(ioc, func, secpolicy,
	    DATASET_NAME, B_FALSE, pool_check);
}
7706 
/*
 * Register a legacy-signature ioctl with a fixed set of options:
 * zfs_secpolicy_config as the security policy, POOL_NAME as the name check,
 * log_history B_TRUE, and POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY as the
 * pool check.
 */
static void
zfs_ioctl_register_pool_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
{
	zfs_ioctl_register_legacy(ioc, func, zfs_secpolicy_config,
	    POOL_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
}
7713 
/*
 * Register a legacy-signature ioctl with NO_NAME as the name check,
 * log_history B_FALSE, and POOL_CHECK_NONE as the pool check.  The security
 * policy is supplied by the caller.
 */
static void
zfs_ioctl_register_pool_meta(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
    zfs_secpolicy_func_t *secpolicy)
{
	zfs_ioctl_register_legacy(ioc, func, secpolicy,
	    NO_NAME, B_FALSE, POOL_CHECK_NONE);
}
7721 
/*
 * Register a legacy-signature ioctl with DATASET_NAME as the name check,
 * log_history B_FALSE, and POOL_CHECK_SUSPENDED as the pool check.  The
 * security policy is supplied by the caller.
 */
static void
zfs_ioctl_register_dataset_read_secpolicy(zfs_ioc_t ioc,
    zfs_ioc_legacy_func_t *func, zfs_secpolicy_func_t *secpolicy)
{
	zfs_ioctl_register_legacy(ioc, func, secpolicy,
	    DATASET_NAME, B_FALSE, POOL_CHECK_SUSPENDED);
}
7729 
/*
 * Same as zfs_ioctl_register_dataset_read_secpolicy(), with
 * zfs_secpolicy_read as the security policy.
 */
static void
zfs_ioctl_register_dataset_read(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func)
{
	zfs_ioctl_register_dataset_read_secpolicy(ioc, func,
	    zfs_secpolicy_read);
}
7736 
/*
 * Register a legacy-signature ioctl with DATASET_NAME as the name check,
 * log_history B_TRUE, and POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY as the
 * pool check.  The security policy is supplied by the caller.
 */
static void
zfs_ioctl_register_dataset_modify(zfs_ioc_t ioc, zfs_ioc_legacy_func_t *func,
    zfs_secpolicy_func_t *secpolicy)
{
	zfs_ioctl_register_legacy(ioc, func, secpolicy,
	    DATASET_NAME, B_TRUE, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
}
7744 
7745 static void
zfs_ioctl_init(void)7746 zfs_ioctl_init(void)
7747 {
7748 	zfs_ioctl_register("snapshot", ZFS_IOC_SNAPSHOT,
7749 	    zfs_ioc_snapshot, zfs_secpolicy_snapshot, POOL_NAME,
7750 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7751 	    zfs_keys_snapshot, ARRAY_SIZE(zfs_keys_snapshot));
7752 
7753 	zfs_ioctl_register("log_history", ZFS_IOC_LOG_HISTORY,
7754 	    zfs_ioc_log_history, zfs_secpolicy_log_history, NO_NAME,
7755 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
7756 	    zfs_keys_log_history, ARRAY_SIZE(zfs_keys_log_history));
7757 
7758 	zfs_ioctl_register("space_snaps", ZFS_IOC_SPACE_SNAPS,
7759 	    zfs_ioc_space_snaps, zfs_secpolicy_read, DATASET_NAME,
7760 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
7761 	    zfs_keys_space_snaps, ARRAY_SIZE(zfs_keys_space_snaps));
7762 
7763 	zfs_ioctl_register("send", ZFS_IOC_SEND_NEW,
7764 	    zfs_ioc_send_new, zfs_secpolicy_send_new, DATASET_NAME,
7765 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
7766 	    zfs_keys_send_new, ARRAY_SIZE(zfs_keys_send_new));
7767 
7768 	zfs_ioctl_register("send_space", ZFS_IOC_SEND_SPACE,
7769 	    zfs_ioc_send_space, zfs_secpolicy_read, DATASET_NAME,
7770 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
7771 	    zfs_keys_send_space, ARRAY_SIZE(zfs_keys_send_space));
7772 
7773 	zfs_ioctl_register("create", ZFS_IOC_CREATE,
7774 	    zfs_ioc_create, zfs_secpolicy_create_clone, DATASET_NAME,
7775 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7776 	    zfs_keys_create, ARRAY_SIZE(zfs_keys_create));
7777 
7778 	zfs_ioctl_register("clone", ZFS_IOC_CLONE,
7779 	    zfs_ioc_clone, zfs_secpolicy_create_clone, DATASET_NAME,
7780 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7781 	    zfs_keys_clone, ARRAY_SIZE(zfs_keys_clone));
7782 
7783 	zfs_ioctl_register("remap", ZFS_IOC_REMAP,
7784 	    zfs_ioc_remap, zfs_secpolicy_none, DATASET_NAME,
7785 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE,
7786 	    zfs_keys_remap, ARRAY_SIZE(zfs_keys_remap));
7787 
7788 	zfs_ioctl_register("destroy_snaps", ZFS_IOC_DESTROY_SNAPS,
7789 	    zfs_ioc_destroy_snaps, zfs_secpolicy_destroy_snaps, POOL_NAME,
7790 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7791 	    zfs_keys_destroy_snaps, ARRAY_SIZE(zfs_keys_destroy_snaps));
7792 
7793 	zfs_ioctl_register("hold", ZFS_IOC_HOLD,
7794 	    zfs_ioc_hold, zfs_secpolicy_hold, POOL_NAME,
7795 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7796 	    zfs_keys_hold, ARRAY_SIZE(zfs_keys_hold));
7797 	zfs_ioctl_register("release", ZFS_IOC_RELEASE,
7798 	    zfs_ioc_release, zfs_secpolicy_release, POOL_NAME,
7799 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7800 	    zfs_keys_release, ARRAY_SIZE(zfs_keys_release));
7801 
7802 	zfs_ioctl_register("get_holds", ZFS_IOC_GET_HOLDS,
7803 	    zfs_ioc_get_holds, zfs_secpolicy_read, DATASET_NAME,
7804 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
7805 	    zfs_keys_get_holds, ARRAY_SIZE(zfs_keys_get_holds));
7806 
7807 	zfs_ioctl_register("rollback", ZFS_IOC_ROLLBACK,
7808 	    zfs_ioc_rollback, zfs_secpolicy_rollback, DATASET_NAME,
7809 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE,
7810 	    zfs_keys_rollback, ARRAY_SIZE(zfs_keys_rollback));
7811 
7812 	zfs_ioctl_register("bookmark", ZFS_IOC_BOOKMARK,
7813 	    zfs_ioc_bookmark, zfs_secpolicy_bookmark, POOL_NAME,
7814 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7815 	    zfs_keys_bookmark, ARRAY_SIZE(zfs_keys_bookmark));
7816 
7817 	zfs_ioctl_register("get_bookmarks", ZFS_IOC_GET_BOOKMARKS,
7818 	    zfs_ioc_get_bookmarks, zfs_secpolicy_read, DATASET_NAME,
7819 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE,
7820 	    zfs_keys_get_bookmarks, ARRAY_SIZE(zfs_keys_get_bookmarks));
7821 
7822 	zfs_ioctl_register("get_bookmark_props", ZFS_IOC_GET_BOOKMARK_PROPS,
7823 	    zfs_ioc_get_bookmark_props, zfs_secpolicy_read, ENTITY_NAME,
7824 	    POOL_CHECK_SUSPENDED, B_FALSE, B_FALSE, zfs_keys_get_bookmark_props,
7825 	    ARRAY_SIZE(zfs_keys_get_bookmark_props));
7826 
7827 	zfs_ioctl_register("destroy_bookmarks", ZFS_IOC_DESTROY_BOOKMARKS,
7828 	    zfs_ioc_destroy_bookmarks, zfs_secpolicy_destroy_bookmarks,
7829 	    POOL_NAME,
7830 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7831 	    zfs_keys_destroy_bookmarks,
7832 	    ARRAY_SIZE(zfs_keys_destroy_bookmarks));
7833 
7834 	zfs_ioctl_register("receive", ZFS_IOC_RECV_NEW,
7835 	    zfs_ioc_recv_new, zfs_secpolicy_recv, DATASET_NAME,
7836 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7837 	    zfs_keys_recv_new, ARRAY_SIZE(zfs_keys_recv_new));
7838 	zfs_ioctl_register("load-key", ZFS_IOC_LOAD_KEY,
7839 	    zfs_ioc_load_key, zfs_secpolicy_load_key,
7840 	    DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE,
7841 	    zfs_keys_load_key, ARRAY_SIZE(zfs_keys_load_key));
7842 	zfs_ioctl_register("unload-key", ZFS_IOC_UNLOAD_KEY,
7843 	    zfs_ioc_unload_key, zfs_secpolicy_load_key,
7844 	    DATASET_NAME, POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE,
7845 	    zfs_keys_unload_key, ARRAY_SIZE(zfs_keys_unload_key));
7846 	zfs_ioctl_register("change-key", ZFS_IOC_CHANGE_KEY,
7847 	    zfs_ioc_change_key, zfs_secpolicy_change_key,
7848 	    DATASET_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY,
7849 	    B_TRUE, B_TRUE, zfs_keys_change_key,
7850 	    ARRAY_SIZE(zfs_keys_change_key));
7851 
7852 	zfs_ioctl_register("sync", ZFS_IOC_POOL_SYNC,
7853 	    zfs_ioc_pool_sync, zfs_secpolicy_none, POOL_NAME,
7854 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
7855 	    zfs_keys_pool_sync, ARRAY_SIZE(zfs_keys_pool_sync));
7856 	zfs_ioctl_register("reopen", ZFS_IOC_POOL_REOPEN, zfs_ioc_pool_reopen,
7857 	    zfs_secpolicy_config, POOL_NAME, POOL_CHECK_SUSPENDED, B_TRUE,
7858 	    B_TRUE, zfs_keys_pool_reopen, ARRAY_SIZE(zfs_keys_pool_reopen));
7859 
7860 	zfs_ioctl_register("channel_program", ZFS_IOC_CHANNEL_PROGRAM,
7861 	    zfs_ioc_channel_program, zfs_secpolicy_config,
7862 	    POOL_NAME, POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE,
7863 	    B_TRUE, zfs_keys_channel_program,
7864 	    ARRAY_SIZE(zfs_keys_channel_program));
7865 
7866 	zfs_ioctl_register("redact", ZFS_IOC_REDACT,
7867 	    zfs_ioc_redact, zfs_secpolicy_config, DATASET_NAME,
7868 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7869 	    zfs_keys_redact, ARRAY_SIZE(zfs_keys_redact));
7870 
7871 	zfs_ioctl_register("zpool_checkpoint", ZFS_IOC_POOL_CHECKPOINT,
7872 	    zfs_ioc_pool_checkpoint, zfs_secpolicy_config, POOL_NAME,
7873 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7874 	    zfs_keys_pool_checkpoint, ARRAY_SIZE(zfs_keys_pool_checkpoint));
7875 
7876 	zfs_ioctl_register("zpool_discard_checkpoint",
7877 	    ZFS_IOC_POOL_DISCARD_CHECKPOINT, zfs_ioc_pool_discard_checkpoint,
7878 	    zfs_secpolicy_config, POOL_NAME,
7879 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7880 	    zfs_keys_pool_discard_checkpoint,
7881 	    ARRAY_SIZE(zfs_keys_pool_discard_checkpoint));
7882 
7883 	zfs_ioctl_register("zpool_prefetch",
7884 	    ZFS_IOC_POOL_PREFETCH, zfs_ioc_pool_prefetch,
7885 	    zfs_secpolicy_config, POOL_NAME,
7886 	    POOL_CHECK_SUSPENDED, B_TRUE, B_TRUE,
7887 	    zfs_keys_pool_prefetch, ARRAY_SIZE(zfs_keys_pool_prefetch));
7888 
7889 	zfs_ioctl_register("initialize", ZFS_IOC_POOL_INITIALIZE,
7890 	    zfs_ioc_pool_initialize, zfs_secpolicy_config, POOL_NAME,
7891 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7892 	    zfs_keys_pool_initialize, ARRAY_SIZE(zfs_keys_pool_initialize));
7893 
7894 	zfs_ioctl_register("trim", ZFS_IOC_POOL_TRIM,
7895 	    zfs_ioc_pool_trim, zfs_secpolicy_config, POOL_NAME,
7896 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7897 	    zfs_keys_pool_trim, ARRAY_SIZE(zfs_keys_pool_trim));
7898 
7899 	zfs_ioctl_register("wait", ZFS_IOC_WAIT,
7900 	    zfs_ioc_wait, zfs_secpolicy_none, POOL_NAME,
7901 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
7902 	    zfs_keys_pool_wait, ARRAY_SIZE(zfs_keys_pool_wait));
7903 
7904 	zfs_ioctl_register("wait_fs", ZFS_IOC_WAIT_FS,
7905 	    zfs_ioc_wait_fs, zfs_secpolicy_none, DATASET_NAME,
7906 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
7907 	    zfs_keys_fs_wait, ARRAY_SIZE(zfs_keys_fs_wait));
7908 
7909 	zfs_ioctl_register("set_bootenv", ZFS_IOC_SET_BOOTENV,
7910 	    zfs_ioc_set_bootenv, zfs_secpolicy_config, POOL_NAME,
7911 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_TRUE,
7912 	    zfs_keys_set_bootenv, ARRAY_SIZE(zfs_keys_set_bootenv));
7913 
7914 	zfs_ioctl_register("get_bootenv", ZFS_IOC_GET_BOOTENV,
7915 	    zfs_ioc_get_bootenv, zfs_secpolicy_none, POOL_NAME,
7916 	    POOL_CHECK_SUSPENDED, B_FALSE, B_TRUE,
7917 	    zfs_keys_get_bootenv, ARRAY_SIZE(zfs_keys_get_bootenv));
7918 
7919 	zfs_ioctl_register("zpool_vdev_get_props", ZFS_IOC_VDEV_GET_PROPS,
7920 	    zfs_ioc_vdev_get_props, zfs_secpolicy_read, POOL_NAME,
7921 	    POOL_CHECK_NONE, B_FALSE, B_FALSE, zfs_keys_vdev_get_props,
7922 	    ARRAY_SIZE(zfs_keys_vdev_get_props));
7923 
7924 	zfs_ioctl_register("zpool_vdev_set_props", ZFS_IOC_VDEV_SET_PROPS,
7925 	    zfs_ioc_vdev_set_props, zfs_secpolicy_config, POOL_NAME,
7926 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_FALSE, B_FALSE,
7927 	    zfs_keys_vdev_set_props, ARRAY_SIZE(zfs_keys_vdev_set_props));
7928 
7929 	zfs_ioctl_register("scrub", ZFS_IOC_POOL_SCRUB,
7930 	    zfs_ioc_pool_scrub, zfs_secpolicy_config, POOL_NAME,
7931 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7932 	    zfs_keys_pool_scrub, ARRAY_SIZE(zfs_keys_pool_scrub));
7933 
7934 	zfs_ioctl_register("get_props", ZFS_IOC_POOL_GET_PROPS,
7935 	    zfs_ioc_pool_get_props, zfs_secpolicy_read, POOL_NAME,
7936 	    POOL_CHECK_NONE, B_FALSE, B_FALSE,
7937 	    zfs_keys_get_props, ARRAY_SIZE(zfs_keys_get_props));
7938 
7939 	zfs_ioctl_register("zpool_ddt_prune", ZFS_IOC_DDT_PRUNE,
7940 	    zfs_ioc_ddt_prune, zfs_secpolicy_config, POOL_NAME,
7941 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY, B_TRUE, B_TRUE,
7942 	    zfs_keys_ddt_prune, ARRAY_SIZE(zfs_keys_ddt_prune));
7943 
7944 	/* IOCTLS that use the legacy function signature */
7945 
7946 	zfs_ioctl_register_legacy(ZFS_IOC_POOL_FREEZE, zfs_ioc_pool_freeze,
7947 	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_READONLY);
7948 
7949 	zfs_ioctl_register_pool(ZFS_IOC_POOL_CREATE, zfs_ioc_pool_create,
7950 	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
7951 	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SCAN,
7952 	    zfs_ioc_pool_scan);
7953 	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_UPGRADE,
7954 	    zfs_ioc_pool_upgrade);
7955 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ADD,
7956 	    zfs_ioc_vdev_add);
7957 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_REMOVE,
7958 	    zfs_ioc_vdev_remove);
7959 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SET_STATE,
7960 	    zfs_ioc_vdev_set_state);
7961 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_ATTACH,
7962 	    zfs_ioc_vdev_attach);
7963 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_DETACH,
7964 	    zfs_ioc_vdev_detach);
7965 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETPATH,
7966 	    zfs_ioc_vdev_setpath);
7967 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SETFRU,
7968 	    zfs_ioc_vdev_setfru);
7969 	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_SET_PROPS,
7970 	    zfs_ioc_pool_set_props);
7971 	zfs_ioctl_register_pool_modify(ZFS_IOC_VDEV_SPLIT,
7972 	    zfs_ioc_vdev_split);
7973 	zfs_ioctl_register_pool_modify(ZFS_IOC_POOL_REGUID,
7974 	    zfs_ioc_pool_reguid);
7975 
7976 	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_CONFIGS,
7977 	    zfs_ioc_pool_configs, zfs_secpolicy_none);
7978 	zfs_ioctl_register_pool_meta(ZFS_IOC_POOL_TRYIMPORT,
7979 	    zfs_ioc_pool_tryimport, zfs_secpolicy_config);
7980 	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_FAULT,
7981 	    zfs_ioc_inject_fault, zfs_secpolicy_inject);
7982 	zfs_ioctl_register_pool_meta(ZFS_IOC_CLEAR_FAULT,
7983 	    zfs_ioc_clear_fault, zfs_secpolicy_inject);
7984 	zfs_ioctl_register_pool_meta(ZFS_IOC_INJECT_LIST_NEXT,
7985 	    zfs_ioc_inject_list_next, zfs_secpolicy_inject);
7986 
7987 	/*
7988 	 * pool destroy, and export don't log the history as part of
7989 	 * zfsdev_ioctl, but rather zfs_ioc_pool_export
7990 	 * does the logging of those commands.
7991 	 */
7992 	zfs_ioctl_register_pool(ZFS_IOC_POOL_DESTROY, zfs_ioc_pool_destroy,
7993 	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
7994 	zfs_ioctl_register_pool(ZFS_IOC_POOL_EXPORT, zfs_ioc_pool_export,
7995 	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
7996 
7997 	zfs_ioctl_register_pool(ZFS_IOC_POOL_STATS, zfs_ioc_pool_stats,
7998 	    zfs_secpolicy_read, B_FALSE, POOL_CHECK_NONE);
7999 
8000 	zfs_ioctl_register_pool(ZFS_IOC_ERROR_LOG, zfs_ioc_error_log,
8001 	    zfs_secpolicy_inject, B_FALSE, POOL_CHECK_SUSPENDED);
8002 	zfs_ioctl_register_pool(ZFS_IOC_DSOBJ_TO_DSNAME,
8003 	    zfs_ioc_dsobj_to_dsname,
8004 	    zfs_secpolicy_diff, B_FALSE, POOL_CHECK_SUSPENDED);
8005 	zfs_ioctl_register_pool(ZFS_IOC_POOL_GET_HISTORY,
8006 	    zfs_ioc_pool_get_history,
8007 	    zfs_secpolicy_config, B_FALSE, POOL_CHECK_SUSPENDED);
8008 
8009 	zfs_ioctl_register_pool(ZFS_IOC_POOL_IMPORT, zfs_ioc_pool_import,
8010 	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_NONE);
8011 
8012 	zfs_ioctl_register_pool(ZFS_IOC_CLEAR, zfs_ioc_clear,
8013 	    zfs_secpolicy_config, B_TRUE, POOL_CHECK_READONLY);
8014 
8015 	zfs_ioctl_register_dataset_read(ZFS_IOC_SPACE_WRITTEN,
8016 	    zfs_ioc_space_written);
8017 	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_RECVD_PROPS,
8018 	    zfs_ioc_objset_recvd_props);
8019 	zfs_ioctl_register_dataset_read(ZFS_IOC_NEXT_OBJ,
8020 	    zfs_ioc_next_obj);
8021 	zfs_ioctl_register_dataset_read(ZFS_IOC_GET_FSACL,
8022 	    zfs_ioc_get_fsacl);
8023 	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_STATS,
8024 	    zfs_ioc_objset_stats);
8025 	zfs_ioctl_register_dataset_read(ZFS_IOC_OBJSET_ZPLPROPS,
8026 	    zfs_ioc_objset_zplprops);
8027 	zfs_ioctl_register_dataset_read(ZFS_IOC_DATASET_LIST_NEXT,
8028 	    zfs_ioc_dataset_list_next);
8029 	zfs_ioctl_register_dataset_read(ZFS_IOC_SNAPSHOT_LIST_NEXT,
8030 	    zfs_ioc_snapshot_list_next);
8031 	zfs_ioctl_register_dataset_read(ZFS_IOC_SEND_PROGRESS,
8032 	    zfs_ioc_send_progress);
8033 
8034 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_DIFF,
8035 	    zfs_ioc_diff, zfs_secpolicy_diff);
8036 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_STATS,
8037 	    zfs_ioc_obj_to_stats, zfs_secpolicy_diff);
8038 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_OBJ_TO_PATH,
8039 	    zfs_ioc_obj_to_path, zfs_secpolicy_diff);
8040 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_ONE,
8041 	    zfs_ioc_userspace_one, zfs_secpolicy_userspace_one);
8042 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_USERSPACE_MANY,
8043 	    zfs_ioc_userspace_many, zfs_secpolicy_userspace_many);
8044 	zfs_ioctl_register_dataset_read_secpolicy(ZFS_IOC_SEND,
8045 	    zfs_ioc_send, zfs_secpolicy_send);
8046 
8047 	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_PROP, zfs_ioc_set_prop,
8048 	    zfs_secpolicy_none);
8049 	zfs_ioctl_register_dataset_modify(ZFS_IOC_DESTROY, zfs_ioc_destroy,
8050 	    zfs_secpolicy_destroy);
8051 	zfs_ioctl_register_dataset_modify(ZFS_IOC_RENAME, zfs_ioc_rename,
8052 	    zfs_secpolicy_rename);
8053 	zfs_ioctl_register_dataset_modify(ZFS_IOC_RECV, zfs_ioc_recv,
8054 	    zfs_secpolicy_recv);
8055 	zfs_ioctl_register_dataset_modify(ZFS_IOC_PROMOTE, zfs_ioc_promote,
8056 	    zfs_secpolicy_promote);
8057 	zfs_ioctl_register_dataset_modify(ZFS_IOC_INHERIT_PROP,
8058 	    zfs_ioc_inherit_prop, zfs_secpolicy_inherit_prop);
8059 	zfs_ioctl_register_dataset_modify(ZFS_IOC_SET_FSACL, zfs_ioc_set_fsacl,
8060 	    zfs_secpolicy_set_fsacl);
8061 
8062 	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SHARE, zfs_ioc_share,
8063 	    zfs_secpolicy_share, POOL_CHECK_NONE);
8064 	zfs_ioctl_register_dataset_nolog(ZFS_IOC_SMB_ACL, zfs_ioc_smb_acl,
8065 	    zfs_secpolicy_smb_acl, POOL_CHECK_NONE);
8066 	zfs_ioctl_register_dataset_nolog(ZFS_IOC_USERSPACE_UPGRADE,
8067 	    zfs_ioc_userspace_upgrade, zfs_secpolicy_userspace_upgrade,
8068 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
8069 	zfs_ioctl_register_dataset_nolog(ZFS_IOC_TMP_SNAPSHOT,
8070 	    zfs_ioc_tmp_snapshot, zfs_secpolicy_tmp_snapshot,
8071 	    POOL_CHECK_SUSPENDED | POOL_CHECK_READONLY);
8072 
8073 	zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_NEXT, zfs_ioc_events_next,
8074 	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
8075 	zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_CLEAR, zfs_ioc_events_clear,
8076 	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
8077 	zfs_ioctl_register_legacy(ZFS_IOC_EVENTS_SEEK, zfs_ioc_events_seek,
8078 	    zfs_secpolicy_config, NO_NAME, B_FALSE, POOL_CHECK_NONE);
8079 
8080 	zfs_ioctl_init_os();
8081 }
8082 
8083 /*
8084  * Verify that for non-legacy ioctls the input nvlist
8085  * pairs match against the expected input.
8086  *
8087  * Possible errors are:
8088  * ZFS_ERR_IOC_ARG_UNAVAIL	An unrecognized nvpair was encountered
8089  * ZFS_ERR_IOC_ARG_REQUIRED	A required nvpair is missing
8090  * ZFS_ERR_IOC_ARG_BADTYPE	Invalid type for nvpair
8091  */
8092 static int
zfs_check_input_nvpairs(nvlist_t * innvl,const zfs_ioc_vec_t * vec)8093 zfs_check_input_nvpairs(nvlist_t *innvl, const zfs_ioc_vec_t *vec)
8094 {
8095 	const zfs_ioc_key_t *nvl_keys = vec->zvec_nvl_keys;
8096 	boolean_t required_keys_found = B_FALSE;
8097 
8098 	/*
8099 	 * examine each input pair
8100 	 */
8101 	for (nvpair_t *pair = nvlist_next_nvpair(innvl, NULL);
8102 	    pair != NULL; pair = nvlist_next_nvpair(innvl, pair)) {
8103 		const char *name = nvpair_name(pair);
8104 		data_type_t type = nvpair_type(pair);
8105 		boolean_t identified = B_FALSE;
8106 
8107 		/*
8108 		 * check pair against the documented names and type
8109 		 */
8110 		for (int k = 0; k < vec->zvec_nvl_key_count; k++) {
8111 			/* if not a wild card name, check for an exact match */
8112 			if ((nvl_keys[k].zkey_flags & ZK_WILDCARDLIST) == 0 &&
8113 			    strcmp(nvl_keys[k].zkey_name, name) != 0)
8114 				continue;
8115 
8116 			identified = B_TRUE;
8117 
8118 			if (nvl_keys[k].zkey_type != DATA_TYPE_ANY &&
8119 			    nvl_keys[k].zkey_type != type) {
8120 				return (SET_ERROR(ZFS_ERR_IOC_ARG_BADTYPE));
8121 			}
8122 
8123 			if (nvl_keys[k].zkey_flags & ZK_OPTIONAL)
8124 				continue;
8125 
8126 			required_keys_found = B_TRUE;
8127 			break;
8128 		}
8129 
8130 		/* allow an 'optional' key, everything else is invalid */
8131 		if (!identified &&
8132 		    (strcmp(name, "optional") != 0 ||
8133 		    type != DATA_TYPE_NVLIST)) {
8134 			return (SET_ERROR(ZFS_ERR_IOC_ARG_UNAVAIL));
8135 		}
8136 	}
8137 
8138 	/* verify that all required keys were found */
8139 	for (int k = 0; k < vec->zvec_nvl_key_count; k++) {
8140 		if (nvl_keys[k].zkey_flags & ZK_OPTIONAL)
8141 			continue;
8142 
8143 		if (nvl_keys[k].zkey_flags & ZK_WILDCARDLIST) {
8144 			/* at least one non-optional key is expected here */
8145 			if (!required_keys_found)
8146 				return (SET_ERROR(ZFS_ERR_IOC_ARG_REQUIRED));
8147 			continue;
8148 		}
8149 
8150 		if (!nvlist_exists(innvl, nvl_keys[k].zkey_name))
8151 			return (SET_ERROR(ZFS_ERR_IOC_ARG_REQUIRED));
8152 	}
8153 
8154 	return (0);
8155 }
8156 
8157 static int
pool_status_check(const char * name,zfs_ioc_namecheck_t type,zfs_ioc_poolcheck_t check)8158 pool_status_check(const char *name, zfs_ioc_namecheck_t type,
8159     zfs_ioc_poolcheck_t check)
8160 {
8161 	spa_t *spa;
8162 	int error;
8163 
8164 	ASSERT(type == POOL_NAME || type == DATASET_NAME ||
8165 	    type == ENTITY_NAME);
8166 
8167 	if (check & POOL_CHECK_NONE)
8168 		return (0);
8169 
8170 	error = spa_open(name, &spa, FTAG);
8171 	if (error == 0) {
8172 		if ((check & POOL_CHECK_SUSPENDED) && spa_suspended(spa))
8173 			error = SET_ERROR(EAGAIN);
8174 		else if ((check & POOL_CHECK_READONLY) && !spa_writeable(spa))
8175 			error = SET_ERROR(EROFS);
8176 		spa_close(spa, FTAG);
8177 	}
8178 	return (error);
8179 }
8180 
8181 int
zfsdev_getminor(zfs_file_t * fp,minor_t * minorp)8182 zfsdev_getminor(zfs_file_t *fp, minor_t *minorp)
8183 {
8184 	zfsdev_state_t *zs, *fpd;
8185 
8186 	ASSERT(!MUTEX_HELD(&zfsdev_state_lock));
8187 
8188 	fpd = zfs_file_private(fp);
8189 	if (fpd == NULL)
8190 		return (SET_ERROR(EBADF));
8191 
8192 	mutex_enter(&zfsdev_state_lock);
8193 
8194 	for (zs = &zfsdev_state_listhead; zs != NULL; zs = zs->zs_next) {
8195 
8196 		if (zs->zs_minor == -1)
8197 			continue;
8198 
8199 		if (fpd == zs) {
8200 			*minorp = fpd->zs_minor;
8201 			mutex_exit(&zfsdev_state_lock);
8202 			return (0);
8203 		}
8204 	}
8205 
8206 	mutex_exit(&zfsdev_state_lock);
8207 
8208 	return (SET_ERROR(EBADF));
8209 }
8210 
8211 void *
zfsdev_get_state(minor_t minor,enum zfsdev_state_type which)8212 zfsdev_get_state(minor_t minor, enum zfsdev_state_type which)
8213 {
8214 	zfsdev_state_t *zs;
8215 
8216 	for (zs = &zfsdev_state_listhead; zs != NULL; zs = zs->zs_next) {
8217 		if (zs->zs_minor == minor) {
8218 			membar_consumer();
8219 			switch (which) {
8220 			case ZST_ONEXIT:
8221 				return (zs->zs_onexit);
8222 			case ZST_ZEVENT:
8223 				return (zs->zs_zevent);
8224 			case ZST_ALL:
8225 				return (zs);
8226 			}
8227 		}
8228 	}
8229 
8230 	return (NULL);
8231 }
8232 
8233 /*
8234  * Find a free minor number.  The zfsdev_state_list is expected to
8235  * be short since it is only a list of currently open file handles.
8236  */
8237 static minor_t
zfsdev_minor_alloc(void)8238 zfsdev_minor_alloc(void)
8239 {
8240 	static minor_t last_minor = 0;
8241 	minor_t m;
8242 
8243 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
8244 
8245 	for (m = last_minor + 1; m != last_minor; m++) {
8246 		if (m > ZFSDEV_MAX_MINOR)
8247 			m = 1;
8248 		if (zfsdev_get_state(m, ZST_ALL) == NULL) {
8249 			last_minor = m;
8250 			return (m);
8251 		}
8252 	}
8253 
8254 	return (0);
8255 }
8256 
8257 int
zfsdev_state_init(void * priv)8258 zfsdev_state_init(void *priv)
8259 {
8260 	zfsdev_state_t *zs, *zsprev = NULL;
8261 	minor_t minor;
8262 	boolean_t newzs = B_FALSE;
8263 
8264 	ASSERT(MUTEX_HELD(&zfsdev_state_lock));
8265 
8266 	minor = zfsdev_minor_alloc();
8267 	if (minor == 0)
8268 		return (SET_ERROR(ENXIO));
8269 
8270 	for (zs = &zfsdev_state_listhead; zs != NULL; zs = zs->zs_next) {
8271 		if (zs->zs_minor == -1)
8272 			break;
8273 		zsprev = zs;
8274 	}
8275 
8276 	if (!zs) {
8277 		zs = kmem_zalloc(sizeof (zfsdev_state_t), KM_SLEEP);
8278 		newzs = B_TRUE;
8279 	}
8280 
8281 	zfsdev_private_set_state(priv, zs);
8282 
8283 	zfs_onexit_init((zfs_onexit_t **)&zs->zs_onexit);
8284 	zfs_zevent_init((zfs_zevent_t **)&zs->zs_zevent);
8285 
8286 	/*
8287 	 * In order to provide for lock-free concurrent read access
8288 	 * to the minor list in zfsdev_get_state(), new entries
8289 	 * must be completely written before linking them into the
8290 	 * list whereas existing entries are already linked; the last
8291 	 * operation must be updating zs_minor (from -1 to the new
8292 	 * value).
8293 	 */
8294 	if (newzs) {
8295 		zs->zs_minor = minor;
8296 		membar_producer();
8297 		zsprev->zs_next = zs;
8298 	} else {
8299 		membar_producer();
8300 		zs->zs_minor = minor;
8301 	}
8302 
8303 	return (0);
8304 }
8305 
8306 void
zfsdev_state_destroy(void * priv)8307 zfsdev_state_destroy(void *priv)
8308 {
8309 	zfsdev_state_t *zs = zfsdev_private_get_state(priv);
8310 
8311 	ASSERT(zs != NULL);
8312 	ASSERT3S(zs->zs_minor, >, 0);
8313 
8314 	/*
8315 	 * The last reference to this zfsdev file descriptor is being dropped.
8316 	 * We don't have to worry about lookup grabbing this state object, and
8317 	 * zfsdev_state_init() will not try to reuse this object until it is
8318 	 * invalidated by setting zs_minor to -1.  Invalidation must be done
8319 	 * last, with a memory barrier to ensure ordering.  This lets us avoid
8320 	 * taking the global zfsdev state lock around destruction.
8321 	 */
8322 	zfs_onexit_destroy(zs->zs_onexit);
8323 	zfs_zevent_destroy(zs->zs_zevent);
8324 	zs->zs_onexit = NULL;
8325 	zs->zs_zevent = NULL;
8326 	membar_producer();
8327 	zs->zs_minor = -1;
8328 }
8329 
8330 long
zfsdev_ioctl_common(uint_t vecnum,zfs_cmd_t * zc,int flag)8331 zfsdev_ioctl_common(uint_t vecnum, zfs_cmd_t *zc, int flag)
8332 {
8333 	int error, cmd;
8334 	const zfs_ioc_vec_t *vec;
8335 	char *saved_poolname = NULL;
8336 	uint64_t max_nvlist_src_size;
8337 	size_t saved_poolname_len = 0;
8338 	nvlist_t *innvl = NULL;
8339 	fstrans_cookie_t cookie;
8340 	hrtime_t start_time = gethrtime();
8341 
8342 	cmd = vecnum;
8343 	error = 0;
8344 	if (vecnum >= sizeof (zfs_ioc_vec) / sizeof (zfs_ioc_vec[0]))
8345 		return (SET_ERROR(ZFS_ERR_IOC_CMD_UNAVAIL));
8346 
8347 	vec = &zfs_ioc_vec[vecnum];
8348 
8349 	/*
8350 	 * The registered ioctl list may be sparse, verify that either
8351 	 * a normal or legacy handler are registered.
8352 	 */
8353 	if (vec->zvec_func == NULL && vec->zvec_legacy_func == NULL)
8354 		return (SET_ERROR(ZFS_ERR_IOC_CMD_UNAVAIL));
8355 
8356 	zc->zc_iflags = flag & FKIOCTL;
8357 	max_nvlist_src_size = zfs_max_nvlist_src_size_os();
8358 	if (zc->zc_nvlist_src_size > max_nvlist_src_size) {
8359 		/*
8360 		 * Make sure the user doesn't pass in an insane value for
8361 		 * zc_nvlist_src_size.  We have to check, since we will end
8362 		 * up allocating that much memory inside of get_nvlist().  This
8363 		 * prevents a nefarious user from allocating tons of kernel
8364 		 * memory.
8365 		 *
8366 		 * Also, we return EINVAL instead of ENOMEM here.  The reason
8367 		 * being that returning ENOMEM from an ioctl() has a special
8368 		 * connotation; that the user's size value is too small and
8369 		 * needs to be expanded to hold the nvlist.  See
8370 		 * zcmd_expand_dst_nvlist() for details.
8371 		 */
8372 		error = SET_ERROR(EINVAL);	/* User's size too big */
8373 
8374 	} else if (zc->zc_nvlist_src_size != 0) {
8375 		error = get_nvlist(zc->zc_nvlist_src, zc->zc_nvlist_src_size,
8376 		    zc->zc_iflags, &innvl);
8377 		if (error != 0)
8378 			goto out;
8379 	}
8380 
8381 	/*
8382 	 * Ensure that all pool/dataset names are valid before we pass down to
8383 	 * the lower layers.
8384 	 */
8385 	zc->zc_name[sizeof (zc->zc_name) - 1] = '\0';
8386 	switch (vec->zvec_namecheck) {
8387 	case POOL_NAME:
8388 		if (pool_namecheck(zc->zc_name, NULL, NULL) != 0)
8389 			error = SET_ERROR(EINVAL);
8390 		else
8391 			error = pool_status_check(zc->zc_name,
8392 			    vec->zvec_namecheck, vec->zvec_pool_check);
8393 		break;
8394 
8395 	case DATASET_NAME:
8396 		if (dataset_namecheck(zc->zc_name, NULL, NULL) != 0)
8397 			error = SET_ERROR(EINVAL);
8398 		else
8399 			error = pool_status_check(zc->zc_name,
8400 			    vec->zvec_namecheck, vec->zvec_pool_check);
8401 		break;
8402 
8403 	case ENTITY_NAME:
8404 		if (entity_namecheck(zc->zc_name, NULL, NULL) != 0) {
8405 			error = SET_ERROR(EINVAL);
8406 		} else {
8407 			error = pool_status_check(zc->zc_name,
8408 			    vec->zvec_namecheck, vec->zvec_pool_check);
8409 		}
8410 		break;
8411 
8412 	case NO_NAME:
8413 		break;
8414 	}
8415 	/*
8416 	 * Ensure that all input pairs are valid before we pass them down
8417 	 * to the lower layers.
8418 	 *
8419 	 * The vectored functions can use fnvlist_lookup_{type} for any
8420 	 * required pairs since zfs_check_input_nvpairs() confirmed that
8421 	 * they exist and are of the correct type.
8422 	 */
8423 	if (error == 0 && vec->zvec_func != NULL) {
8424 		error = zfs_check_input_nvpairs(innvl, vec);
8425 		if (error != 0)
8426 			goto out;
8427 	}
8428 
8429 	if (error == 0) {
8430 		cookie = spl_fstrans_mark();
8431 		error = vec->zvec_secpolicy(zc, innvl, CRED());
8432 		spl_fstrans_unmark(cookie);
8433 	}
8434 
8435 	if (error != 0)
8436 		goto out;
8437 
8438 	/* legacy ioctls can modify zc_name */
8439 	/*
8440 	 * Can't use kmem_strdup() as we might truncate the string and
8441 	 * kmem_strfree() would then free with incorrect size.
8442 	 */
8443 	const char *spa_name = zc->zc_name;
8444 	const char *tname;
8445 	if (nvlist_lookup_string(innvl,
8446 	    zpool_prop_to_name(ZPOOL_PROP_TNAME), &tname) == 0) {
8447 		spa_name = tname;
8448 	}
8449 	saved_poolname_len = strlen(spa_name) + 1;
8450 	saved_poolname = kmem_alloc(saved_poolname_len, KM_SLEEP);
8451 
8452 	strlcpy(saved_poolname, spa_name, saved_poolname_len);
8453 	saved_poolname[strcspn(saved_poolname, "/@#")] = '\0';
8454 
8455 	if (vec->zvec_func != NULL) {
8456 		nvlist_t *outnvl;
8457 		int puterror = 0;
8458 		spa_t *spa;
8459 		nvlist_t *lognv = NULL;
8460 
8461 		ASSERT0P(vec->zvec_legacy_func);
8462 
8463 		/*
8464 		 * Add the innvl to the lognv before calling the func,
8465 		 * in case the func changes the innvl.
8466 		 */
8467 		if (vec->zvec_allow_log) {
8468 			lognv = fnvlist_alloc();
8469 			fnvlist_add_string(lognv, ZPOOL_HIST_IOCTL,
8470 			    vec->zvec_name);
8471 			if (!nvlist_empty(innvl)) {
8472 				fnvlist_add_nvlist(lognv, ZPOOL_HIST_INPUT_NVL,
8473 				    innvl);
8474 			}
8475 		}
8476 
8477 		outnvl = fnvlist_alloc();
8478 		cookie = spl_fstrans_mark();
8479 		error = vec->zvec_func(zc->zc_name, innvl, outnvl);
8480 		spl_fstrans_unmark(cookie);
8481 
8482 		/*
8483 		 * Some commands can partially execute, modify state, and still
8484 		 * return an error.  In these cases, attempt to record what
8485 		 * was modified.
8486 		 */
8487 		if ((error == 0 ||
8488 		    (cmd == ZFS_IOC_CHANNEL_PROGRAM && error != EINVAL)) &&
8489 		    vec->zvec_allow_log &&
8490 		    spa_open(zc->zc_name, &spa, FTAG) == 0) {
8491 			if (!nvlist_empty(outnvl)) {
8492 				size_t out_size = fnvlist_size(outnvl);
8493 				if (out_size > zfs_history_output_max) {
8494 					fnvlist_add_int64(lognv,
8495 					    ZPOOL_HIST_OUTPUT_SIZE, out_size);
8496 				} else {
8497 					fnvlist_add_nvlist(lognv,
8498 					    ZPOOL_HIST_OUTPUT_NVL, outnvl);
8499 				}
8500 			}
8501 			if (error != 0) {
8502 				fnvlist_add_int64(lognv, ZPOOL_HIST_ERRNO,
8503 				    error);
8504 			}
8505 			fnvlist_add_int64(lognv, ZPOOL_HIST_ELAPSED_NS,
8506 			    gethrtime() - start_time);
8507 			(void) spa_history_log_nvl(spa, lognv);
8508 			spa_close(spa, FTAG);
8509 		}
8510 		fnvlist_free(lognv);
8511 
8512 		if (!nvlist_empty(outnvl) || zc->zc_nvlist_dst_size != 0) {
8513 			int smusherror = 0;
8514 			if (vec->zvec_smush_outnvlist) {
8515 				smusherror = nvlist_smush(outnvl,
8516 				    zc->zc_nvlist_dst_size);
8517 			}
8518 			if (smusherror == 0)
8519 				puterror = put_nvlist(zc, outnvl);
8520 		}
8521 
8522 		if (puterror != 0)
8523 			error = puterror;
8524 
8525 		nvlist_free(outnvl);
8526 	} else {
8527 		cookie = spl_fstrans_mark();
8528 		error = vec->zvec_legacy_func(zc);
8529 		spl_fstrans_unmark(cookie);
8530 	}
8531 
8532 out:
8533 	nvlist_free(innvl);
8534 	if (error == 0 && vec->zvec_allow_log) {
8535 		char *s = tsd_get(zfs_allow_log_key);
8536 		if (s != NULL)
8537 			kmem_strfree(s);
8538 		(void) tsd_set(zfs_allow_log_key, kmem_strdup(saved_poolname));
8539 	}
8540 	if (saved_poolname != NULL)
8541 		kmem_free(saved_poolname, saved_poolname_len);
8542 
8543 	return (error);
8544 }
8545 
8546 int
zfs_kmod_init(void)8547 zfs_kmod_init(void)
8548 {
8549 	int error;
8550 
8551 	if ((error = zvol_init()) != 0)
8552 		return (error);
8553 
8554 	spa_init(SPA_MODE_READ | SPA_MODE_WRITE);
8555 	zfs_init();
8556 
8557 	zfs_ioctl_init();
8558 
8559 	/* Register zoned_uid property lookup callback with SPL */
8560 	zone_register_zoned_uid_callback(zfs_get_zoned_uid);
8561 
8562 	mutex_init(&zfsdev_state_lock, NULL, MUTEX_DEFAULT, NULL);
8563 	zfsdev_state_listhead.zs_minor = -1;
8564 
8565 	if ((error = zfsdev_attach()) != 0)
8566 		goto out;
8567 
8568 	tsd_create(&rrw_tsd_key, rrw_tsd_destroy);
8569 	tsd_create(&zfs_allow_log_key, zfs_allow_log_destroy);
8570 
8571 	return (0);
8572 out:
8573 	zfs_fini();
8574 	spa_fini();
8575 	zvol_fini();
8576 
8577 	return (error);
8578 }
8579 
8580 void
zfs_kmod_fini(void)8581 zfs_kmod_fini(void)
8582 {
8583 	zfsdev_state_t *zs, *zsnext = NULL;
8584 
8585 	zfsdev_detach();
8586 
8587 	mutex_destroy(&zfsdev_state_lock);
8588 
8589 	for (zs = &zfsdev_state_listhead; zs != NULL; zs = zsnext) {
8590 		zsnext = zs->zs_next;
8591 		if (zs->zs_onexit)
8592 			zfs_onexit_destroy(zs->zs_onexit);
8593 		if (zs->zs_zevent)
8594 			zfs_zevent_destroy(zs->zs_zevent);
8595 		if (zs != &zfsdev_state_listhead)
8596 			kmem_free(zs, sizeof (zfsdev_state_t));
8597 	}
8598 
8599 	zfs_ereport_taskq_fini();	/* run before zfs_fini() on Linux */
8600 
8601 	/* Unregister zoned_uid callback before ZFS layer is torn down */
8602 	zone_unregister_zoned_uid_callback();
8603 
8604 	zfs_fini();
8605 	spa_fini();
8606 	zvol_fini();
8607 
8608 	tsd_destroy(&rrw_tsd_key);
8609 	tsd_destroy(&zfs_allow_log_key);
8610 }
8611 
8612 ZFS_MODULE_PARAM(zfs, zfs_, max_nvlist_src_size, U64, ZMOD_RW,
8613 	"Maximum size in bytes allowed for src nvlist passed with ZFS ioctls");
8614 
8615 ZFS_MODULE_PARAM(zfs, zfs_, history_output_max, U64, ZMOD_RW,
8616 	"Maximum size in bytes of ZFS ioctl output that will be logged");
8617