xref: /illumos-gate/usr/src/uts/common/fs/zfs/zfs_fuid.c (revision ad14f175b2170ddd9e3a3d61181805b0be2fb914)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright (c) 2007, 2010, Oracle and/or its affiliates. All rights reserved.
23  * Copyright 2020 Tintri by DDN, Inc. All rights reserved.
24  * Copyright 2025 RackTop Systems, Inc.
25  */
26 
27 #include <sys/zfs_context.h>
28 #include <sys/dmu.h>
29 #include <sys/avl.h>
30 #include <sys/zap.h>
31 #include <sys/refcount.h>
32 #include <sys/nvpair.h>
33 #ifdef _KERNEL
34 #include <sys/kidmap.h>
35 #include <sys/sid.h>
36 #include <sys/zfs_vfsops.h>
37 #include <sys/zfs_znode.h>
38 #endif
39 #include <sys/zfs_fuid.h>
40 
41 /*
42  * FUID Domain table(s).
43  *
44  * The FUID table is stored as a packed nvlist of an array
45  * of nvlists which contain an index, domain string and offset
46  *
47  * During file system initialization the nvlist(s) are read and
48  * two AVL trees are created.  One tree is keyed by the index number
49  * and the other by the domain string.  Nodes are never removed from
50  * trees, but new entries may be added.  If a new entry is added then
51  * the zfsvfs->z_fuid_dirty flag is set to true and the caller will then
52  * be responsible for calling zfs_fuid_sync() to sync the changes to disk.
53  *
54  */
55 
/*
 * Names of the nvpairs that make up the packed FUID table.
 *
 * FUID_NVP_ARRAY names the nvlist array in the top-level nvlist; each
 * element of that array carries FUID_IDX (uint64), FUID_OFFSET (uint64,
 * always written as 0 by zfs_fuid_sync() and not read back by
 * zfs_fuid_table_load()) and FUID_DOMAIN (string).
 */
#define	FUID_IDX	"fuid_idx"
#define	FUID_DOMAIN	"fuid_domain"
#define	FUID_OFFSET	"fuid_offset"
#define	FUID_NVP_ARRAY	"fuid_nvlist"
60 
/*
 * One entry of the in-core FUID domain table.  The same node is linked
 * into both AVL trees built by zfs_fuid_avl_tree_create().
 */
typedef struct fuid_domain {
	avl_node_t	f_domnode;	/* linkage in the domain-keyed tree */
	avl_node_t	f_idxnode;	/* linkage in the index-keyed tree */
	ksiddomain_t	*f_ksid;	/* domain, from ksid_lookupdomain() */
	uint64_t	f_idx;		/* index of this domain in the table */
} fuid_domain_t;
67 
/*
 * Empty domain string handed back when an index has no table entry and
 * for the dummy "nobody" domain.
 */
static char *nulldomain = "";
69 
70 /*
71  * Compare two indexes.
72  */
73 static int
idx_compare(const void * arg1,const void * arg2)74 idx_compare(const void *arg1, const void *arg2)
75 {
76 	const fuid_domain_t *node1 = (const fuid_domain_t *)arg1;
77 	const fuid_domain_t *node2 = (const fuid_domain_t *)arg2;
78 
79 	return (TREE_CMP(node1->f_idx, node2->f_idx));
80 }
81 
82 /*
83  * Compare two domain strings.
84  */
85 static int
domain_compare(const void * arg1,const void * arg2)86 domain_compare(const void *arg1, const void *arg2)
87 {
88 	const fuid_domain_t *node1 = (const fuid_domain_t *)arg1;
89 	const fuid_domain_t *node2 = (const fuid_domain_t *)arg2;
90 	int val;
91 
92 	val = strcmp(node1->f_ksid->kd_name, node2->f_ksid->kd_name);
93 
94 	return (TREE_ISIGN(val));
95 }
96 
97 void
zfs_fuid_avl_tree_create(avl_tree_t * idx_tree,avl_tree_t * domain_tree)98 zfs_fuid_avl_tree_create(avl_tree_t *idx_tree, avl_tree_t *domain_tree)
99 {
100 	avl_create(idx_tree, idx_compare,
101 	    sizeof (fuid_domain_t), offsetof(fuid_domain_t, f_idxnode));
102 	avl_create(domain_tree, domain_compare,
103 	    sizeof (fuid_domain_t), offsetof(fuid_domain_t, f_domnode));
104 }
105 
106 /*
107  * load initial fuid domain and idx trees.  This function is used by
108  * both the kernel and zdb.
109  */
110 uint64_t
zfs_fuid_table_load(objset_t * os,uint64_t fuid_obj,avl_tree_t * idx_tree,avl_tree_t * domain_tree)111 zfs_fuid_table_load(objset_t *os, uint64_t fuid_obj, avl_tree_t *idx_tree,
112     avl_tree_t *domain_tree)
113 {
114 	dmu_buf_t *db;
115 	uint64_t fuid_size;
116 
117 	ASSERT(fuid_obj != 0);
118 	VERIFY(0 == dmu_bonus_hold(os, fuid_obj,
119 	    FTAG, &db));
120 	fuid_size = *(uint64_t *)db->db_data;
121 	dmu_buf_rele(db, FTAG);
122 
123 	if (fuid_size)  {
124 		nvlist_t **fuidnvp;
125 		nvlist_t *nvp = NULL;
126 		uint_t count;
127 		char *packed;
128 		int i;
129 
130 		packed = kmem_alloc(fuid_size, KM_SLEEP);
131 		VERIFY(dmu_read(os, fuid_obj, 0,
132 		    fuid_size, packed, DMU_READ_PREFETCH) == 0);
133 		VERIFY(nvlist_unpack(packed, fuid_size,
134 		    &nvp, 0) == 0);
135 		VERIFY(nvlist_lookup_nvlist_array(nvp, FUID_NVP_ARRAY,
136 		    &fuidnvp, &count) == 0);
137 
138 		for (i = 0; i != count; i++) {
139 			fuid_domain_t *domnode;
140 			char *domain;
141 			uint64_t idx;
142 
143 			VERIFY(nvlist_lookup_string(fuidnvp[i], FUID_DOMAIN,
144 			    &domain) == 0);
145 			VERIFY(nvlist_lookup_uint64(fuidnvp[i], FUID_IDX,
146 			    &idx) == 0);
147 
148 			domnode = kmem_alloc(sizeof (fuid_domain_t), KM_SLEEP);
149 
150 			domnode->f_idx = idx;
151 			domnode->f_ksid = ksid_lookupdomain(domain);
152 			avl_add(idx_tree, domnode);
153 			avl_add(domain_tree, domnode);
154 		}
155 		nvlist_free(nvp);
156 		kmem_free(packed, fuid_size);
157 	}
158 	return (fuid_size);
159 }
160 
161 void
zfs_fuid_table_destroy(avl_tree_t * idx_tree,avl_tree_t * domain_tree)162 zfs_fuid_table_destroy(avl_tree_t *idx_tree, avl_tree_t *domain_tree)
163 {
164 	fuid_domain_t *domnode;
165 	void *cookie;
166 
167 	cookie = NULL;
168 	while (domnode = avl_destroy_nodes(domain_tree, &cookie))
169 		ksiddomain_rele(domnode->f_ksid);
170 
171 	avl_destroy(domain_tree);
172 	cookie = NULL;
173 	while (domnode = avl_destroy_nodes(idx_tree, &cookie))
174 		kmem_free(domnode, sizeof (fuid_domain_t));
175 	avl_destroy(idx_tree);
176 }
177 
178 char *
zfs_fuid_idx_domain(avl_tree_t * idx_tree,uint32_t idx)179 zfs_fuid_idx_domain(avl_tree_t *idx_tree, uint32_t idx)
180 {
181 	fuid_domain_t searchnode, *findnode;
182 	avl_index_t loc;
183 
184 	searchnode.f_idx = idx;
185 
186 	findnode = avl_find(idx_tree, &searchnode, &loc);
187 
188 	return (findnode ? findnode->f_ksid->kd_name : nulldomain);
189 }
190 
191 #ifdef _KERNEL
192 /*
193  * Load the fuid table(s) into memory.
194  */
195 static void
zfs_fuid_init(zfsvfs_t * zfsvfs)196 zfs_fuid_init(zfsvfs_t *zfsvfs)
197 {
198 	rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER);
199 
200 	if (zfsvfs->z_fuid_loaded) {
201 		rw_exit(&zfsvfs->z_fuid_lock);
202 		return;
203 	}
204 
205 	zfs_fuid_avl_tree_create(&zfsvfs->z_fuid_idx, &zfsvfs->z_fuid_domain);
206 
207 	(void) zap_lookup(zfsvfs->z_os, MASTER_NODE_OBJ,
208 	    ZFS_FUID_TABLES, 8, 1, &zfsvfs->z_fuid_obj);
209 	if (zfsvfs->z_fuid_obj != 0) {
210 		zfsvfs->z_fuid_size = zfs_fuid_table_load(zfsvfs->z_os,
211 		    zfsvfs->z_fuid_obj, &zfsvfs->z_fuid_idx,
212 		    &zfsvfs->z_fuid_domain);
213 	}
214 
215 	zfsvfs->z_fuid_loaded = B_TRUE;
216 	rw_exit(&zfsvfs->z_fuid_lock);
217 }
218 
219 /*
220  * sync out AVL trees to persistent storage.
221  */
222 void
zfs_fuid_sync(zfsvfs_t * zfsvfs,dmu_tx_t * tx)223 zfs_fuid_sync(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
224 {
225 	nvlist_t *nvp;
226 	nvlist_t **fuids;
227 	size_t nvsize = 0;
228 	char *packed;
229 	dmu_buf_t *db;
230 	fuid_domain_t *domnode;
231 	int numnodes;
232 	int i;
233 
234 	if (!zfsvfs->z_fuid_dirty) {
235 		return;
236 	}
237 
238 	rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER);
239 
240 	/*
241 	 * First see if table needs to be created?
242 	 */
243 	if (zfsvfs->z_fuid_obj == 0) {
244 		zfsvfs->z_fuid_obj = dmu_object_alloc(zfsvfs->z_os,
245 		    DMU_OT_FUID, 1 << 14, DMU_OT_FUID_SIZE,
246 		    sizeof (uint64_t), tx);
247 		VERIFY(zap_add(zfsvfs->z_os, MASTER_NODE_OBJ,
248 		    ZFS_FUID_TABLES, sizeof (uint64_t), 1,
249 		    &zfsvfs->z_fuid_obj, tx) == 0);
250 	}
251 
252 	VERIFY(nvlist_alloc(&nvp, NV_UNIQUE_NAME, KM_SLEEP) == 0);
253 
254 	numnodes = avl_numnodes(&zfsvfs->z_fuid_idx);
255 	fuids = kmem_alloc(numnodes * sizeof (void *), KM_SLEEP);
256 	for (i = 0, domnode = avl_first(&zfsvfs->z_fuid_domain); domnode; i++,
257 	    domnode = AVL_NEXT(&zfsvfs->z_fuid_domain, domnode)) {
258 		VERIFY(nvlist_alloc(&fuids[i], NV_UNIQUE_NAME, KM_SLEEP) == 0);
259 		VERIFY(nvlist_add_uint64(fuids[i], FUID_IDX,
260 		    domnode->f_idx) == 0);
261 		VERIFY(nvlist_add_uint64(fuids[i], FUID_OFFSET, 0) == 0);
262 		VERIFY(nvlist_add_string(fuids[i], FUID_DOMAIN,
263 		    domnode->f_ksid->kd_name) == 0);
264 	}
265 	VERIFY(nvlist_add_nvlist_array(nvp, FUID_NVP_ARRAY,
266 	    fuids, numnodes) == 0);
267 	for (i = 0; i != numnodes; i++)
268 		nvlist_free(fuids[i]);
269 	kmem_free(fuids, numnodes * sizeof (void *));
270 	VERIFY(nvlist_size(nvp, &nvsize, NV_ENCODE_XDR) == 0);
271 	packed = kmem_alloc(nvsize, KM_SLEEP);
272 	VERIFY(nvlist_pack(nvp, &packed, &nvsize,
273 	    NV_ENCODE_XDR, KM_SLEEP) == 0);
274 	nvlist_free(nvp);
275 	zfsvfs->z_fuid_size = nvsize;
276 	dmu_write(zfsvfs->z_os, zfsvfs->z_fuid_obj, 0,
277 	    zfsvfs->z_fuid_size, packed, tx);
278 	kmem_free(packed, zfsvfs->z_fuid_size);
279 	VERIFY(0 == dmu_bonus_hold(zfsvfs->z_os, zfsvfs->z_fuid_obj,
280 	    FTAG, &db));
281 	dmu_buf_will_dirty(db, tx);
282 	*(uint64_t *)db->db_data = zfsvfs->z_fuid_size;
283 	dmu_buf_rele(db, FTAG);
284 
285 	zfsvfs->z_fuid_dirty = B_FALSE;
286 	rw_exit(&zfsvfs->z_fuid_lock);
287 }
288 
/*
 * Query domain table for a given domain.
 *
 * If domain isn't found and addok is set, it is added to AVL trees and
 * the zfsvfs->z_fuid_dirty flag will be set to TRUE.  It will then be
 * necessary for the caller or another thread to detect the dirty table
 * and sync out the changes.
 *
 * Returns the table index of the domain, 0 for the empty ("nobody")
 * domain, or -1 when the domain is not in the table and addok is not
 * set.  When retdomain is non-NULL it is pointed at the domain name
 * (nulldomain for the empty domain, otherwise the kd_name of the ksid
 * domain obtained from ksid_lookupdomain()).
 *
 * Loads the table via zfs_fuid_init() if it has not been loaded yet.
 */
int
zfs_fuid_find_by_domain(zfsvfs_t *zfsvfs, const char *domain,
    char **retdomain, boolean_t addok)
{
	fuid_domain_t searchnode, *findnode;
	avl_index_t loc;
	krw_t rw = RW_READER;

	/*
	 * If the dummy "nobody" domain then return an index of 0
	 * to cause the created FUID to be a standard POSIX id
	 * for the user nobody.
	 */
	if (domain[0] == '\0') {
		if (retdomain)
			*retdomain = nulldomain;
		return (0);
	}

	/*
	 * The ksid domain obtained here is released below when an
	 * existing node is found, and is handed over to the new node
	 * when one is added.
	 */
	searchnode.f_ksid = ksid_lookupdomain(domain);
	if (retdomain)
		*retdomain = searchnode.f_ksid->kd_name;
	if (!zfsvfs->z_fuid_loaded)
		zfs_fuid_init(zfsvfs);

retry:
	rw_enter(&zfsvfs->z_fuid_lock, rw);
	findnode = avl_find(&zfsvfs->z_fuid_domain, &searchnode, &loc);

	if (findnode) {
		rw_exit(&zfsvfs->z_fuid_lock);
		ksiddomain_rele(searchnode.f_ksid);
		return (findnode->f_idx);
	} else if (addok) {
		fuid_domain_t *domnode;
		uint64_t retidx;

		/*
		 * Adding requires the lock as writer.  If the reader
		 * lock cannot be upgraded in place, drop it, retake it
		 * as writer and repeat the lookup.
		 */
		if (rw == RW_READER && !rw_tryupgrade(&zfsvfs->z_fuid_lock)) {
			rw_exit(&zfsvfs->z_fuid_lock);
			rw = RW_WRITER;
			goto retry;
		}

		domnode = kmem_alloc(sizeof (fuid_domain_t), KM_SLEEP);
		domnode->f_ksid = searchnode.f_ksid;

		/* New entries get the next index; indexes start at 1. */
		retidx = domnode->f_idx = avl_numnodes(&zfsvfs->z_fuid_idx) + 1;

		avl_add(&zfsvfs->z_fuid_domain, domnode);
		avl_add(&zfsvfs->z_fuid_idx, domnode);
		zfsvfs->z_fuid_dirty = B_TRUE;
		rw_exit(&zfsvfs->z_fuid_lock);
		return (retidx);
	} else {
		/*
		 * NOTE(review): this path returns without calling
		 * ksiddomain_rele() on searchnode.f_ksid, while
		 * *retdomain still points at its kd_name -- confirm
		 * whether keeping the domain here is intentional.
		 */
		rw_exit(&zfsvfs->z_fuid_lock);
		return (-1);
	}
}
355 
356 /*
357  * Query domain table by index, returning domain string
358  *
359  * Returns a pointer from an avl node of the domain string.
360  *
361  */
362 const char *
zfs_fuid_find_by_idx(zfsvfs_t * zfsvfs,uint32_t idx)363 zfs_fuid_find_by_idx(zfsvfs_t *zfsvfs, uint32_t idx)
364 {
365 	char *domain;
366 
367 	if (idx == 0 || !zfsvfs->z_use_fuids)
368 		return (NULL);
369 
370 	if (!zfsvfs->z_fuid_loaded)
371 		zfs_fuid_init(zfsvfs);
372 
373 	rw_enter(&zfsvfs->z_fuid_lock, RW_READER);
374 
375 	if (zfsvfs->z_fuid_obj || zfsvfs->z_fuid_dirty)
376 		domain = zfs_fuid_idx_domain(&zfsvfs->z_fuid_idx, idx);
377 	else
378 		domain = nulldomain;
379 	rw_exit(&zfsvfs->z_fuid_lock);
380 
381 	ASSERT(domain);
382 	return (domain);
383 }
384 
385 void
zfs_fuid_map_ids(znode_t * zp,cred_t * cr,uid_t * uidp,uid_t * gidp)386 zfs_fuid_map_ids(znode_t *zp, cred_t *cr, uid_t *uidp, uid_t *gidp)
387 {
388 	*uidp = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_uid, cr, ZFS_OWNER);
389 	*gidp = zfs_fuid_map_id(zp->z_zfsvfs, zp->z_gid, cr, ZFS_GROUP);
390 }
391 
392 uid_t
zfs_fuid_map_id(zfsvfs_t * zfsvfs,uint64_t fuid,cred_t * cr,zfs_fuid_type_t type)393 zfs_fuid_map_id(zfsvfs_t *zfsvfs, uint64_t fuid,
394     cred_t *cr, zfs_fuid_type_t type)
395 {
396 	uint32_t index = FUID_INDEX(fuid);
397 	const char *domain;
398 	uid_t id;
399 
400 	if (index == 0)
401 		return (fuid);
402 
403 	domain = zfs_fuid_find_by_idx(zfsvfs, index);
404 	ASSERT(domain != NULL);
405 
406 	if (type == ZFS_OWNER || type == ZFS_ACE_USER) {
407 		(void) kidmap_getuidbysid(crgetzone(cr), domain,
408 		    FUID_RID(fuid), &id);
409 	} else {
410 		(void) kidmap_getgidbysid(crgetzone(cr), domain,
411 		    FUID_RID(fuid), &id);
412 	}
413 	return (id);
414 }
415 
416 /*
417  * Add a FUID node to the list of fuid's being created for this
418  * ACL
419  *
420  * If ACL has multiple domains, then keep only one copy of each unique
421  * domain.
422  */
423 void
zfs_fuid_node_add(zfs_fuid_info_t ** fuidpp,const char * domain,uint32_t rid,uint64_t idx,uint64_t id,zfs_fuid_type_t type)424 zfs_fuid_node_add(zfs_fuid_info_t **fuidpp, const char *domain, uint32_t rid,
425     uint64_t idx, uint64_t id, zfs_fuid_type_t type)
426 {
427 	zfs_fuid_t *fuid;
428 	zfs_fuid_domain_t *fuid_domain;
429 	zfs_fuid_info_t *fuidp;
430 	uint64_t fuididx;
431 	boolean_t found = B_FALSE;
432 
433 	if (*fuidpp == NULL)
434 		*fuidpp = zfs_fuid_info_alloc();
435 
436 	fuidp = *fuidpp;
437 	/*
438 	 * First find fuid domain index in linked list
439 	 *
440 	 * If one isn't found then create an entry.
441 	 */
442 
443 	for (fuididx = 1, fuid_domain = list_head(&fuidp->z_domains);
444 	    fuid_domain; fuid_domain = list_next(&fuidp->z_domains,
445 	    fuid_domain), fuididx++) {
446 		if (idx == fuid_domain->z_domidx) {
447 			found = B_TRUE;
448 			break;
449 		}
450 	}
451 
452 	if (!found) {
453 		fuid_domain = kmem_alloc(sizeof (zfs_fuid_domain_t), KM_SLEEP);
454 		fuid_domain->z_domain = domain;
455 		fuid_domain->z_domidx = idx;
456 		list_insert_tail(&fuidp->z_domains, fuid_domain);
457 		fuidp->z_domain_str_sz += strlen(domain) + 1;
458 		fuidp->z_domain_cnt++;
459 	}
460 
461 	if (type == ZFS_ACE_USER || type == ZFS_ACE_GROUP) {
462 
463 		/*
464 		 * Now allocate fuid entry and add it on the end of the list
465 		 */
466 
467 		fuid = kmem_alloc(sizeof (zfs_fuid_t), KM_SLEEP);
468 		fuid->z_id = id;
469 		fuid->z_domidx = idx;
470 		fuid->z_logfuid = FUID_ENCODE(fuididx, rid);
471 
472 		list_insert_tail(&fuidp->z_fuids, fuid);
473 		fuidp->z_fuid_cnt++;
474 	} else {
475 		if (type == ZFS_OWNER)
476 			fuidp->z_fuid_owner = FUID_ENCODE(fuididx, rid);
477 		else
478 			fuidp->z_fuid_group = FUID_ENCODE(fuididx, rid);
479 	}
480 }
481 
482 /*
483  * Create a file system FUID, based on information in the users cred
484  *
485  * If cred contains KSID_OWNER then it should be used to determine
486  * the uid otherwise cred's uid will be used. By default cred's gid
487  * is used unless it's an ephemeral ID in which case KSID_GROUP will
488  * be used if it exists.
489  */
490 uint64_t
zfs_fuid_create_cred(zfsvfs_t * zfsvfs,zfs_fuid_type_t type,cred_t * cr,zfs_fuid_info_t ** fuidp)491 zfs_fuid_create_cred(zfsvfs_t *zfsvfs, zfs_fuid_type_t type,
492     cred_t *cr, zfs_fuid_info_t **fuidp)
493 {
494 	uint64_t	idx;
495 	ksid_t		*ksid;
496 	uint32_t	rid;
497 	char		*kdomain;
498 	const char	*domain;
499 	uid_t		id;
500 
501 	VERIFY(type == ZFS_OWNER || type == ZFS_GROUP);
502 
503 	ksid = crgetsid(cr, (type == ZFS_OWNER) ? KSID_OWNER : KSID_GROUP);
504 
505 	if (!zfsvfs->z_use_fuids || (ksid == NULL)) {
506 		id = (type == ZFS_OWNER) ? crgetuid(cr) : crgetgid(cr);
507 
508 		if (IS_EPHEMERAL(id))
509 			return ((type == ZFS_OWNER) ? UID_NOBODY : GID_NOBODY);
510 
511 		return ((uint64_t)id);
512 	}
513 
514 	/*
515 	 * ksid is present and FUID is supported
516 	 */
517 	id = (type == ZFS_OWNER) ? ksid_getid(ksid) : crgetgid(cr);
518 
519 	if (!IS_EPHEMERAL(id))
520 		return ((uint64_t)id);
521 
522 	if (type == ZFS_GROUP)
523 		id = ksid_getid(ksid);
524 
525 	rid = ksid_getrid(ksid);
526 	domain = ksid_getdomain(ksid);
527 
528 	idx = zfs_fuid_find_by_domain(zfsvfs, domain, &kdomain, B_TRUE);
529 
530 	zfs_fuid_node_add(fuidp, kdomain, rid, idx, id, type);
531 
532 	return (FUID_ENCODE(idx, rid));
533 }
534 
/*
 * Create a file system FUID for an ACL ace
 * or a chown/chgrp of the file.
 * This is similar to zfs_fuid_create_cred, except that
 * we can't find the domain + rid information in the
 * cred.  Instead we have to query Winchester for the
 * domain and rid.
 *
 * During replay operations the domain+rid information is
 * found in the zfs_fuid_info_t that the replay code has
 * attached to the zfsvfs of the file system.
 *
 * Returns id unchanged when FUIDs are not in use, id is not ephemeral,
 * or id already carries a domain index; otherwise returns the FUID
 * encoded from the domain's table index and the rid.  Outside of
 * replay the result is also recorded in *fuidpp via
 * zfs_fuid_node_add().
 */
uint64_t
zfs_fuid_create(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr,
    zfs_fuid_type_t type, zfs_fuid_info_t **fuidpp)
{
	const char *domain;
	char *kdomain;
	uint32_t fuid_idx = FUID_INDEX(id);
	uint32_t rid;
	idmap_stat status;
	uint64_t idx = 0;
	zfs_fuid_t *zfuid = NULL;
	zfs_fuid_info_t *fuidp = NULL;

	/*
	 * If POSIX ID, or entry is already a FUID then
	 * just return the id
	 *
	 * We may also be handed an already FUID'ized id via
	 * chmod.
	 */

	if (!zfsvfs->z_use_fuids || !IS_EPHEMERAL(id) || fuid_idx != 0)
		return (id);

	if (zfsvfs->z_replay) {
		fuidp = zfsvfs->z_fuid_replay;

		/*
		 * If we are passed an ephemeral id, but no
		 * fuid_info was logged then return NOBODY.
		 * This is most likely a result of idmap service
		 * not being available.
		 */
		if (fuidp == NULL)
			return (UID_NOBODY);

		VERIFY3U(type, >=, ZFS_OWNER);
		VERIFY3U(type, <=, ZFS_ACE_GROUP);

		switch (type) {
		case ZFS_ACE_USER:
		case ZFS_ACE_GROUP:
			/*
			 * ACE FUIDs are taken from the head of the
			 * logged list; the entry is removed and freed
			 * at the bottom of this function.
			 *
			 * NOTE(review): list_head() is dereferenced
			 * without a NULL check -- presumably the log
			 * always holds one entry per ephemeral ACE;
			 * confirm against the replay code.
			 */
			zfuid = list_head(&fuidp->z_fuids);
			rid = FUID_RID(zfuid->z_logfuid);
			idx = FUID_INDEX(zfuid->z_logfuid);
			break;
		case ZFS_OWNER:
			rid = FUID_RID(fuidp->z_fuid_owner);
			idx = FUID_INDEX(fuidp->z_fuid_owner);
			break;
		case ZFS_GROUP:
			rid = FUID_RID(fuidp->z_fuid_group);
			idx = FUID_INDEX(fuidp->z_fuid_group);
			break;
		};
		/* Logged indexes are 1-based; the table is 0-based. */
		domain = fuidp->z_domain_table[idx - 1];
	} else {
		if (type == ZFS_OWNER || type == ZFS_ACE_USER)
			status = kidmap_getsidbyuid(crgetzone(cr), id,
			    &domain, &rid);
		else
			status = kidmap_getsidbygid(crgetzone(cr), id,
			    &domain, &rid);

		if (status != 0) {
			/*
			 * When returning nobody we will need to
			 * make a dummy fuid table entry for logging
			 * purposes.
			 */
			rid = UID_NOBODY;
			domain = nulldomain;
		}
	}

	/* Translate the domain into this file system's table index. */
	idx = zfs_fuid_find_by_domain(zfsvfs, domain, &kdomain, B_TRUE);

	if (!zfsvfs->z_replay)
		zfs_fuid_node_add(fuidpp, kdomain,
		    rid, idx, id, type);
	else if (zfuid != NULL) {
		/* Replay: the logged ACE entry has now been consumed. */
		list_remove(&fuidp->z_fuids, zfuid);
		kmem_free(zfuid, sizeof (zfs_fuid_t));
	}
	return (FUID_ENCODE(idx, rid));
}
633 
634 void
zfs_fuid_destroy(zfsvfs_t * zfsvfs)635 zfs_fuid_destroy(zfsvfs_t *zfsvfs)
636 {
637 	rw_enter(&zfsvfs->z_fuid_lock, RW_WRITER);
638 	if (!zfsvfs->z_fuid_loaded) {
639 		rw_exit(&zfsvfs->z_fuid_lock);
640 		return;
641 	}
642 	zfs_fuid_table_destroy(&zfsvfs->z_fuid_idx, &zfsvfs->z_fuid_domain);
643 	rw_exit(&zfsvfs->z_fuid_lock);
644 }
645 
646 /*
647  * Allocate zfs_fuid_info for tracking FUIDs created during
648  * zfs_mknode, VOP_SETATTR() or VOP_SETSECATTR()
649  */
650 zfs_fuid_info_t *
zfs_fuid_info_alloc(void)651 zfs_fuid_info_alloc(void)
652 {
653 	zfs_fuid_info_t *fuidp;
654 
655 	fuidp = kmem_zalloc(sizeof (zfs_fuid_info_t), KM_SLEEP);
656 	list_create(&fuidp->z_domains, sizeof (zfs_fuid_domain_t),
657 	    offsetof(zfs_fuid_domain_t, z_next));
658 	list_create(&fuidp->z_fuids, sizeof (zfs_fuid_t),
659 	    offsetof(zfs_fuid_t, z_next));
660 	return (fuidp);
661 }
662 
663 /*
664  * Release all memory associated with zfs_fuid_info_t
665  */
666 void
zfs_fuid_info_free(zfs_fuid_info_t * fuidp)667 zfs_fuid_info_free(zfs_fuid_info_t *fuidp)
668 {
669 	zfs_fuid_t *zfuid;
670 	zfs_fuid_domain_t *zdomain;
671 
672 	while ((zfuid = list_head(&fuidp->z_fuids)) != NULL) {
673 		list_remove(&fuidp->z_fuids, zfuid);
674 		kmem_free(zfuid, sizeof (zfs_fuid_t));
675 	}
676 
677 	if (fuidp->z_domain_table != NULL)
678 		kmem_free(fuidp->z_domain_table,
679 		    (sizeof (char **)) * fuidp->z_domain_cnt);
680 
681 	while ((zdomain = list_head(&fuidp->z_domains)) != NULL) {
682 		list_remove(&fuidp->z_domains, zdomain);
683 		kmem_free(zdomain, sizeof (zfs_fuid_domain_t));
684 	}
685 
686 	kmem_free(fuidp, sizeof (zfs_fuid_info_t));
687 }
688 
689 /*
690  * Alternative to: getattr(...); va.va_uid == getcruid(cr)
691  * Avoids calling idmap when the cred has a ksid.
692  */
693 boolean_t
zfs_fuid_is_cruser(zfsvfs_t * zfsvfs,uint64_t fuid,cred_t * cr)694 zfs_fuid_is_cruser(zfsvfs_t *zfsvfs, uint64_t fuid, cred_t *cr)
695 {
696 	ksid_t		*ksid;
697 	const char	*domain;
698 	uint32_t	idx = FUID_INDEX(fuid);
699 	uint32_t	rid = FUID_RID(fuid);
700 	uid_t		uid = (uid_t)-1;
701 
702 	if (idx == 0) {
703 		/* The fuid is a plain uid.  Easy. */
704 		return (rid == crgetuid(cr));
705 	}
706 
707 	/* The fuid has a domain part. */
708 	domain = zfs_fuid_find_by_idx(zfsvfs, idx);
709 	ASSERT(domain != NULL);
710 
711 	/*
712 	 * If we have a ksid, we can avoid an idmap up-call.
713 	 */
714 	ksid = crgetsid(cr, KSID_USER);
715 	if (ksid != NULL) {
716 		const char *ksdom = ksid_getdomain(ksid);
717 		ASSERT(ksdom != NULL);
718 		return (rid == ksid->ks_rid &&
719 		    strcmp(domain, ksdom) == 0);
720 	}
721 
722 	/*
723 	 * No ksid, so we have to idmap.
724 	 * The checks for -1 and 0x80000000 appear to be paranoia.
725 	 * Those should never be set in cr_uid.
726 	 */
727 	(void) kidmap_getuidbysid(crgetzone(cr), domain, rid, &uid);
728 	if (uid == (uid_t)-1 || uid != IDMAP_WK_CREATOR_OWNER_UID)
729 		return (B_FALSE);
730 	return (uid == crgetuid(cr));
731 }
732 
733 /*
734  * Check to see if user ID is in the list of SIDs in CR.
735  */
736 boolean_t
zfs_user_in_cred(zfsvfs_t * zfsvfs,uint64_t id,cred_t * cr)737 zfs_user_in_cred(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr)
738 {
739 	ksid_t		*ksid = crgetsid(cr, KSID_USER);
740 	ksidlist_t	*ksidlist = crgetsidlist(cr);
741 
742 	/* Check for match with cred->cr_uid */
743 	if (zfs_fuid_is_cruser(zfsvfs, id, cr))
744 		return (B_TRUE);
745 
746 	/* Check for any match in the ksidlist */
747 	if (ksid && ksidlist) {
748 		uint32_t	idx = FUID_INDEX(id);
749 		uint32_t	rid = FUID_RID(id);
750 		const char	*domain;
751 
752 		if (idx == 0) {
753 			/*
754 			 * The ID passed in has idx zero, which means
755 			 * it's just a Unix UID.  That can never match
756 			 * anything in ksid_vec[] because those all
757 			 * have ksid->ks_id set to a Group ID.
758 			 */
759 			return (B_FALSE);
760 		}
761 
762 		domain = zfs_fuid_find_by_idx(zfsvfs, idx);
763 		ASSERT(domain != NULL);
764 
765 		if (strcmp(domain, IDMAP_WK_CREATOR_SID_AUTHORITY) == 0)
766 			return (B_FALSE);
767 
768 		if (ksidlist_has_sid(ksidlist, domain, rid))
769 			return (B_TRUE);
770 	}
771 	return (B_FALSE);
772 }
773 
774 /*
775  * Check to see if id is a groupmember.  If cred
776  * has ksid info then sidlist is checked first
777  * and if still not found then POSIX groups are checked
778  *
779  * Will use a straight FUID compare when possible.
780  */
781 boolean_t
zfs_groupmember(zfsvfs_t * zfsvfs,uint64_t id,cred_t * cr)782 zfs_groupmember(zfsvfs_t *zfsvfs, uint64_t id, cred_t *cr)
783 {
784 	ksid_t		*ksid = crgetsid(cr, KSID_GROUP);
785 	ksidlist_t	*ksidlist = crgetsidlist(cr);
786 	uid_t		gid;
787 	uint32_t	idx = FUID_INDEX(id);
788 	uint32_t	rid = FUID_RID(id);
789 
790 	if (ksid != NULL && id != IDMAP_WK_CREATOR_GROUP_GID) {
791 		const char	*domain = NULL;
792 		int ngroups;
793 
794 		if (idx != 0) {
795 			domain = zfs_fuid_find_by_idx(zfsvfs, idx);
796 			ASSERT(domain != NULL);
797 
798 			if (strcmp(domain,
799 			    IDMAP_WK_CREATOR_SID_AUTHORITY) == 0)
800 				return (B_FALSE);
801 
802 			if (strcmp(ksid_getdomain(ksid), domain) == 0 &&
803 			    rid == ksid_getrid(ksid))
804 				return (B_TRUE);
805 
806 			if (ksidlist != NULL &&
807 			    ksidlist_has_sid(ksidlist, domain, rid))
808 				return (B_TRUE);
809 		} else {
810 			if (ksid_getid(ksid) == rid)
811 				return (B_TRUE);
812 
813 			if (ksidlist != NULL &&
814 			    ksidlist_has_pid(ksidlist, rid))
815 				return (B_TRUE);
816 		}
817 
818 		/* If there are no useful subgroups, skip the idmap lookup */
819 		gid = crgetgid(cr);
820 		ngroups = crgetngroups(cr);
821 		if (ksid_getid(ksid) == gid &&
822 		    (ngroups == 0 ||
823 		    (ngroups == 1 && crgetgroups(cr)[0] == gid)))
824 			return (B_FALSE);
825 	}
826 
827 	/*
828 	 * Not found in ksidlist, check posix groups
829 	 */
830 	gid = zfs_fuid_map_id(zfsvfs, id, cr, ZFS_GROUP);
831 	return (groupmember(gid, cr));
832 }
833 
834 void
zfs_fuid_txhold(zfsvfs_t * zfsvfs,dmu_tx_t * tx)835 zfs_fuid_txhold(zfsvfs_t *zfsvfs, dmu_tx_t *tx)
836 {
837 	if (zfsvfs->z_fuid_obj == 0) {
838 		dmu_tx_hold_bonus(tx, DMU_NEW_OBJECT);
839 		dmu_tx_hold_write(tx, DMU_NEW_OBJECT, 0,
840 		    FUID_SIZE_ESTIMATE(zfsvfs));
841 		dmu_tx_hold_zap(tx, MASTER_NODE_OBJ, FALSE, NULL);
842 	} else {
843 		dmu_tx_hold_bonus(tx, zfsvfs->z_fuid_obj);
844 		dmu_tx_hold_write(tx, zfsvfs->z_fuid_obj, 0,
845 		    FUID_SIZE_ESTIMATE(zfsvfs));
846 	}
847 }
848 #endif
849