xref: /titanic_50/usr/src/cmd/svc/configd/rc_node.c (revision 159cf8a6ecac7ecbb601c9653abfd0fa878075d8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * rc_node.c - object management primitives
31  *
32  * This layer manages entities, their data structure, its locking, iterators,
33  * transactions, and change notification requests.  Entities (scopes,
34  * services, instances, snapshots, snaplevels, property groups, "composed"
35  * property groups (see composition below), and properties) are represented by
36  * rc_node_t's and are kept in the cache_hash hash table.  (Property values
37  * are kept in the rn_values member of the respective property -- not as
38  * separate objects.)  Iterators are represented by rc_node_iter_t's.
39  * Transactions are represented by rc_node_tx_t's and are only allocated as
40  * part of repcache_tx_t's in the client layer (client.c).  Change
41  * notification requests are represented by rc_notify_t structures and are
42  * described below.
43  *
44  * The entity tree is rooted at rc_scope, which rc_node_init() initializes to
45  * the "localhost" scope.  The tree is filled in from the database on-demand
46  * by rc_node_fill_children(), usually from rc_iter_create() since iterators
47  * are the only way to find the children of an entity.
48  *
49  * Each rc_node_t is protected by its rn_lock member.  Operations which can
50  * take too long, however, should serialize on an RC_NODE_WAITING_FLAGS bit in
51  * rn_flags with the rc_node_{hold,rele}_flag() functions.  And since pointers
52  * to rc_node_t's are allowed, rn_refs is a reference count maintained by
53  * rc_node_{hold,rele}().  See configd.h for locking order information.
54  *
55  * When a node (property group or snapshot) is updated, a new node takes the
56  * place of the old node in the global hash, and the old node is hung off of
57  * the rn_former list of the new node.  At the same time, all of its children
58  * have their rn_parent_ref pointer set, and any holds they have are reflected
59  * in the old node's rn_other_refs count.  This is automatically kept up
60  * to date, until the final reference to the subgraph is dropped, at which
61  * point the node is unrefed and destroyed, along with all of its children.
62  *
63  * Locking rules: To dereference an rc_node_t * (usually to lock it), you must
64  * have a hold (rc_node_hold()) on it or otherwise be sure that it hasn't been
65  * rc_node_destroy()ed (hold a lock on its parent or child, hold a flag,
66  * etc.).  Once you have locked an rc_node_t you must check its rn_flags for
67  * RC_NODE_DEAD before you can use it.  This is usually done with the
68  * rc_node_{wait,hold}_flag() functions (often via the rc_node_check_*()
69  * functions & RC_NODE_*() macros), which fail if the object has died.
70  *
71  * An ITER_START for a non-ENTITY_VALUE induces an rc_node_fill_children()
72  * call via rc_node_setup_iter() to populate the rn_children uu_list of the
73  * rc_node_t * in question and a call to uu_list_walk_start() on that list.  For
74  * ITER_READ, rc_iter_next() uses uu_list_walk_next() to find the next
75  * appropriate child.
76  *
77  * An ITER_START for an ENTITY_VALUE makes sure the node has its values
78  * filled, and sets up the iterator.  An ITER_READ_VALUE just copies out
79  * the proper values and updates the offset information.
80  *
81  * When a property group gets changed by a transaction, it sticks around as
82  * a child of its replacement property group, but is removed from the parent.
83  *
84  * To allow aliases, snapshots are implemented with a level of indirection.
85  * A snapshot rc_node_t has a snapid which refers to an rc_snapshot_t in
86  * snapshot.c which contains the authoritative snaplevel information.  The
87  * snapid is "assigned" by rc_attach_snapshot().
88  *
89  * We provide the client layer with rc_node_ptr_t's to reference objects.
90  * Objects referred to by them are automatically held & released by
91  * rc_node_assign() & rc_node_clear().  The RC_NODE_PTR_*() macros are used at
92  * client.c entry points to read the pointers.  They fetch the pointer to the
93  * object, return (from the function) if it is dead, and lock, hold, or hold
94  * a flag of the object.
95  */
96 
97 /*
98  * Permission checking is authorization-based: some operations may only
99  * proceed if the user has been assigned at least one of a set of
100  * authorization strings.  The set of enabling authorizations depends on the
101  * operation and the target object.  The set of authorizations assigned to
102  * a user is determined by reading /etc/security/policy.conf, querying the
103  * user_attr database, and possibly querying the prof_attr database, as per
104  * chkauthattr() in libsecdb.
105  *
106  * The fastest way to decide whether the two sets intersect is by entering the
107  * strings into a hash table and detecting collisions, which takes linear time
108  * in the total size of the sets.  Except for the authorization patterns which
109  * may be assigned to users, which without advanced pattern-matching
110  * algorithms will take O(n) in the number of enabling authorizations, per
111  * pattern.
112  *
113  * We can achieve some practical speed-ups by noting that if we enter all of
114  * the authorizations from one of the sets into the hash table we can merely
115  * check the elements of the second set for existence without adding them.
116  * This reduces memory requirements and hash table clutter.  The enabling set
117  * is well suited for this because it is internal to configd (for now, at
118  * least).  Combine this with short-circuiting and we can even minimize the
119  * number of queries to the security databases (user_attr & prof_attr).
120  *
121  * To force this usage onto clients we provide functions for adding
122  * authorizations to the enabling set of a permission context structure
123  * (perm_add_*()) and one to decide whether the user associated with the
124  * current door call client possesses any of them (perm_granted()).
125  *
126  * At some point, a generic version of this should move to libsecdb.
127  */
128 
129 /*
130  * Composition is the combination of sets of properties.  The sets are ordered
131  * and properties in higher sets obscure properties of the same name in lower
132  * sets.  Here we present a composed view of an instance's properties as the
133  * union of its properties and its service's properties.  Similarly the
134  * properties of snaplevels are combined to form a composed view of the
135  * properties of a snapshot (which should match the composed view of the
136  * properties of the instance when the snapshot was taken).
137  *
138  * In terms of the client interface, the client may request that a property
139  * group iterator for an instance or snapshot be composed.  Property groups
140  * traversed by such an iterator may not have the target entity as a parent.
141  * Similarly, the properties traversed by a property iterator for those
142  * property groups may not have the property groups iterated as parents.
143  *
144  * Implementation requires that iterators for instances and snapshots be
145  * composition-savvy, and that we have a "composed property group" entity
146  * which represents the composition of a number of property groups.  Iteration
147  * over "composed property groups" yields properties which may have different
148  * parents, but for all other operations a composed property group behaves
149  * like the top-most property group it represents.
150  *
151  * The implementation is based on the rn_cchain[] array of rc_node_t pointers
152  * in rc_node_t.  For instances, the pointers point to the instance and its
153  * parent service.  For snapshots they point to the child snaplevels, and for
154  * composed property groups they point to property groups.  A composed
155  * iterator carries an index into rn_cchain[].  Thus most of the magic ends up
156  * in the rc_iter_*() code.
157  */
158 
159 #include <assert.h>
160 #include <atomic.h>
161 #include <errno.h>
162 #include <libuutil.h>
163 #include <libscf.h>
164 #include <libscf_priv.h>
165 #include <prof_attr.h>
166 #include <pthread.h>
167 #include <stdio.h>
168 #include <stdlib.h>
169 #include <strings.h>
170 #include <sys/types.h>
171 #include <unistd.h>
172 #include <user_attr.h>
173 
174 #include "configd.h"
175 
/*
 * Authorization strings, and the names of the property groups and
 * properties used by the permission-checking code.
 *
 * By string-literal concatenation AUTH_MANAGE is "solaris.smf.manage",
 * AUTH_MODIFY is "solaris.smf.modify" and AUTH_MODIFY_PREFIX is
 * "solaris.smf.modify." -- the latter is what perm_auth_for_pgtype()
 * prepends to a property group type name.
 *
 * The AUTH_PG_* names are aliases for the corresponding SCF_PG_* constants;
 * the AUTH_PROP_* names are literal strings (presumably property names --
 * their users are not in this part of the file).
 */
#define	AUTH_PREFIX		"solaris.smf."
#define	AUTH_MANAGE		AUTH_PREFIX "manage"
#define	AUTH_MODIFY		AUTH_PREFIX "modify"
#define	AUTH_MODIFY_PREFIX	AUTH_MODIFY "."
#define	AUTH_PG_ACTIONS		SCF_PG_RESTARTER_ACTIONS
#define	AUTH_PG_ACTIONS_TYPE	SCF_PG_RESTARTER_ACTIONS_TYPE
#define	AUTH_PG_GENERAL		SCF_PG_GENERAL
#define	AUTH_PG_GENERAL_TYPE	SCF_PG_GENERAL_TYPE
#define	AUTH_PG_GENERAL_OVR	SCF_PG_GENERAL_OVR
#define	AUTH_PG_GENERAL_OVR_TYPE  SCF_PG_GENERAL_OVR_TYPE
#define	AUTH_PROP_ACTION	"action_authorization"
#define	AUTH_PROP_ENABLED	"enabled"
#define	AUTH_PROP_MODIFY	"modify_authorization"
#define	AUTH_PROP_VALUE		"value_authorization"
/* libsecdb should take care of this. */
/* Delimiter set handed to strtok_r() when splitting auth/profile lists. */
#define	RBAC_AUTH_SEP		","
192 
/* Capacity of rt_valid_children[] in each rc_type_info_t. */
#define	MAX_VALID_CHILDREN 3

/*
 * Static description of one entity type.  rc_types[] holds one of these per
 * type, indexed by the type number.
 */
typedef struct rc_type_info {
	uint32_t	rt_type;		/* matches array index */
	/* number of leading rl_ids[] entries that identify a node */
	uint32_t	rt_num_ids;
	/* flags handed to uu_check_name() by rc_check_type_name() */
	uint32_t	rt_name_flags;
	/* child types accepted by rc_check_parent_child() */
	uint32_t	rt_valid_children[MAX_VALID_CHILDREN];
} rc_type_info_t;
201 
/* rt_name_flags value used by the NONE and SNAPLEVEL entries below. */
#define	RT_NO_NAME	-1U

/*
 * Per-type information, indexed by entity type.  Each row gives the type,
 * the number of significant ids, the name-checking flags, and the list of
 * child types the type may contain.  Unlisted rt_valid_children[] slots are
 * implicitly zero; rc_check_parent_child() rejects a child type of 0 before
 * scanning the list, so those slots never match.
 *
 * The final {-1UL} row is a terminator.
 */
static rc_type_info_t rc_types[] = {
	{REP_PROTOCOL_ENTITY_NONE, 0, RT_NO_NAME},
	{REP_PROTOCOL_ENTITY_SCOPE, 0, 0,
	    {REP_PROTOCOL_ENTITY_SERVICE, REP_PROTOCOL_ENTITY_SCOPE}},
	{REP_PROTOCOL_ENTITY_SERVICE, 0, UU_NAME_DOMAIN | UU_NAME_PATH,
	    {REP_PROTOCOL_ENTITY_INSTANCE, REP_PROTOCOL_ENTITY_PROPERTYGRP}},
	{REP_PROTOCOL_ENTITY_INSTANCE, 1, UU_NAME_DOMAIN,
	    {REP_PROTOCOL_ENTITY_SNAPSHOT, REP_PROTOCOL_ENTITY_PROPERTYGRP}},
	{REP_PROTOCOL_ENTITY_SNAPSHOT, 2, UU_NAME_DOMAIN,
	    {REP_PROTOCOL_ENTITY_SNAPLEVEL, REP_PROTOCOL_ENTITY_PROPERTYGRP}},
	{REP_PROTOCOL_ENTITY_SNAPLEVEL, 4, RT_NO_NAME,
	    {REP_PROTOCOL_ENTITY_PROPERTYGRP}},
	{REP_PROTOCOL_ENTITY_PROPERTYGRP, 5, UU_NAME_DOMAIN,
	    {REP_PROTOCOL_ENTITY_PROPERTY}},
	{REP_PROTOCOL_ENTITY_CPROPERTYGRP, 0, UU_NAME_DOMAIN,
	    {REP_PROTOCOL_ENTITY_PROPERTY}},
	{REP_PROTOCOL_ENTITY_PROPERTY, 7, UU_NAME_DOMAIN},
	{-1UL}
};
/*
 * Number of rows in rc_types[].
 * NOTE(review): this count includes the {-1UL} terminator row, so the
 * "type < NUM_TYPES" range checks in this file admit the terminator's
 * index as well -- confirm that this is intended.
 */
#define	NUM_TYPES	((sizeof (rc_types) / sizeof (*rc_types)))
224 
/*
 * Element of a permcheck_t hash table.
 *
 * pce_auth is declared with one byte but holds a whole NUL-terminated
 * authorization string: pc_add() allocates
 * offsetof(struct pc_elt, pce_auth) + strlen(auth) + 1 bytes per element.
 */
struct pc_elt {
	struct pc_elt	*pce_next;	/* next element in the same bucket */
	char		pce_auth[1];	/* authorization string (see above) */
};
230 
/*
 * An authorization set hash table.
 *
 * Buckets are selected with "hash & (pc_bnum - 1)", so pc_bnum must stay a
 * power of two; pc_create() starts it at 8 and pc_grow() doubles it.
 */
typedef struct {
	struct pc_elt	**pc_buckets;		/* array of pc_bnum chains */
	uint_t		pc_bnum;		/* number of buckets */
	uint_t		pc_enum;		/* number of elements */
} permcheck_t;
237 
/*
 * libuutil list pools.  Their creation is not in this part of the file;
 * rc_notify_pool is the pool rc_notify_deletion() initializes notification
 * list nodes against.
 */
static uu_list_pool_t *rc_children_pool;
static uu_list_pool_t *rc_pg_notify_pool;
static uu_list_pool_t *rc_notify_pool;
static uu_list_pool_t *rc_notify_info_pool;

/* Root of the entity tree (see the block comment at the top of the file). */
static rc_node_t *rc_scope;

/*
 * rc_pg_notify_lock protects all notification state.  rc_notify_remove_node()
 * sleeps on rc_pg_notify_cv for as long as rc_notify_in_use is non-zero.
 */
static pthread_mutex_t	rc_pg_notify_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t	rc_pg_notify_cv = PTHREAD_COND_INITIALIZER;
static uint_t		rc_notify_in_use;	/* blocks removals */

/* Serializes calls to _get_auth_policy() in perm_granted(). */
static pthread_mutex_t	perm_lock = PTHREAD_MUTEX_INITIALIZER;

/* Defined later in the file; called with the node's rn_lock held. */
static void rc_node_unrefed(rc_node_t *np);
252 
253 /*
254  * We support an arbitrary number of clients interested in events for certain
255  * types of changes.  Each client is represented by an rc_notify_info_t, and
256  * all clients are chained onto the rc_notify_info_list.
257  *
258  * The rc_notify_list is the global notification list.  Each entry is of
259  * type rc_notify_t, which is embedded in one of three other structures:
260  *
261  *	rc_node_t		property group update notification
262  *	rc_notify_delete_t	object deletion notification
263  *	rc_notify_info_t	notification clients
264  *
265  * Which type of object is determined by which pointer in the rc_notify_t is
266  * non-NULL.
267  *
268  * New notifications and clients are added to the end of the list.
269  * Notifications no-one is interested in are never added to the list.
270  *
271  * Clients use their position in the list to track which notifications they
272  * have not yet reported.  As they process notifications, they move forward
273  * in the list past them.  There is always a client at the beginning of the
274  * list -- as he moves past notifications, he removes them from the list and
275  * cleans them up.
276  *
277  * The rc_pg_notify_lock protects all notification state.  The rc_pg_notify_cv
278  * is used for global signalling, and each client has a cv which he waits for
279  * events of interest on.
280  */
/* The notification clients (rc_notify_info_t), walked under rc_pg_notify_lock. */
static uu_list_t	*rc_notify_info_list;
/* The global notification list of rc_notify_t entries described above. */
static uu_list_t	*rc_notify_list;
283 
/*
 * The node cache: a fixed-size table of buckets.  HASH_SIZE is a power of
 * two so that a hash value can be reduced to a bucket index by masking with
 * HASH_MASK.
 */
#define	HASH_SIZE	512
#define	HASH_MASK	(HASH_SIZE - 1)

#pragma align 64(cache_hash)
static cache_bucket_t cache_hash[HASH_SIZE];

/* Bucket that a node with hash value h belongs to. */
#define	CACHE_BUCKET(h)		(&cache_hash[(h) & HASH_MASK])
291 
292 static uint32_t
293 rc_node_hash(rc_node_lookup_t *lp)
294 {
295 	uint32_t type = lp->rl_type;
296 	uint32_t backend = lp->rl_backend;
297 	uint32_t main = lp->rl_main_id;
298 	uint32_t *ids = lp->rl_ids;
299 
300 	rc_type_info_t *tp = &rc_types[type];
301 	uint32_t num_ids;
302 	uint32_t left;
303 	uint32_t hash;
304 
305 	assert(backend == BACKEND_TYPE_NORMAL ||
306 	    backend == BACKEND_TYPE_NONPERSIST);
307 
308 	assert(type > 0 && type < NUM_TYPES);
309 	num_ids = tp->rt_num_ids;
310 
311 	left = MAX_IDS - num_ids;
312 	assert(num_ids <= MAX_IDS);
313 
314 	hash = type * 7 + main * 5 + backend;
315 
316 	while (num_ids-- > 0)
317 		hash = hash * 11 + *ids++ * 7;
318 
319 	/*
320 	 * the rest should be zeroed
321 	 */
322 	while (left-- > 0)
323 		assert(*ids++ == 0);
324 
325 	return (hash);
326 }
327 
328 static int
329 rc_node_match(rc_node_t *np, rc_node_lookup_t *l)
330 {
331 	rc_node_lookup_t *r = &np->rn_id;
332 	rc_type_info_t *tp;
333 	uint32_t type;
334 	uint32_t num_ids;
335 
336 	if (r->rl_main_id != l->rl_main_id)
337 		return (0);
338 
339 	type = r->rl_type;
340 	if (type != l->rl_type)
341 		return (0);
342 
343 	assert(type > 0 && type < NUM_TYPES);
344 
345 	tp = &rc_types[r->rl_type];
346 	num_ids = tp->rt_num_ids;
347 
348 	assert(num_ids <= MAX_IDS);
349 	while (num_ids-- > 0)
350 		if (r->rl_ids[num_ids] != l->rl_ids[num_ids])
351 			return (0);
352 
353 	return (1);
354 }
355 
356 /*
357  * the "other" references on a node are maintained in an atomically
358  * updated refcount, rn_other_refs.  This can be bumped from arbitrary
359  * context, and tracks references to a possibly out-of-date node's children.
360  *
361  * To prevent the node from disappearing between the final drop of
362  * rn_other_refs and the unref handling, rn_other_refs_held is bumped on
363  * 0->1 transitions and decremented (with the node lock held) on 1->0
364  * transitions.
365  */
366 static void
367 rc_node_hold_other(rc_node_t *np)
368 {
369 	if (atomic_add_32_nv(&np->rn_other_refs, 1) == 1) {
370 		atomic_add_32(&np->rn_other_refs_held, 1);
371 		assert(np->rn_other_refs_held > 0);
372 	}
373 	assert(np->rn_other_refs > 0);
374 }
375 
376 /*
377  * No node locks may be held
378  */
379 static void
380 rc_node_rele_other(rc_node_t *np)
381 {
382 	assert(np->rn_other_refs > 0);
383 	if (atomic_add_32_nv(&np->rn_other_refs, -1) == 0) {
384 		(void) pthread_mutex_lock(&np->rn_lock);
385 		assert(np->rn_other_refs_held > 0);
386 		if (atomic_add_32_nv(&np->rn_other_refs_held, -1) == 0 &&
387 		    np->rn_refs == 0 && (np->rn_flags & RC_NODE_OLD))
388 			rc_node_unrefed(np);
389 		else
390 			(void) pthread_mutex_unlock(&np->rn_lock);
391 	}
392 }
393 
394 static void
395 rc_node_hold_locked(rc_node_t *np)
396 {
397 	assert(MUTEX_HELD(&np->rn_lock));
398 
399 	if (np->rn_refs == 0 && (np->rn_flags & RC_NODE_PARENT_REF))
400 		rc_node_hold_other(np->rn_parent_ref);
401 	np->rn_refs++;
402 	assert(np->rn_refs > 0);
403 }
404 
405 static void
406 rc_node_hold(rc_node_t *np)
407 {
408 	(void) pthread_mutex_lock(&np->rn_lock);
409 	rc_node_hold_locked(np);
410 	(void) pthread_mutex_unlock(&np->rn_lock);
411 }
412 
413 static void
414 rc_node_rele_locked(rc_node_t *np)
415 {
416 	int unref = 0;
417 	rc_node_t *par_ref = NULL;
418 
419 	assert(MUTEX_HELD(&np->rn_lock));
420 	assert(np->rn_refs > 0);
421 
422 	if (--np->rn_refs == 0) {
423 		if (np->rn_flags & RC_NODE_PARENT_REF)
424 			par_ref = np->rn_parent_ref;
425 
426 		/*
427 		 * Composed property groups are only as good as their
428 		 * references.
429 		 */
430 		if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP)
431 			np->rn_flags |= RC_NODE_DEAD;
432 
433 		if ((np->rn_flags & (RC_NODE_DEAD|RC_NODE_OLD)) &&
434 		    np->rn_other_refs == 0 && np->rn_other_refs_held == 0)
435 			unref = 1;
436 	}
437 
438 	if (unref)
439 		rc_node_unrefed(np);
440 	else
441 		(void) pthread_mutex_unlock(&np->rn_lock);
442 
443 	if (par_ref != NULL)
444 		rc_node_rele_other(par_ref);
445 }
446 
447 void
448 rc_node_rele(rc_node_t *np)
449 {
450 	(void) pthread_mutex_lock(&np->rn_lock);
451 	rc_node_rele_locked(np);
452 }
453 
454 static cache_bucket_t *
455 cache_hold(uint32_t h)
456 {
457 	cache_bucket_t *bp = CACHE_BUCKET(h);
458 	(void) pthread_mutex_lock(&bp->cb_lock);
459 	return (bp);
460 }
461 
462 static void
463 cache_release(cache_bucket_t *bp)
464 {
465 	(void) pthread_mutex_unlock(&bp->cb_lock);
466 }
467 
468 static rc_node_t *
469 cache_lookup_unlocked(cache_bucket_t *bp, rc_node_lookup_t *lp)
470 {
471 	uint32_t h = rc_node_hash(lp);
472 	rc_node_t *np;
473 
474 	assert(MUTEX_HELD(&bp->cb_lock));
475 	assert(bp == CACHE_BUCKET(h));
476 
477 	for (np = bp->cb_head; np != NULL; np = np->rn_hash_next) {
478 		if (np->rn_hash == h && rc_node_match(np, lp)) {
479 			rc_node_hold(np);
480 			return (np);
481 		}
482 	}
483 
484 	return (NULL);
485 }
486 
487 static rc_node_t *
488 cache_lookup(rc_node_lookup_t *lp)
489 {
490 	uint32_t h;
491 	cache_bucket_t *bp;
492 	rc_node_t *np;
493 
494 	h = rc_node_hash(lp);
495 	bp = cache_hold(h);
496 
497 	np = cache_lookup_unlocked(bp, lp);
498 
499 	cache_release(bp);
500 
501 	return (np);
502 }
503 
504 static void
505 cache_insert_unlocked(cache_bucket_t *bp, rc_node_t *np)
506 {
507 	assert(MUTEX_HELD(&bp->cb_lock));
508 	assert(np->rn_hash == rc_node_hash(&np->rn_id));
509 	assert(bp == CACHE_BUCKET(np->rn_hash));
510 
511 	assert(np->rn_hash_next == NULL);
512 
513 	np->rn_hash_next = bp->cb_head;
514 	bp->cb_head = np;
515 }
516 
517 static void
518 cache_remove_unlocked(cache_bucket_t *bp, rc_node_t *np)
519 {
520 	rc_node_t **npp;
521 
522 	assert(MUTEX_HELD(&bp->cb_lock));
523 	assert(np->rn_hash == rc_node_hash(&np->rn_id));
524 	assert(bp == CACHE_BUCKET(np->rn_hash));
525 
526 	for (npp = &bp->cb_head; *npp != NULL; npp = &(*npp)->rn_hash_next)
527 		if (*npp == np)
528 			break;
529 
530 	assert(*npp == np);
531 	*npp = np->rn_hash_next;
532 	np->rn_hash_next = NULL;
533 }
534 
535 /*
536  * verify that the 'parent' type can have a child typed 'child'
537  * Fails with
538  *   _INVALID_TYPE - argument is invalid
539  *   _TYPE_MISMATCH - parent type cannot have children of type child
540  */
541 static int
542 rc_check_parent_child(uint32_t parent, uint32_t child)
543 {
544 	int idx;
545 	uint32_t type;
546 
547 	if (parent == 0 || parent >= NUM_TYPES ||
548 	    child == 0 || child >= NUM_TYPES)
549 		return (REP_PROTOCOL_FAIL_INVALID_TYPE); /* invalid types */
550 
551 	for (idx = 0; idx < MAX_VALID_CHILDREN; idx++) {
552 		type = rc_types[parent].rt_valid_children[idx];
553 		if (type == child)
554 			return (REP_PROTOCOL_SUCCESS);
555 	}
556 
557 	return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
558 }
559 
560 /*
561  * Fails with
562  *   _INVALID_TYPE - type is invalid
563  *   _BAD_REQUEST - name is an invalid name for a node of type type
564  */
565 int
566 rc_check_type_name(uint32_t type, const char *name)
567 {
568 	if (type == 0 || type >= NUM_TYPES)
569 		return (REP_PROTOCOL_FAIL_INVALID_TYPE); /* invalid types */
570 
571 	if (uu_check_name(name, rc_types[type].rt_name_flags) == -1)
572 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
573 
574 	return (REP_PROTOCOL_SUCCESS);
575 }
576 
577 static int
578 rc_check_pgtype_name(const char *name)
579 {
580 	if (uu_check_name(name, UU_NAME_DOMAIN) == -1)
581 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
582 
583 	return (REP_PROTOCOL_SUCCESS);
584 }
585 
586 static int
587 rc_notify_info_interested(rc_notify_info_t *rnip, rc_notify_t *np)
588 {
589 	rc_node_t *nnp = np->rcn_node;
590 	int i;
591 
592 	assert(MUTEX_HELD(&rc_pg_notify_lock));
593 
594 	if (np->rcn_delete != NULL) {
595 		assert(np->rcn_info == NULL && np->rcn_node == NULL);
596 		return (1);		/* everyone likes deletes */
597 	}
598 	if (np->rcn_node == NULL) {
599 		assert(np->rcn_info != NULL || np->rcn_delete != NULL);
600 		return (0);
601 	}
602 	assert(np->rcn_info == NULL);
603 
604 	for (i = 0; i < RC_NOTIFY_MAX_NAMES; i++) {
605 		if (rnip->rni_namelist[i] != NULL) {
606 			if (strcmp(nnp->rn_name, rnip->rni_namelist[i]) == 0)
607 				return (1);
608 		}
609 		if (rnip->rni_typelist[i] != NULL) {
610 			if (strcmp(nnp->rn_type, rnip->rni_typelist[i]) == 0)
611 				return (1);
612 		}
613 	}
614 	return (0);
615 }
616 
617 static void
618 rc_notify_insert_node(rc_node_t *nnp)
619 {
620 	rc_notify_t *np = &nnp->rn_notify;
621 	rc_notify_info_t *nip;
622 	int found = 0;
623 
624 	assert(np->rcn_info == NULL);
625 
626 	if (nnp->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTYGRP)
627 		return;
628 
629 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
630 	np->rcn_node = nnp;
631 	for (nip = uu_list_first(rc_notify_info_list); nip != NULL;
632 	    nip = uu_list_next(rc_notify_info_list, nip)) {
633 		if (rc_notify_info_interested(nip, np)) {
634 			(void) pthread_cond_broadcast(&nip->rni_cv);
635 			found++;
636 		}
637 	}
638 	if (found)
639 		(void) uu_list_insert_before(rc_notify_list, NULL, np);
640 	else
641 		np->rcn_node = NULL;
642 
643 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
644 }
645 
646 static void
647 rc_notify_deletion(rc_notify_delete_t *ndp, const char *service,
648     const char *instance, const char *pg)
649 {
650 	rc_notify_info_t *nip;
651 
652 	uu_list_node_init(&ndp->rnd_notify, &ndp->rnd_notify.rcn_list_node,
653 	    rc_notify_pool);
654 	ndp->rnd_notify.rcn_delete = ndp;
655 
656 	(void) snprintf(ndp->rnd_fmri, sizeof (ndp->rnd_fmri),
657 	    "svc:/%s%s%s%s%s", service,
658 	    (instance != NULL)? ":" : "", (instance != NULL)? instance : "",
659 	    (pg != NULL)? "/:properties/" : "", (pg != NULL)? pg : "");
660 
661 	/*
662 	 * add to notification list, notify watchers
663 	 */
664 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
665 	for (nip = uu_list_first(rc_notify_info_list); nip != NULL;
666 	    nip = uu_list_next(rc_notify_info_list, nip))
667 		(void) pthread_cond_broadcast(&nip->rni_cv);
668 	(void) uu_list_insert_before(rc_notify_list, NULL, ndp);
669 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
670 }
671 
672 static void
673 rc_notify_remove_node(rc_node_t *nnp)
674 {
675 	rc_notify_t *np = &nnp->rn_notify;
676 
677 	assert(np->rcn_info == NULL);
678 	assert(!MUTEX_HELD(&nnp->rn_lock));
679 
680 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
681 	while (np->rcn_node != NULL) {
682 		if (rc_notify_in_use) {
683 			(void) pthread_cond_wait(&rc_pg_notify_cv,
684 			    &rc_pg_notify_lock);
685 			continue;
686 		}
687 		(void) uu_list_remove(rc_notify_list, np);
688 		np->rcn_node = NULL;
689 		break;
690 	}
691 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
692 }
693 
694 static void
695 rc_notify_remove_locked(rc_notify_t *np)
696 {
697 	assert(MUTEX_HELD(&rc_pg_notify_lock));
698 	assert(rc_notify_in_use == 0);
699 
700 	(void) uu_list_remove(rc_notify_list, np);
701 	if (np->rcn_node) {
702 		np->rcn_node = NULL;
703 	} else if (np->rcn_delete) {
704 		uu_free(np->rcn_delete);
705 	} else {
706 		assert(0);	/* CAN'T HAPPEN */
707 	}
708 }
709 
710 /*
711  * Permission checking functions.  See comment atop this file.
712  */
713 #ifndef NATIVE_BUILD
714 static permcheck_t *
715 pc_create()
716 {
717 	permcheck_t *p;
718 
719 	p = uu_zalloc(sizeof (*p));
720 	if (p == NULL)
721 		return (NULL);
722 	p->pc_bnum = 8;			/* Normal case will only have 2 elts. */
723 	p->pc_buckets = uu_zalloc(sizeof (*p->pc_buckets) * p->pc_bnum);
724 	if (p->pc_buckets == NULL) {
725 		uu_free(p);
726 		return (NULL);
727 	}
728 
729 	p->pc_enum = 0;
730 	return (p);
731 }
732 
733 static void
734 pc_free(permcheck_t *pcp)
735 {
736 	uint_t i;
737 	struct pc_elt *ep, *next;
738 
739 	for (i = 0; i < pcp->pc_bnum; ++i) {
740 		for (ep = pcp->pc_buckets[i]; ep != NULL; ep = next) {
741 			next = ep->pce_next;
742 			free(ep);
743 		}
744 	}
745 
746 	free(pcp->pc_buckets);
747 	free(pcp);
748 }
749 
/*
 * Hash an authorization string.
 *
 * Generic hash function from uts/common/os/modhash.c: each character is
 * shifted into the hash four bits at a time, and whenever the top four bits
 * become non-zero they are folded back into the low byte and cleared.
 */
static uint32_t
pc_hash(const char *auth)
{
	uint32_t hash = 0;
	const char *cp = auth;

	while (*cp != '\0') {
		uint32_t top;

		hash = (hash << 4) + *cp++;
		top = hash & 0xf0000000;
		if (top != 0)
			hash ^= (top >> 24) ^ top;
	}

	return (hash);
}
770 
771 static int
772 pc_exists(const permcheck_t *pcp, const char *auth)
773 {
774 	uint32_t h;
775 	struct pc_elt *ep;
776 
777 	h = pc_hash(auth);
778 	for (ep = pcp->pc_buckets[h & (pcp->pc_bnum - 1)];
779 	    ep != NULL;
780 	    ep = ep->pce_next) {
781 		if (strcmp(auth, ep->pce_auth) == 0)
782 			return (1);
783 	}
784 
785 	return (0);
786 }
787 
788 static int
789 pc_match(const permcheck_t *pcp, const char *pattern)
790 {
791 	uint_t i;
792 	struct pc_elt *ep;
793 
794 	for (i = 0; i < pcp->pc_bnum; ++i) {
795 		for (ep = pcp->pc_buckets[i]; ep != NULL; ep = ep->pce_next) {
796 			if (_auth_match(pattern, ep->pce_auth))
797 				return (1);
798 		}
799 	}
800 
801 	return (0);
802 }
803 
804 static int
805 pc_grow(permcheck_t *pcp)
806 {
807 	uint_t new_bnum, i, j;
808 	struct pc_elt **new_buckets;
809 	struct pc_elt *ep, *next;
810 
811 	new_bnum = pcp->pc_bnum * 2;
812 	if (new_bnum < pcp->pc_bnum)
813 		/* Homey don't play that. */
814 		return (-1);
815 
816 	new_buckets = uu_zalloc(sizeof (*new_buckets) * new_bnum);
817 	if (new_buckets == NULL)
818 		return (-1);
819 
820 	for (i = 0; i < pcp->pc_bnum; ++i) {
821 		for (ep = pcp->pc_buckets[i]; ep != NULL; ep = next) {
822 			next = ep->pce_next;
823 			j = pc_hash(ep->pce_auth) & (new_bnum - 1);
824 			ep->pce_next = new_buckets[j];
825 			new_buckets[j] = ep;
826 		}
827 	}
828 
829 	uu_free(pcp->pc_buckets);
830 	pcp->pc_buckets = new_buckets;
831 	pcp->pc_bnum = new_bnum;
832 
833 	return (0);
834 }
835 
836 static int
837 pc_add(permcheck_t *pcp, const char *auth)
838 {
839 	struct pc_elt *ep;
840 	uint_t i;
841 
842 	ep = uu_zalloc(offsetof(struct pc_elt, pce_auth) + strlen(auth) + 1);
843 	if (ep == NULL)
844 		return (-1);
845 
846 	/* Grow if pc_enum / pc_bnum > 3/4. */
847 	if (pcp->pc_enum * 4 > 3 * pcp->pc_bnum)
848 		/* Failure is not a stopper; we'll try again next time. */
849 		(void) pc_grow(pcp);
850 
851 	(void) strcpy(ep->pce_auth, auth);
852 
853 	i = pc_hash(auth) & (pcp->pc_bnum - 1);
854 	ep->pce_next = pcp->pc_buckets[i];
855 	pcp->pc_buckets[i] = ep;
856 
857 	++pcp->pc_enum;
858 
859 	return (0);
860 }
861 
862 /*
863  * For the type of a property group, return the authorization which may be
864  * used to modify it.
865  */
866 static const char *
867 perm_auth_for_pgtype(const char *pgtype)
868 {
869 	if (strcmp(pgtype, SCF_GROUP_METHOD) == 0)
870 		return (AUTH_MODIFY_PREFIX "method");
871 	else if (strcmp(pgtype, SCF_GROUP_DEPENDENCY) == 0)
872 		return (AUTH_MODIFY_PREFIX "dependency");
873 	else if (strcmp(pgtype, SCF_GROUP_APPLICATION) == 0)
874 		return (AUTH_MODIFY_PREFIX "application");
875 	else if (strcmp(pgtype, SCF_GROUP_FRAMEWORK) == 0)
876 		return (AUTH_MODIFY_PREFIX "framework");
877 	else
878 		return (NULL);
879 }
880 
881 /*
882  * Fails with
883  *   _NO_RESOURCES - out of memory
884  */
885 static int
886 perm_add_enabling(permcheck_t *pcp, const char *auth)
887 {
888 	return (pc_add(pcp, auth) == 0 ? REP_PROTOCOL_SUCCESS :
889 	    REP_PROTOCOL_FAIL_NO_RESOURCES);
890 }
891 
892 /* Note that perm_add_enabling_values() is defined below. */
893 
894 /*
895  * perm_granted() returns 1 if the current door caller has one of the enabling
896  * authorizations in pcp, 0 if it doesn't, and -1 if an error (usually lack of
897  * memory) occurs.  check_auth_list() checks an RBAC_AUTH_SEP-separated list
898  * of authorizations for existence in pcp, and check_prof_list() checks the
899  * authorizations granted to an RBAC_AUTH_SEP-separated list of profiles.
900  */
901 static int
902 check_auth_list(const permcheck_t *pcp, char *authlist)
903 {
904 	char *auth, *lasts;
905 	int ret;
906 
907 	for (auth = (char *)strtok_r(authlist, RBAC_AUTH_SEP, &lasts);
908 	    auth != NULL;
909 	    auth = (char *)strtok_r(NULL, RBAC_AUTH_SEP, &lasts)) {
910 		if (strchr(auth, KV_WILDCHAR) == NULL)
911 			ret = pc_exists(pcp, auth);
912 		else
913 			ret = pc_match(pcp, auth);
914 
915 		if (ret)
916 			return (ret);
917 	}
918 
919 	return (0);
920 }
921 
922 static int
923 check_prof_list(const permcheck_t *pcp, char *proflist)
924 {
925 	char *prof, *lasts, *authlist, *subproflist;
926 	profattr_t *pap;
927 	int ret = 0;
928 
929 	for (prof = strtok_r(proflist, RBAC_AUTH_SEP, &lasts);
930 	    prof != NULL;
931 	    prof = strtok_r(NULL, RBAC_AUTH_SEP, &lasts)) {
932 		pap = getprofnam(prof);
933 		if (pap == NULL)
934 			continue;
935 
936 		authlist = kva_match(pap->attr, PROFATTR_AUTHS_KW);
937 		if (authlist != NULL)
938 			ret = check_auth_list(pcp, authlist);
939 
940 		if (!ret) {
941 			subproflist = kva_match(pap->attr, PROFATTR_PROFS_KW);
942 			if (subproflist != NULL)
943 				/* depth check to avoid invinite recursion? */
944 				ret = check_prof_list(pcp, subproflist);
945 		}
946 
947 		free_profattr(pap);
948 		if (ret)
949 			return (ret);
950 	}
951 
952 	return (ret);
953 }
954 
955 static int
956 perm_granted(const permcheck_t *pcp)
957 {
958 	ucred_t *uc;
959 
960 	int ret = 0;
961 	uid_t uid;
962 	userattr_t *uap;
963 	char *authlist, *proflist, *def_prof = NULL;
964 
965 	/*
966 	 * Get generic authorizations from policy.conf
967 	 *
968 	 * Note that _get_auth_policy is not threadsafe, so we single-thread
969 	 * access to it.
970 	 */
971 	(void) pthread_mutex_lock(&perm_lock);
972 	ret = _get_auth_policy(&authlist, &def_prof);
973 	(void) pthread_mutex_unlock(&perm_lock);
974 
975 	if (ret != 0)
976 		return (-1);
977 
978 	if (authlist != NULL) {
979 		ret = check_auth_list(pcp, authlist);
980 		free(authlist);
981 
982 		if (ret) {
983 			free(def_prof);
984 			return (ret);
985 		}
986 	}
987 
988 	/*
989 	 * Put off checking def_prof for later in an attempt to consolidate
990 	 * prof_attr accesses.
991 	 */
992 
993 	/* Get the uid */
994 	if ((uc = get_ucred()) == NULL) {
995 		free(def_prof);
996 
997 		if (errno == EINVAL) {
998 			/*
999 			 * Client is no longer waiting for our response (e.g.,
1000 			 * it received a signal & resumed with EINTR).
1001 			 * Punting with door_return() would be nice but we
1002 			 * need to release all of the locks & references we
1003 			 * hold.  And we must report failure to the client
1004 			 * layer to keep it from ignoring retries as
1005 			 * already-done (idempotency & all that).  None of the
1006 			 * error codes fit very well, so we might as well
1007 			 * force the return of _PERMISSION_DENIED since we
1008 			 * couldn't determine the user.
1009 			 */
1010 			return (0);
1011 		}
1012 		assert(0);
1013 		abort();
1014 	}
1015 
1016 	uid = ucred_geteuid(uc);
1017 	assert(uid != -1);
1018 
1019 	uap = getuseruid(uid);
1020 	if (uap != NULL) {
1021 		/* Get the authorizations from user_attr. */
1022 		authlist = kva_match(uap->attr, USERATTR_AUTHS_KW);
1023 		if (authlist != NULL)
1024 			ret = check_auth_list(pcp, authlist);
1025 	}
1026 
1027 	if (!ret && def_prof != NULL) {
1028 		/* Check generic profiles. */
1029 		ret = check_prof_list(pcp, def_prof);
1030 	}
1031 
1032 	if (!ret && uap != NULL) {
1033 		proflist = kva_match(uap->attr, USERATTR_PROFILES_KW);
1034 		if (proflist != NULL)
1035 			ret = check_prof_list(pcp, proflist);
1036 	}
1037 
1038 	if (def_prof != NULL)
1039 		free(def_prof);
1040 	if (uap != NULL)
1041 		free_userattr(uap);
1042 
1043 	return (ret);
1044 }
1045 #endif /* NATIVE_BUILD */
1046 
1047 /*
1048  * flags in RC_NODE_WAITING_FLAGS are broadcast when unset, and are used to
1049  * serialize certain actions, and to wait for certain operations to complete
1050  *
1051  * The waiting flags are:
1052  *	RC_NODE_CHILDREN_CHANGING
1053  *		The child list is being built or changed (due to creation
1054  *		or deletion).  All iterators pause.
1055  *
1056  *	RC_NODE_USING_PARENT
1057  *		Someone is actively using the parent pointer, so we can't
1058  *		be removed from the parent list.
1059  *
1060  *	RC_NODE_CREATING_CHILD
1061  *		A child is being created -- locks out other creations, to
1062  *		prevent insert-insert races.
1063  *
1064  *	RC_NODE_IN_TX
1065  *		This object is running a transaction.
1066  *
1067  *	RC_NODE_DYING
1068  *		This node might be dying.  Always set as a set, using
1069  *		RC_NODE_DYING_FLAGS (which is everything but
1070  *		RC_NODE_USING_PARENT)
1071  */
1072 static int
1073 rc_node_hold_flag(rc_node_t *np, uint32_t flag)
1074 {
1075 	assert(MUTEX_HELD(&np->rn_lock));
1076 	assert((flag & ~RC_NODE_WAITING_FLAGS) == 0);
1077 
1078 	while (!(np->rn_flags & RC_NODE_DEAD) && (np->rn_flags & flag)) {
1079 		(void) pthread_cond_wait(&np->rn_cv, &np->rn_lock);
1080 	}
1081 	if (np->rn_flags & RC_NODE_DEAD)
1082 		return (0);
1083 
1084 	np->rn_flags |= flag;
1085 	return (1);
1086 }
1087 
1088 static void
1089 rc_node_rele_flag(rc_node_t *np, uint32_t flag)
1090 {
1091 	assert((flag & ~RC_NODE_WAITING_FLAGS) == 0);
1092 	assert(MUTEX_HELD(&np->rn_lock));
1093 	assert((np->rn_flags & flag) == flag);
1094 	np->rn_flags &= ~flag;
1095 	(void) pthread_cond_broadcast(&np->rn_cv);
1096 }
1097 
1098 /*
1099  * wait until a particular flag has cleared.  Fails if the object dies.
1100  */
1101 static int
1102 rc_node_wait_flag(rc_node_t *np, uint32_t flag)
1103 {
1104 	assert(MUTEX_HELD(&np->rn_lock));
1105 	while (!(np->rn_flags & RC_NODE_DEAD) && (np->rn_flags & flag))
1106 		(void) pthread_cond_wait(&np->rn_cv, &np->rn_lock);
1107 
1108 	return (!(np->rn_flags & RC_NODE_DEAD));
1109 }
1110 
1111 /*
1112  * On entry, np's lock must be held, and this thread must be holding
1113  * RC_NODE_USING_PARENT.  On return, both of them are released.
1114  *
1115  * If the return value is NULL, np either does not have a parent, or
1116  * the parent has been marked DEAD.
1117  *
1118  * If the return value is non-NULL, it is the parent of np, and both
1119  * its lock and the requested flags are held.
1120  */
1121 static rc_node_t *
1122 rc_node_hold_parent_flag(rc_node_t *np, uint32_t flag)
1123 {
1124 	rc_node_t *pp;
1125 
1126 	assert(MUTEX_HELD(&np->rn_lock));
1127 	assert(np->rn_flags & RC_NODE_USING_PARENT);
1128 
1129 	if ((pp = np->rn_parent) == NULL) {
1130 		rc_node_rele_flag(np, RC_NODE_USING_PARENT);
1131 		(void) pthread_mutex_unlock(&np->rn_lock);
1132 		return (NULL);
1133 	}
1134 	(void) pthread_mutex_unlock(&np->rn_lock);
1135 
1136 	(void) pthread_mutex_lock(&pp->rn_lock);
1137 	(void) pthread_mutex_lock(&np->rn_lock);
1138 	rc_node_rele_flag(np, RC_NODE_USING_PARENT);
1139 	(void) pthread_mutex_unlock(&np->rn_lock);
1140 
1141 	if (!rc_node_hold_flag(pp, flag)) {
1142 		(void) pthread_mutex_unlock(&pp->rn_lock);
1143 		return (NULL);
1144 	}
1145 	return (pp);
1146 }
1147 
1148 rc_node_t *
1149 rc_node_alloc(void)
1150 {
1151 	rc_node_t *np = uu_zalloc(sizeof (*np));
1152 
1153 	if (np == NULL)
1154 		return (NULL);
1155 
1156 	(void) pthread_mutex_init(&np->rn_lock, NULL);
1157 	(void) pthread_cond_init(&np->rn_cv, NULL);
1158 
1159 	np->rn_children = uu_list_create(rc_children_pool, np, 0);
1160 	np->rn_pg_notify_list = uu_list_create(rc_pg_notify_pool, np, 0);
1161 
1162 	uu_list_node_init(np, &np->rn_sibling_node, rc_children_pool);
1163 
1164 	uu_list_node_init(&np->rn_notify, &np->rn_notify.rcn_list_node,
1165 	    rc_notify_pool);
1166 
1167 	return (np);
1168 }
1169 
1170 void
1171 rc_node_destroy(rc_node_t *np)
1172 {
1173 	int i;
1174 
1175 	if (np->rn_flags & RC_NODE_UNREFED)
1176 		return;				/* being handled elsewhere */
1177 
1178 	assert(np->rn_refs == 0 && np->rn_other_refs == 0);
1179 	assert(np->rn_former == NULL);
1180 
1181 	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
1182 		/* Release the holds from rc_iter_next(). */
1183 		for (i = 0; i < COMPOSITION_DEPTH; ++i) {
1184 			/* rn_cchain[i] may be NULL for empty snapshots. */
1185 			if (np->rn_cchain[i] != NULL)
1186 				rc_node_rele(np->rn_cchain[i]);
1187 		}
1188 	}
1189 
1190 	if (np->rn_name != NULL)
1191 		free((void *)np->rn_name);
1192 	np->rn_name = NULL;
1193 	if (np->rn_type != NULL)
1194 		free((void *)np->rn_type);
1195 	np->rn_type = NULL;
1196 	if (np->rn_values != NULL)
1197 		object_free_values(np->rn_values, np->rn_valtype,
1198 		    np->rn_values_count, np->rn_values_size);
1199 	np->rn_values = NULL;
1200 
1201 	if (np->rn_snaplevel != NULL)
1202 		rc_snaplevel_rele(np->rn_snaplevel);
1203 	np->rn_snaplevel = NULL;
1204 
1205 	uu_list_node_fini(np, &np->rn_sibling_node, rc_children_pool);
1206 
1207 	uu_list_node_fini(&np->rn_notify, &np->rn_notify.rcn_list_node,
1208 	    rc_notify_pool);
1209 
1210 	assert(uu_list_first(np->rn_children) == NULL);
1211 	uu_list_destroy(np->rn_children);
1212 	uu_list_destroy(np->rn_pg_notify_list);
1213 
1214 	(void) pthread_mutex_destroy(&np->rn_lock);
1215 	(void) pthread_cond_destroy(&np->rn_cv);
1216 
1217 	uu_free(np);
1218 }
1219 
1220 /*
1221  * Link in a child node.
1222  *
1223  * Because of the lock ordering, cp has to already be in the hash table with
1224  * its lock dropped before we get it.  To prevent anyone from noticing that
1225  * it is parentless, the creation code sets the RC_NODE_USING_PARENT.  Once
1226  * we've linked it in, we release the flag.
1227  */
1228 static void
1229 rc_node_link_child(rc_node_t *np, rc_node_t *cp)
1230 {
1231 	assert(!MUTEX_HELD(&np->rn_lock));
1232 	assert(!MUTEX_HELD(&cp->rn_lock));
1233 
1234 	(void) pthread_mutex_lock(&np->rn_lock);
1235 	(void) pthread_mutex_lock(&cp->rn_lock);
1236 	assert(!(cp->rn_flags & RC_NODE_IN_PARENT) &&
1237 	    (cp->rn_flags & RC_NODE_USING_PARENT));
1238 
1239 	assert(rc_check_parent_child(np->rn_id.rl_type, cp->rn_id.rl_type) ==
1240 	    REP_PROTOCOL_SUCCESS);
1241 
1242 	cp->rn_parent = np;
1243 	cp->rn_flags |= RC_NODE_IN_PARENT;
1244 	(void) uu_list_insert_before(np->rn_children, NULL, cp);
1245 
1246 	(void) pthread_mutex_unlock(&np->rn_lock);
1247 
1248 	rc_node_rele_flag(cp, RC_NODE_USING_PARENT);
1249 	(void) pthread_mutex_unlock(&cp->rn_lock);
1250 }
1251 
1252 /*
1253  * Sets the rn_parent_ref field of all the children of np to pp -- always
1254  * initially invoked as rc_node_setup_parent_ref(np, np), we then recurse.
1255  *
1256  * This is used when we mark a node RC_NODE_OLD, so that when the object and
1257  * its children are no longer referenced, they will all be deleted as a unit.
1258  */
1259 static void
1260 rc_node_setup_parent_ref(rc_node_t *np, rc_node_t *pp)
1261 {
1262 	rc_node_t *cp;
1263 
1264 	assert(MUTEX_HELD(&np->rn_lock));
1265 
1266 	for (cp = uu_list_first(np->rn_children); cp != NULL;
1267 	    cp = uu_list_next(np->rn_children, cp)) {
1268 		(void) pthread_mutex_lock(&cp->rn_lock);
1269 		if (cp->rn_flags & RC_NODE_PARENT_REF) {
1270 			assert(cp->rn_parent_ref == pp);
1271 		} else {
1272 			assert(cp->rn_parent_ref == NULL);
1273 
1274 			cp->rn_flags |= RC_NODE_PARENT_REF;
1275 			cp->rn_parent_ref = pp;
1276 			if (cp->rn_refs != 0)
1277 				rc_node_hold_other(pp);
1278 		}
1279 		rc_node_setup_parent_ref(cp, pp);		/* recurse */
1280 		(void) pthread_mutex_unlock(&cp->rn_lock);
1281 	}
1282 }
1283 
1284 /*
1285  * Atomically replace 'np' with 'newp', with a parent of 'pp'.
1286  *
1287  * Requirements:
1288  *	*no* node locks may be held.
1289  *	pp must be held with RC_NODE_CHILDREN_CHANGING
1290  *	newp and np must be held with RC_NODE_IN_TX
1291  *	np must be marked RC_NODE_IN_PARENT, newp must not be
1292  *	np must be marked RC_NODE_OLD
1293  *
1294  * Afterwards:
1295  *	pp's RC_NODE_CHILDREN_CHANGING is dropped
1296  *	newp and np's RC_NODE_IN_TX is dropped
1297  *	newp->rn_former = np;
1298  *	newp is RC_NODE_IN_PARENT, np is not.
1299  *	interested notify subscribers have been notified of newp's new status.
1300  */
1301 static void
1302 rc_node_relink_child(rc_node_t *pp, rc_node_t *np, rc_node_t *newp)
1303 {
1304 	cache_bucket_t *bp;
1305 	/*
1306 	 * First, swap np and nnp in the cache.  newp's RC_NODE_IN_TX flag
1307 	 * keeps rc_node_update() from seeing it until we are done.
1308 	 */
1309 	bp = cache_hold(newp->rn_hash);
1310 	cache_remove_unlocked(bp, np);
1311 	cache_insert_unlocked(bp, newp);
1312 	cache_release(bp);
1313 
1314 	/*
1315 	 * replace np with newp in pp's list, and attach it to newp's rn_former
1316 	 * link.
1317 	 */
1318 	(void) pthread_mutex_lock(&pp->rn_lock);
1319 	assert(pp->rn_flags & RC_NODE_CHILDREN_CHANGING);
1320 
1321 	(void) pthread_mutex_lock(&newp->rn_lock);
1322 	assert(!(newp->rn_flags & RC_NODE_IN_PARENT));
1323 	assert(newp->rn_flags & RC_NODE_IN_TX);
1324 
1325 	(void) pthread_mutex_lock(&np->rn_lock);
1326 	assert(np->rn_flags & RC_NODE_IN_PARENT);
1327 	assert(np->rn_flags & RC_NODE_OLD);
1328 	assert(np->rn_flags & RC_NODE_IN_TX);
1329 
1330 	newp->rn_parent = pp;
1331 	newp->rn_flags |= RC_NODE_IN_PARENT;
1332 
1333 	/*
1334 	 * Note that we carefully add newp before removing np -- this
1335 	 * keeps iterators on the list from missing us.
1336 	 */
1337 	(void) uu_list_insert_after(pp->rn_children, np, newp);
1338 	(void) uu_list_remove(pp->rn_children, np);
1339 
1340 	/*
1341 	 * re-set np
1342 	 */
1343 	newp->rn_former = np;
1344 	np->rn_parent = NULL;
1345 	np->rn_flags &= ~RC_NODE_IN_PARENT;
1346 	np->rn_flags |= RC_NODE_ON_FORMER;
1347 
1348 	rc_notify_insert_node(newp);
1349 
1350 	rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
1351 	(void) pthread_mutex_unlock(&pp->rn_lock);
1352 	rc_node_rele_flag(newp, RC_NODE_USING_PARENT | RC_NODE_IN_TX);
1353 	(void) pthread_mutex_unlock(&newp->rn_lock);
1354 	rc_node_setup_parent_ref(np, np);
1355 	rc_node_rele_flag(np, RC_NODE_IN_TX);
1356 	(void) pthread_mutex_unlock(&np->rn_lock);
1357 }
1358 
1359 /*
1360  * makes sure a node with lookup 'nip', name 'name', and parent 'pp' exists.
1361  * 'cp' is used (and returned) if the node does not yet exist.  If it does
1362  * exist, 'cp' is freed, and the existent node is returned instead.
1363  */
1364 rc_node_t *
1365 rc_node_setup(rc_node_t *cp, rc_node_lookup_t *nip, const char *name,
1366     rc_node_t *pp)
1367 {
1368 	rc_node_t *np;
1369 	cache_bucket_t *bp;
1370 	uint32_t h = rc_node_hash(nip);
1371 
1372 	assert(cp->rn_refs == 0);
1373 
1374 	bp = cache_hold(h);
1375 	if ((np = cache_lookup_unlocked(bp, nip)) != NULL) {
1376 		cache_release(bp);
1377 
1378 		/*
1379 		 * make sure it matches our expectations
1380 		 */
1381 		assert(np->rn_parent == pp);
1382 		assert(memcmp(&np->rn_id, nip, sizeof (*nip)) == 0);
1383 		assert(strcmp(np->rn_name, name) == 0);
1384 		assert(np->rn_type == NULL);
1385 		assert(np->rn_flags & RC_NODE_IN_PARENT);
1386 
1387 		rc_node_destroy(cp);
1388 		return (np);
1389 	}
1390 
1391 	/*
1392 	 * No one is there -- create a new node.
1393 	 */
1394 	np = cp;
1395 	rc_node_hold(np);
1396 	np->rn_id = *nip;
1397 	np->rn_hash = h;
1398 	np->rn_name = strdup(name);
1399 
1400 	np->rn_flags |= RC_NODE_USING_PARENT;
1401 
1402 	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_INSTANCE) {
1403 #if COMPOSITION_DEPTH == 2
1404 		np->rn_cchain[0] = np;
1405 		np->rn_cchain[1] = pp;
1406 #else
1407 #error This code must be updated.
1408 #endif
1409 	}
1410 
1411 	cache_insert_unlocked(bp, np);
1412 	cache_release(bp);		/* we are now visible */
1413 
1414 	rc_node_link_child(pp, np);
1415 
1416 	return (np);
1417 }
1418 
1419 /*
1420  * makes sure a snapshot with lookup 'nip', name 'name', and parent 'pp' exists.
1421  * 'cp' is used (and returned) if the node does not yet exist.  If it does
1422  * exist, 'cp' is freed, and the existent node is returned instead.
1423  */
1424 rc_node_t *
1425 rc_node_setup_snapshot(rc_node_t *cp, rc_node_lookup_t *nip, const char *name,
1426     uint32_t snap_id, rc_node_t *pp)
1427 {
1428 	rc_node_t *np;
1429 	cache_bucket_t *bp;
1430 	uint32_t h = rc_node_hash(nip);
1431 
1432 	assert(cp->rn_refs == 0);
1433 
1434 	bp = cache_hold(h);
1435 	if ((np = cache_lookup_unlocked(bp, nip)) != NULL) {
1436 		cache_release(bp);
1437 
1438 		/*
1439 		 * make sure it matches our expectations
1440 		 */
1441 		assert(np->rn_parent == pp);
1442 		assert(memcmp(&np->rn_id, nip, sizeof (*nip)) == 0);
1443 		assert(strcmp(np->rn_name, name) == 0);
1444 		assert(np->rn_type == NULL);
1445 		assert(np->rn_flags & RC_NODE_IN_PARENT);
1446 
1447 		rc_node_destroy(cp);
1448 		return (np);
1449 	}
1450 
1451 	/*
1452 	 * No one is there -- create a new node.
1453 	 */
1454 	np = cp;
1455 	rc_node_hold(np);
1456 	np->rn_id = *nip;
1457 	np->rn_hash = h;
1458 	np->rn_name = strdup(name);
1459 	np->rn_snapshot_id = snap_id;
1460 
1461 	np->rn_flags |= RC_NODE_USING_PARENT;
1462 
1463 	cache_insert_unlocked(bp, np);
1464 	cache_release(bp);		/* we are now visible */
1465 
1466 	rc_node_link_child(pp, np);
1467 
1468 	return (np);
1469 }
1470 
1471 /*
1472  * makes sure a snaplevel with lookup 'nip' and parent 'pp' exists.  'cp' is
1473  * used (and returned) if the node does not yet exist.  If it does exist, 'cp'
1474  * is freed, and the existent node is returned instead.
1475  */
1476 rc_node_t *
1477 rc_node_setup_snaplevel(rc_node_t *cp, rc_node_lookup_t *nip,
1478     rc_snaplevel_t *lvl, rc_node_t *pp)
1479 {
1480 	rc_node_t *np;
1481 	cache_bucket_t *bp;
1482 	uint32_t h = rc_node_hash(nip);
1483 
1484 	assert(cp->rn_refs == 0);
1485 
1486 	bp = cache_hold(h);
1487 	if ((np = cache_lookup_unlocked(bp, nip)) != NULL) {
1488 		cache_release(bp);
1489 
1490 		/*
1491 		 * make sure it matches our expectations
1492 		 */
1493 		assert(np->rn_parent == pp);
1494 		assert(memcmp(&np->rn_id, nip, sizeof (*nip)) == 0);
1495 		assert(np->rn_name == NULL);
1496 		assert(np->rn_type == NULL);
1497 		assert(np->rn_flags & RC_NODE_IN_PARENT);
1498 
1499 		rc_node_destroy(cp);
1500 		return (np);
1501 	}
1502 
1503 	/*
1504 	 * No one is there -- create a new node.
1505 	 */
1506 	np = cp;
1507 	rc_node_hold(np);	/* released in snapshot_fill_children() */
1508 	np->rn_id = *nip;
1509 	np->rn_hash = h;
1510 
1511 	rc_snaplevel_hold(lvl);
1512 	np->rn_snaplevel = lvl;
1513 
1514 	np->rn_flags |= RC_NODE_USING_PARENT;
1515 
1516 	cache_insert_unlocked(bp, np);
1517 	cache_release(bp);		/* we are now visible */
1518 
1519 	/* Add this snaplevel to the snapshot's composition chain. */
1520 	assert(pp->rn_cchain[lvl->rsl_level_num - 1] == NULL);
1521 	pp->rn_cchain[lvl->rsl_level_num - 1] = np;
1522 
1523 	rc_node_link_child(pp, np);
1524 
1525 	return (np);
1526 }
1527 
1528 /*
1529  * Returns NULL if strdup() fails.
1530  */
1531 rc_node_t *
1532 rc_node_setup_pg(rc_node_t *cp, rc_node_lookup_t *nip, const char *name,
1533     const char *type, uint32_t flags, uint32_t gen_id, rc_node_t *pp)
1534 {
1535 	rc_node_t *np;
1536 	cache_bucket_t *bp;
1537 
1538 	uint32_t h = rc_node_hash(nip);
1539 	bp = cache_hold(h);
1540 	if ((np = cache_lookup_unlocked(bp, nip)) != NULL) {
1541 		cache_release(bp);
1542 
1543 		/*
1544 		 * make sure it matches our expectations (don't check
1545 		 * the generation number or parent, since someone could
1546 		 * have gotten a transaction through while we weren't
1547 		 * looking)
1548 		 */
1549 		assert(memcmp(&np->rn_id, nip, sizeof (*nip)) == 0);
1550 		assert(strcmp(np->rn_name, name) == 0);
1551 		assert(strcmp(np->rn_type, type) == 0);
1552 		assert(np->rn_pgflags == flags);
1553 		assert(np->rn_flags & RC_NODE_IN_PARENT);
1554 
1555 		rc_node_destroy(cp);
1556 		return (np);
1557 	}
1558 
1559 	np = cp;
1560 	rc_node_hold(np);		/* released in fill_pg_callback() */
1561 	np->rn_id = *nip;
1562 	np->rn_hash = h;
1563 	np->rn_name = strdup(name);
1564 	if (np->rn_name == NULL) {
1565 		rc_node_rele(np);
1566 		return (NULL);
1567 	}
1568 	np->rn_type = strdup(type);
1569 	if (np->rn_type == NULL) {
1570 		free((void *)np->rn_name);
1571 		rc_node_rele(np);
1572 		return (NULL);
1573 	}
1574 	np->rn_pgflags = flags;
1575 	np->rn_gen_id = gen_id;
1576 
1577 	np->rn_flags |= RC_NODE_USING_PARENT;
1578 
1579 	cache_insert_unlocked(bp, np);
1580 	cache_release(bp);		/* we are now visible */
1581 
1582 	rc_node_link_child(pp, np);
1583 
1584 	return (np);
1585 }
1586 
1587 #if COMPOSITION_DEPTH == 2
1588 /*
1589  * Initialize a "composed property group" which represents the composition of
1590  * property groups pg1 & pg2.  It is ephemeral: once created & returned for an
1591  * ITER_READ request, keeping it out of cache_hash and any child lists
1592  * prevents it from being looked up.  Operations besides iteration are passed
1593  * through to pg1.
1594  *
1595  * pg1 & pg2 should be held before entering this function.  They will be
1596  * released in rc_node_destroy().
1597  */
1598 static int
1599 rc_node_setup_cpg(rc_node_t *cpg, rc_node_t *pg1, rc_node_t *pg2)
1600 {
1601 	if (strcmp(pg1->rn_type, pg2->rn_type) != 0)
1602 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
1603 
1604 	cpg->rn_id.rl_type = REP_PROTOCOL_ENTITY_CPROPERTYGRP;
1605 	cpg->rn_name = strdup(pg1->rn_name);
1606 	if (cpg->rn_name == NULL)
1607 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
1608 
1609 	cpg->rn_cchain[0] = pg1;
1610 	cpg->rn_cchain[1] = pg2;
1611 
1612 	return (REP_PROTOCOL_SUCCESS);
1613 }
1614 #else
1615 #error This code must be updated.
1616 #endif
1617 
1618 /*
1619  * Fails with _NO_RESOURCES.
1620  */
1621 int
1622 rc_node_create_property(rc_node_t *pp, rc_node_lookup_t *nip,
1623     const char *name, rep_protocol_value_type_t type,
1624     const char *vals, size_t count, size_t size)
1625 {
1626 	rc_node_t *np;
1627 	cache_bucket_t *bp;
1628 
1629 	uint32_t h = rc_node_hash(nip);
1630 	bp = cache_hold(h);
1631 	if ((np = cache_lookup_unlocked(bp, nip)) != NULL) {
1632 		cache_release(bp);
1633 		/*
1634 		 * make sure it matches our expectations
1635 		 */
1636 		(void) pthread_mutex_lock(&np->rn_lock);
1637 		if (rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
1638 			assert(np->rn_parent == pp);
1639 			assert(memcmp(&np->rn_id, nip, sizeof (*nip)) == 0);
1640 			assert(strcmp(np->rn_name, name) == 0);
1641 			assert(np->rn_valtype == type);
1642 			assert(np->rn_values_count == count);
1643 			assert(np->rn_values_size == size);
1644 			assert(vals == NULL ||
1645 			    memcmp(np->rn_values, vals, size) == 0);
1646 			assert(np->rn_flags & RC_NODE_IN_PARENT);
1647 			rc_node_rele_flag(np, RC_NODE_USING_PARENT);
1648 		}
1649 		rc_node_rele_locked(np);
1650 		object_free_values(vals, type, count, size);
1651 		return (REP_PROTOCOL_SUCCESS);
1652 	}
1653 
1654 	/*
1655 	 * No one is there -- create a new node.
1656 	 */
1657 	np = rc_node_alloc();
1658 	if (np == NULL) {
1659 		cache_release(bp);
1660 		object_free_values(vals, type, count, size);
1661 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
1662 	}
1663 	np->rn_id = *nip;
1664 	np->rn_hash = h;
1665 	np->rn_name = strdup(name);
1666 	if (np->rn_name == NULL) {
1667 		cache_release(bp);
1668 		object_free_values(vals, type, count, size);
1669 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
1670 	}
1671 
1672 	np->rn_valtype = type;
1673 	np->rn_values = vals;
1674 	np->rn_values_count = count;
1675 	np->rn_values_size = size;
1676 
1677 	np->rn_flags |= RC_NODE_USING_PARENT;
1678 
1679 	cache_insert_unlocked(bp, np);
1680 	cache_release(bp);		/* we are now visible */
1681 
1682 	rc_node_link_child(pp, np);
1683 
1684 	return (REP_PROTOCOL_SUCCESS);
1685 }
1686 
1687 int
1688 rc_node_init(void)
1689 {
1690 	rc_node_t *np;
1691 	cache_bucket_t *bp;
1692 
1693 	rc_children_pool = uu_list_pool_create("rc_children_pool",
1694 	    sizeof (rc_node_t), offsetof(rc_node_t, rn_sibling_node),
1695 	    NULL, UU_LIST_POOL_DEBUG);
1696 
1697 	rc_pg_notify_pool = uu_list_pool_create("rc_pg_notify_pool",
1698 	    sizeof (rc_node_pg_notify_t),
1699 	    offsetof(rc_node_pg_notify_t, rnpn_node),
1700 	    NULL, UU_LIST_POOL_DEBUG);
1701 
1702 	rc_notify_pool = uu_list_pool_create("rc_notify_pool",
1703 	    sizeof (rc_notify_t), offsetof(rc_notify_t, rcn_list_node),
1704 	    NULL, UU_LIST_POOL_DEBUG);
1705 
1706 	rc_notify_info_pool = uu_list_pool_create("rc_notify_info_pool",
1707 	    sizeof (rc_notify_info_t),
1708 	    offsetof(rc_notify_info_t, rni_list_node),
1709 	    NULL, UU_LIST_POOL_DEBUG);
1710 
1711 	if (rc_children_pool == NULL || rc_pg_notify_pool == NULL ||
1712 	    rc_notify_pool == NULL || rc_notify_info_pool == NULL)
1713 		uu_die("out of memory");
1714 
1715 	rc_notify_list = uu_list_create(rc_notify_pool,
1716 	    &rc_notify_list, 0);
1717 
1718 	rc_notify_info_list = uu_list_create(rc_notify_info_pool,
1719 	    &rc_notify_info_list, 0);
1720 
1721 	if (rc_notify_list == NULL || rc_notify_info_list == NULL)
1722 		uu_die("out of memory");
1723 
1724 	if ((np = rc_node_alloc()) == NULL)
1725 		uu_die("out of memory");
1726 
1727 	rc_node_hold(np);
1728 	np->rn_id.rl_type = REP_PROTOCOL_ENTITY_SCOPE;
1729 	np->rn_id.rl_backend = BACKEND_TYPE_NORMAL;
1730 	np->rn_hash = rc_node_hash(&np->rn_id);
1731 	np->rn_name = "localhost";
1732 
1733 	bp = cache_hold(np->rn_hash);
1734 	cache_insert_unlocked(bp, np);
1735 	cache_release(bp);
1736 
1737 	rc_scope = np;
1738 	return (1);
1739 }
1740 
1741 /*
1742  * Fails with
1743  *   _INVALID_TYPE - type is invalid
1744  *   _TYPE_MISMATCH - np doesn't carry children of type type
1745  *   _DELETED - np has been deleted
1746  *   _NO_RESOURCES
1747  */
1748 static int
1749 rc_node_fill_children(rc_node_t *np, uint32_t type)
1750 {
1751 	int rc;
1752 
1753 	assert(MUTEX_HELD(&np->rn_lock));
1754 
1755 	if ((rc = rc_check_parent_child(np->rn_id.rl_type, type)) !=
1756 	    REP_PROTOCOL_SUCCESS)
1757 		return (rc);
1758 
1759 	if (!rc_node_hold_flag(np, RC_NODE_CHILDREN_CHANGING))
1760 		return (REP_PROTOCOL_FAIL_DELETED);
1761 
1762 	if (np->rn_flags & RC_NODE_HAS_CHILDREN) {
1763 		rc_node_rele_flag(np, RC_NODE_CHILDREN_CHANGING);
1764 		return (REP_PROTOCOL_SUCCESS);
1765 	}
1766 
1767 	(void) pthread_mutex_unlock(&np->rn_lock);
1768 	rc = object_fill_children(np);
1769 	(void) pthread_mutex_lock(&np->rn_lock);
1770 
1771 	if (rc == REP_PROTOCOL_SUCCESS) {
1772 		np->rn_flags |= RC_NODE_HAS_CHILDREN;
1773 	}
1774 	rc_node_rele_flag(np, RC_NODE_CHILDREN_CHANGING);
1775 
1776 	return (rc);
1777 }
1778 
1779 /*
1780  * Returns
1781  *   _INVALID_TYPE - type is invalid
1782  *   _TYPE_MISMATCH - np doesn't carry children of type type
1783  *   _DELETED - np has been deleted
1784  *   _NO_RESOURCES
1785  *   _SUCCESS - if *cpp is not NULL, it is held
1786  */
1787 static int
1788 rc_node_find_named_child(rc_node_t *np, const char *name, uint32_t type,
1789     rc_node_t **cpp)
1790 {
1791 	int ret;
1792 	rc_node_t *cp;
1793 
1794 	assert(MUTEX_HELD(&np->rn_lock));
1795 	assert(np->rn_id.rl_type != REP_PROTOCOL_ENTITY_CPROPERTYGRP);
1796 
1797 	ret = rc_node_fill_children(np, type);
1798 	if (ret != REP_PROTOCOL_SUCCESS)
1799 		return (ret);
1800 
1801 	for (cp = uu_list_first(np->rn_children);
1802 	    cp != NULL;
1803 	    cp = uu_list_next(np->rn_children, cp)) {
1804 		if (cp->rn_id.rl_type == type && strcmp(cp->rn_name, name) == 0)
1805 			break;
1806 	}
1807 
1808 	if (cp != NULL)
1809 		rc_node_hold(cp);
1810 	*cpp = cp;
1811 
1812 	return (REP_PROTOCOL_SUCCESS);
1813 }
1814 
1815 #ifndef NATIVE_BUILD
1816 static int rc_node_parent(rc_node_t *, rc_node_t **);
1817 
1818 /*
1819  * If the propname property exists in pg, and it is of type string, add its
1820  * values as authorizations to pcp.  pg must not be locked on entry, and it is
1821  * returned unlocked.  Returns
1822  *   _DELETED - pg was deleted
1823  *   _NO_RESOURCES
1824  *   _NOT_FOUND - pg has no property named propname
1825  *   _SUCCESS
1826  */
1827 static int
1828 perm_add_pg_prop_values(permcheck_t *pcp, rc_node_t *pg, const char *propname)
1829 {
1830 	rc_node_t *prop;
1831 	int result;
1832 
1833 	uint_t count;
1834 	const char *cp;
1835 
1836 	assert(!MUTEX_HELD(&pg->rn_lock));
1837 	assert(pg->rn_id.rl_type == REP_PROTOCOL_ENTITY_PROPERTYGRP);
1838 	assert(pg->rn_id.rl_ids[ID_SNAPSHOT] == 0);
1839 
1840 	(void) pthread_mutex_lock(&pg->rn_lock);
1841 	result = rc_node_find_named_child(pg, propname,
1842 	    REP_PROTOCOL_ENTITY_PROPERTY, &prop);
1843 	(void) pthread_mutex_unlock(&pg->rn_lock);
1844 	if (result != REP_PROTOCOL_SUCCESS) {
1845 		switch (result) {
1846 		case REP_PROTOCOL_FAIL_DELETED:
1847 		case REP_PROTOCOL_FAIL_NO_RESOURCES:
1848 			return (result);
1849 
1850 		case REP_PROTOCOL_FAIL_INVALID_TYPE:
1851 		case REP_PROTOCOL_FAIL_TYPE_MISMATCH:
1852 		default:
1853 			bad_error("rc_node_find_named_child", result);
1854 		}
1855 	}
1856 
1857 	if (prop == NULL)
1858 		return (REP_PROTOCOL_FAIL_NOT_FOUND);
1859 
1860 	/* rn_valtype is immutable, so no locking. */
1861 	if (prop->rn_valtype != REP_PROTOCOL_TYPE_STRING) {
1862 		rc_node_rele(prop);
1863 		return (REP_PROTOCOL_SUCCESS);
1864 	}
1865 
1866 	(void) pthread_mutex_lock(&prop->rn_lock);
1867 	for (count = prop->rn_values_count, cp = prop->rn_values;
1868 	    count > 0;
1869 	    --count) {
1870 		result = perm_add_enabling(pcp, cp);
1871 		if (result != REP_PROTOCOL_SUCCESS)
1872 			break;
1873 
1874 		cp = strchr(cp, '\0') + 1;
1875 	}
1876 
1877 	rc_node_rele_locked(prop);
1878 
1879 	return (result);
1880 }
1881 
1882 /*
1883  * Assuming that ent is a service or instance node, if the pgname property
1884  * group has type pgtype, and it has a propname property with string type, add
1885  * its values as authorizations to pcp.  If pgtype is NULL, it is not checked.
1886  * Returns
1887  *   _SUCCESS
1888  *   _DELETED - ent was deleted
1889  *   _NO_RESOURCES - no resources
1890  *   _NOT_FOUND - ent does not have pgname pg or propname property
1891  */
1892 static int
1893 perm_add_ent_prop_values(permcheck_t *pcp, rc_node_t *ent, const char *pgname,
1894     const char *pgtype, const char *propname)
1895 {
1896 	int r;
1897 	rc_node_t *pg;
1898 
1899 	assert(!MUTEX_HELD(&ent->rn_lock));
1900 
1901 	(void) pthread_mutex_lock(&ent->rn_lock);
1902 	r = rc_node_find_named_child(ent, pgname,
1903 	    REP_PROTOCOL_ENTITY_PROPERTYGRP, &pg);
1904 	(void) pthread_mutex_unlock(&ent->rn_lock);
1905 
1906 	switch (r) {
1907 	case REP_PROTOCOL_SUCCESS:
1908 		break;
1909 
1910 	case REP_PROTOCOL_FAIL_DELETED:
1911 	case REP_PROTOCOL_FAIL_NO_RESOURCES:
1912 		return (r);
1913 
1914 	default:
1915 		bad_error("rc_node_find_named_child", r);
1916 	}
1917 
1918 	if (pg == NULL)
1919 		return (REP_PROTOCOL_FAIL_NOT_FOUND);
1920 
1921 	if (pgtype == NULL || strcmp(pg->rn_type, pgtype) == 0) {
1922 		r = perm_add_pg_prop_values(pcp, pg, propname);
1923 		switch (r) {
1924 		case REP_PROTOCOL_FAIL_DELETED:
1925 			r = REP_PROTOCOL_FAIL_NOT_FOUND;
1926 			break;
1927 
1928 		case REP_PROTOCOL_FAIL_NO_RESOURCES:
1929 		case REP_PROTOCOL_SUCCESS:
1930 		case REP_PROTOCOL_FAIL_NOT_FOUND:
1931 			break;
1932 
1933 		default:
1934 			bad_error("perm_add_pg_prop_values", r);
1935 		}
1936 	}
1937 
1938 	rc_node_rele(pg);
1939 
1940 	return (r);
1941 }
1942 
1943 /*
1944  * If pg has a property named propname, and it string typed, add its values as
1945  * authorizations to pcp.  If pg has no such property, and its parent is an
1946  * instance, walk up to the service and try doing the same with the property
1947  * of the same name from the property group of the same name.  Returns
1948  *   _SUCCESS
1949  *   _NO_RESOURCES
1950  *   _DELETED - pg (or an ancestor) was deleted
1951  */
1952 static int
1953 perm_add_enabling_values(permcheck_t *pcp, rc_node_t *pg, const char *propname)
1954 {
1955 	int r;
1956 
1957 	r = perm_add_pg_prop_values(pcp, pg, propname);
1958 
1959 	if (r == REP_PROTOCOL_FAIL_NOT_FOUND) {
1960 		char pgname[REP_PROTOCOL_NAME_LEN + 1];
1961 		rc_node_t *inst, *svc;
1962 		size_t sz;
1963 
1964 		assert(!MUTEX_HELD(&pg->rn_lock));
1965 
1966 		if (pg->rn_id.rl_ids[ID_INSTANCE] == 0) {
1967 			/* not an instance pg */
1968 			return (REP_PROTOCOL_SUCCESS);
1969 		}
1970 
1971 		sz = strlcpy(pgname, pg->rn_name, sizeof (pgname));
1972 		assert(sz < sizeof (pgname));
1973 
1974 		/* get pg's parent */
1975 		r = rc_node_parent(pg, &inst);
1976 		if (r != REP_PROTOCOL_SUCCESS) {
1977 			assert(r == REP_PROTOCOL_FAIL_DELETED);
1978 			return (r);
1979 		}
1980 
1981 		assert(inst->rn_id.rl_type == REP_PROTOCOL_ENTITY_INSTANCE);
1982 
1983 		/* get instance's parent */
1984 		r = rc_node_parent(inst, &svc);
1985 		rc_node_rele(inst);
1986 		if (r != REP_PROTOCOL_SUCCESS) {
1987 			assert(r == REP_PROTOCOL_FAIL_DELETED);
1988 			return (r);
1989 		}
1990 
1991 		assert(svc->rn_id.rl_type == REP_PROTOCOL_ENTITY_SERVICE);
1992 
1993 		r = perm_add_ent_prop_values(pcp, svc, pgname, NULL, propname);
1994 
1995 		rc_node_rele(svc);
1996 
1997 		if (r == REP_PROTOCOL_FAIL_NOT_FOUND)
1998 			r = REP_PROTOCOL_SUCCESS;
1999 	}
2000 
2001 	return (r);
2002 }
2003 
2004 /*
2005  * Call perm_add_enabling_values() for the "action_authorization" property of
2006  * the "general" property group of inst.  Returns
2007  *   _DELETED - inst (or an ancestor) was deleted
2008  *   _NO_RESOURCES
2009  *   _SUCCESS
2010  */
2011 static int
2012 perm_add_inst_action_auth(permcheck_t *pcp, rc_node_t *inst)
2013 {
2014 	int r;
2015 	rc_node_t *svc;
2016 
2017 	assert(inst->rn_id.rl_type == REP_PROTOCOL_ENTITY_INSTANCE);
2018 
2019 	r = perm_add_ent_prop_values(pcp, inst, AUTH_PG_GENERAL,
2020 	    AUTH_PG_GENERAL_TYPE, AUTH_PROP_ACTION);
2021 
2022 	if (r != REP_PROTOCOL_FAIL_NOT_FOUND)
2023 		return (r);
2024 
2025 	r = rc_node_parent(inst, &svc);
2026 	if (r != REP_PROTOCOL_SUCCESS) {
2027 		assert(r == REP_PROTOCOL_FAIL_DELETED);
2028 		return (r);
2029 	}
2030 
2031 	r = perm_add_ent_prop_values(pcp, svc, AUTH_PG_GENERAL,
2032 	    AUTH_PG_GENERAL_TYPE, AUTH_PROP_ACTION);
2033 
2034 	return (r == REP_PROTOCOL_FAIL_NOT_FOUND ? REP_PROTOCOL_SUCCESS : r);
2035 }
2036 #endif /* NATIVE_BUILD */
2037 
2038 void
2039 rc_node_ptr_init(rc_node_ptr_t *out)
2040 {
2041 	out->rnp_node = NULL;
2042 	out->rnp_authorized = 0;
2043 	out->rnp_deleted = 0;
2044 }
2045 
2046 static void
2047 rc_node_assign(rc_node_ptr_t *out, rc_node_t *val)
2048 {
2049 	rc_node_t *cur = out->rnp_node;
2050 	if (val != NULL)
2051 		rc_node_hold(val);
2052 	out->rnp_node = val;
2053 	if (cur != NULL)
2054 		rc_node_rele(cur);
2055 	out->rnp_authorized = 0;
2056 	out->rnp_deleted = 0;
2057 }
2058 
2059 void
2060 rc_node_clear(rc_node_ptr_t *out, int deleted)
2061 {
2062 	rc_node_assign(out, NULL);
2063 	out->rnp_deleted = deleted;
2064 }
2065 
2066 void
2067 rc_node_ptr_assign(rc_node_ptr_t *out, const rc_node_ptr_t *val)
2068 {
2069 	rc_node_assign(out, val->rnp_node);
2070 }
2071 
2072 /*
2073  * rc_node_check()/RC_NODE_CHECK()
2074  *	generic "entry" checks, run before the use of an rc_node pointer.
2075  *
2076  * Fails with
2077  *   _NOT_SET
2078  *   _DELETED
2079  */
2080 static int
2081 rc_node_check_and_lock(rc_node_t *np)
2082 {
2083 	int result = REP_PROTOCOL_SUCCESS;
2084 	if (np == NULL)
2085 		return (REP_PROTOCOL_FAIL_NOT_SET);
2086 
2087 	(void) pthread_mutex_lock(&np->rn_lock);
2088 	if (!rc_node_wait_flag(np, RC_NODE_DYING)) {
2089 		result = REP_PROTOCOL_FAIL_DELETED;
2090 		(void) pthread_mutex_unlock(&np->rn_lock);
2091 	}
2092 
2093 	return (result);
2094 }
2095 
2096 /*
2097  * Fails with
2098  *   _NOT_SET - ptr is reset
2099  *   _DELETED - node has been deleted
2100  */
2101 static rc_node_t *
2102 rc_node_ptr_check_and_lock(rc_node_ptr_t *npp, int *res)
2103 {
2104 	rc_node_t *np = npp->rnp_node;
2105 	if (np == NULL) {
2106 		if (npp->rnp_deleted)
2107 			*res = REP_PROTOCOL_FAIL_DELETED;
2108 		else
2109 			*res = REP_PROTOCOL_FAIL_NOT_SET;
2110 		return (NULL);
2111 	}
2112 
2113 	(void) pthread_mutex_lock(&np->rn_lock);
2114 	if (!rc_node_wait_flag(np, RC_NODE_DYING)) {
2115 		(void) pthread_mutex_unlock(&np->rn_lock);
2116 		rc_node_clear(npp, 1);
2117 		*res = REP_PROTOCOL_FAIL_DELETED;
2118 		return (NULL);
2119 	}
2120 	return (np);
2121 }
2122 
/*
 * Entry-check macros.  Every one of them makes the *enclosing function*
 * return the failure code when its check fails, so they can only be used
 * in functions returning int.  They expand to brace blocks, not to
 * expressions.
 *
 *   RC_NODE_CHECK_AND_LOCK(n)	on success, (n)->rn_lock is held
 *   RC_NODE_CHECK(n)		on success, nothing is held
 *   RC_NODE_CHECK_AND_HOLD(n)	on success, rc_node_hold_locked() has been
 *				applied to n and the lock dropped again
 *
 * The RC_NODE_PTR_GET_* variants do the same for an rc_node_ptr_t (npp) and
 * store the node it references in np.
 */
#define	RC_NODE_CHECK_AND_LOCK(n) {					\
	int rc__status = rc_node_check_and_lock(n);			\
	if (rc__status != REP_PROTOCOL_SUCCESS)				\
		return (rc__status);					\
}

#define	RC_NODE_CHECK(n) {						\
	RC_NODE_CHECK_AND_LOCK(n);					\
	(void) pthread_mutex_unlock(&(n)->rn_lock);			\
}

#define	RC_NODE_CHECK_AND_HOLD(n) {					\
	RC_NODE_CHECK_AND_LOCK(n);					\
	rc_node_hold_locked(n);						\
	(void) pthread_mutex_unlock(&(n)->rn_lock);			\
}

#define	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp) {			\
	int rc__status;							\
	if (((np) = rc_node_ptr_check_and_lock(npp, &rc__status)) ==	\
	    NULL)							\
		return (rc__status);					\
}

#define	RC_NODE_PTR_GET_CHECK(np, npp) {				\
	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);			\
	(void) pthread_mutex_unlock(&(np)->rn_lock);			\
}

#define	RC_NODE_PTR_GET_CHECK_AND_HOLD(np, npp) {			\
	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);			\
	rc_node_hold_locked(np);					\
	(void) pthread_mutex_unlock(&(np)->rn_lock);			\
}

/*
 * Take `flag' on np with rc_node_hold_flag(); np->rn_lock must be held.  If
 * the flag cannot be taken, the lock is dropped and the enclosing function
 * returns _DELETED.
 */
#define	HOLD_FLAG_OR_RETURN(np, flag) {					\
	assert(MUTEX_HELD(&(np)->rn_lock));				\
	assert(!((np)->rn_flags & RC_NODE_DEAD));			\
	if (!rc_node_hold_flag((np), flag)) {				\
		(void) pthread_mutex_unlock(&(np)->rn_lock);		\
		return (REP_PROTOCOL_FAIL_DELETED);			\
	}								\
}

/*
 * As HOLD_FLAG_OR_RETURN(), but on failure the node pointer npp (which must
 * reference np) is additionally cleared and marked deleted.
 */
#define	HOLD_PTR_FLAG_OR_RETURN(np, npp, flag) {			\
	assert(MUTEX_HELD(&(np)->rn_lock));				\
	assert(!((np)->rn_flags & RC_NODE_DEAD));			\
	if (!rc_node_hold_flag((np), flag)) {				\
		(void) pthread_mutex_unlock(&(np)->rn_lock);		\
		assert((np) == (npp)->rnp_node);			\
		rc_node_clear(npp, 1);					\
		return (REP_PROTOCOL_FAIL_DELETED);			\
	}								\
}
2176 
2177 int
2178 rc_local_scope(uint32_t type, rc_node_ptr_t *out)
2179 {
2180 	if (type != REP_PROTOCOL_ENTITY_SCOPE) {
2181 		rc_node_clear(out, 0);
2182 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
2183 	}
2184 
2185 	/*
2186 	 * the main scope never gets destroyed
2187 	 */
2188 	rc_node_assign(out, rc_scope);
2189 
2190 	return (REP_PROTOCOL_SUCCESS);
2191 }
2192 
2193 /*
2194  * Fails with
2195  *   _NOT_SET - npp is not set
2196  *   _DELETED - the node npp pointed at has been deleted
2197  *   _TYPE_MISMATCH - type is not _SCOPE
2198  *   _NOT_FOUND - scope has no parent
2199  */
2200 static int
2201 rc_scope_parent_scope(rc_node_ptr_t *npp, uint32_t type, rc_node_ptr_t *out)
2202 {
2203 	rc_node_t *np;
2204 
2205 	rc_node_clear(out, 0);
2206 
2207 	RC_NODE_PTR_GET_CHECK(np, npp);
2208 
2209 	if (type != REP_PROTOCOL_ENTITY_SCOPE)
2210 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
2211 
2212 	return (REP_PROTOCOL_FAIL_NOT_FOUND);
2213 }
2214 
2215 /*
2216  * Fails with
2217  *   _NOT_SET
2218  *   _DELETED
2219  *   _NOT_APPLICABLE
2220  *   _NOT_FOUND
2221  *   _BAD_REQUEST
2222  *   _TRUNCATED
2223  */
2224 int
2225 rc_node_name(rc_node_ptr_t *npp, char *buf, size_t sz, uint32_t answertype,
2226     size_t *sz_out)
2227 {
2228 	size_t actual;
2229 	rc_node_t *np;
2230 
2231 	assert(sz == *sz_out);
2232 
2233 	RC_NODE_PTR_GET_CHECK(np, npp);
2234 
2235 	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
2236 		np = np->rn_cchain[0];
2237 		RC_NODE_CHECK(np);
2238 	}
2239 
2240 	switch (answertype) {
2241 	case RP_ENTITY_NAME_NAME:
2242 		if (np->rn_name == NULL)
2243 			return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2244 		actual = strlcpy(buf, np->rn_name, sz);
2245 		break;
2246 	case RP_ENTITY_NAME_PGTYPE:
2247 		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTYGRP)
2248 			return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2249 		actual = strlcpy(buf, np->rn_type, sz);
2250 		break;
2251 	case RP_ENTITY_NAME_PGFLAGS:
2252 		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTYGRP)
2253 			return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2254 		actual = snprintf(buf, sz, "%d", np->rn_pgflags);
2255 		break;
2256 	case RP_ENTITY_NAME_SNAPLEVEL_SCOPE:
2257 		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPLEVEL)
2258 			return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2259 		actual = strlcpy(buf, np->rn_snaplevel->rsl_scope, sz);
2260 		break;
2261 	case RP_ENTITY_NAME_SNAPLEVEL_SERVICE:
2262 		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPLEVEL)
2263 			return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2264 		actual = strlcpy(buf, np->rn_snaplevel->rsl_service, sz);
2265 		break;
2266 	case RP_ENTITY_NAME_SNAPLEVEL_INSTANCE:
2267 		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPLEVEL)
2268 			return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2269 		if (np->rn_snaplevel->rsl_instance == NULL)
2270 			return (REP_PROTOCOL_FAIL_NOT_FOUND);
2271 		actual = strlcpy(buf, np->rn_snaplevel->rsl_instance, sz);
2272 		break;
2273 	default:
2274 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
2275 	}
2276 	if (actual >= sz)
2277 		return (REP_PROTOCOL_FAIL_TRUNCATED);
2278 
2279 	*sz_out = actual;
2280 	return (REP_PROTOCOL_SUCCESS);
2281 }
2282 
2283 int
2284 rc_node_get_property_type(rc_node_ptr_t *npp, rep_protocol_value_type_t *out)
2285 {
2286 	rc_node_t *np;
2287 
2288 	RC_NODE_PTR_GET_CHECK(np, npp);
2289 
2290 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTY)
2291 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
2292 
2293 	*out = np->rn_valtype;
2294 
2295 	return (REP_PROTOCOL_SUCCESS);
2296 }
2297 
2298 /*
2299  * Get np's parent.  If np is deleted, returns _DELETED.  Otherwise puts a hold
2300  * on the parent, returns a pointer to it in *out, and returns _SUCCESS.
2301  */
2302 static int
2303 rc_node_parent(rc_node_t *np, rc_node_t **out)
2304 {
2305 	rc_node_t *pnp;
2306 	rc_node_t *np_orig;
2307 
2308 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
2309 		RC_NODE_CHECK_AND_LOCK(np);
2310 	} else {
2311 		np = np->rn_cchain[0];
2312 		RC_NODE_CHECK_AND_LOCK(np);
2313 	}
2314 
2315 	np_orig = np;
2316 	rc_node_hold_locked(np);		/* simplifies the remainder */
2317 
2318 	for (;;) {
2319 		if (!rc_node_wait_flag(np,
2320 		    RC_NODE_IN_TX | RC_NODE_USING_PARENT)) {
2321 			rc_node_rele_locked(np);
2322 			return (REP_PROTOCOL_FAIL_DELETED);
2323 		}
2324 
2325 		if (!(np->rn_flags & RC_NODE_OLD))
2326 			break;
2327 
2328 		rc_node_rele_locked(np);
2329 		np = cache_lookup(&np_orig->rn_id);
2330 		assert(np != np_orig);
2331 
2332 		if (np == NULL)
2333 			goto deleted;
2334 		(void) pthread_mutex_lock(&np->rn_lock);
2335 	}
2336 
2337 	/* guaranteed to succeed without dropping the lock */
2338 	if (!rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
2339 		(void) pthread_mutex_unlock(&np->rn_lock);
2340 		*out = NULL;
2341 		rc_node_rele(np);
2342 		return (REP_PROTOCOL_FAIL_DELETED);
2343 	}
2344 
2345 	assert(np->rn_parent != NULL);
2346 	pnp = np->rn_parent;
2347 	(void) pthread_mutex_unlock(&np->rn_lock);
2348 
2349 	(void) pthread_mutex_lock(&pnp->rn_lock);
2350 	(void) pthread_mutex_lock(&np->rn_lock);
2351 	rc_node_rele_flag(np, RC_NODE_USING_PARENT);
2352 	(void) pthread_mutex_unlock(&np->rn_lock);
2353 
2354 	rc_node_hold_locked(pnp);
2355 
2356 	(void) pthread_mutex_unlock(&pnp->rn_lock);
2357 
2358 	rc_node_rele(np);
2359 	*out = pnp;
2360 	return (REP_PROTOCOL_SUCCESS);
2361 
2362 deleted:
2363 	rc_node_rele(np);
2364 	return (REP_PROTOCOL_FAIL_DELETED);
2365 }
2366 
2367 /*
2368  * Fails with
2369  *   _NOT_SET
2370  *   _DELETED
2371  */
2372 static int
2373 rc_node_ptr_parent(rc_node_ptr_t *npp, rc_node_t **out)
2374 {
2375 	rc_node_t *np;
2376 
2377 	RC_NODE_PTR_GET_CHECK(np, npp);
2378 
2379 	return (rc_node_parent(np, out));
2380 }
2381 
2382 /*
2383  * Fails with
2384  *   _NOT_SET - npp is not set
2385  *   _DELETED - the node npp pointed at has been deleted
2386  *   _TYPE_MISMATCH - npp's node's parent is not of type type
2387  *
2388  * If npp points to a scope, can also fail with
2389  *   _NOT_FOUND - scope has no parent
2390  */
2391 int
2392 rc_node_get_parent(rc_node_ptr_t *npp, uint32_t type, rc_node_ptr_t *out)
2393 {
2394 	rc_node_t *pnp;
2395 	int rc;
2396 
2397 	if (npp->rnp_node != NULL &&
2398 	    npp->rnp_node->rn_id.rl_type == REP_PROTOCOL_ENTITY_SCOPE)
2399 		return (rc_scope_parent_scope(npp, type, out));
2400 
2401 	if ((rc = rc_node_ptr_parent(npp, &pnp)) != REP_PROTOCOL_SUCCESS) {
2402 		rc_node_clear(out, 0);
2403 		return (rc);
2404 	}
2405 
2406 	if (type != pnp->rn_id.rl_type) {
2407 		rc_node_rele(pnp);
2408 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
2409 	}
2410 
2411 	rc_node_assign(out, pnp);
2412 	rc_node_rele(pnp);
2413 
2414 	return (REP_PROTOCOL_SUCCESS);
2415 }
2416 
2417 int
2418 rc_node_parent_type(rc_node_ptr_t *npp, uint32_t *type_out)
2419 {
2420 	rc_node_t *pnp;
2421 	int rc;
2422 
2423 	if (npp->rnp_node != NULL &&
2424 	    npp->rnp_node->rn_id.rl_type == REP_PROTOCOL_ENTITY_SCOPE) {
2425 		*type_out = REP_PROTOCOL_ENTITY_SCOPE;
2426 		return (REP_PROTOCOL_SUCCESS);
2427 	}
2428 
2429 	if ((rc = rc_node_ptr_parent(npp, &pnp)) != REP_PROTOCOL_SUCCESS)
2430 		return (rc);
2431 
2432 	*type_out = pnp->rn_id.rl_type;
2433 
2434 	rc_node_rele(pnp);
2435 
2436 	return (REP_PROTOCOL_SUCCESS);
2437 }
2438 
2439 /*
2440  * Fails with
2441  *   _INVALID_TYPE - type is invalid
2442  *   _TYPE_MISMATCH - np doesn't carry children of type type
2443  *   _DELETED - np has been deleted
2444  *   _NOT_FOUND - no child with that name/type combo found
2445  *   _NO_RESOURCES
2446  *   _BACKEND_ACCESS
2447  */
2448 int
2449 rc_node_get_child(rc_node_ptr_t *npp, const char *name, uint32_t type,
2450     rc_node_ptr_t *outp)
2451 {
2452 	rc_node_t *np, *cp;
2453 	rc_node_t *child = NULL;
2454 	int ret, idx;
2455 
2456 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
2457 	if ((ret = rc_check_type_name(type, name)) == REP_PROTOCOL_SUCCESS) {
2458 		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
2459 			ret = rc_node_find_named_child(np, name, type, &child);
2460 		} else {
2461 			(void) pthread_mutex_unlock(&np->rn_lock);
2462 			ret = REP_PROTOCOL_SUCCESS;
2463 			for (idx = 0; idx < COMPOSITION_DEPTH; idx++) {
2464 				cp = np->rn_cchain[idx];
2465 				if (cp == NULL)
2466 					break;
2467 				RC_NODE_CHECK_AND_LOCK(cp);
2468 				ret = rc_node_find_named_child(cp, name, type,
2469 				    &child);
2470 				(void) pthread_mutex_unlock(&cp->rn_lock);
2471 				/*
2472 				 * loop only if we succeeded, but no child of
2473 				 * the correct name was found.
2474 				 */
2475 				if (ret != REP_PROTOCOL_SUCCESS ||
2476 				    child != NULL)
2477 					break;
2478 			}
2479 			(void) pthread_mutex_lock(&np->rn_lock);
2480 		}
2481 	}
2482 	(void) pthread_mutex_unlock(&np->rn_lock);
2483 
2484 	if (ret == REP_PROTOCOL_SUCCESS) {
2485 		rc_node_assign(outp, child);
2486 		if (child != NULL)
2487 			rc_node_rele(child);
2488 		else
2489 			ret = REP_PROTOCOL_FAIL_NOT_FOUND;
2490 	} else {
2491 		rc_node_assign(outp, NULL);
2492 	}
2493 	return (ret);
2494 }
2495 
2496 int
2497 rc_node_update(rc_node_ptr_t *npp)
2498 {
2499 	cache_bucket_t *bp;
2500 	rc_node_t *np = npp->rnp_node;
2501 	rc_node_t *nnp;
2502 	rc_node_t *cpg = NULL;
2503 
2504 	if (np != NULL &&
2505 	    np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
2506 		/*
2507 		 * If we're updating a composed property group, actually
2508 		 * update the top-level property group & return the
2509 		 * appropriate value.  But leave *nnp pointing at us.
2510 		 */
2511 		cpg = np;
2512 		np = np->rn_cchain[0];
2513 	}
2514 
2515 	RC_NODE_CHECK(np);
2516 
2517 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTYGRP &&
2518 	    np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPSHOT)
2519 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
2520 
2521 	for (;;) {
2522 		bp = cache_hold(np->rn_hash);
2523 		nnp = cache_lookup_unlocked(bp, &np->rn_id);
2524 		if (nnp == NULL) {
2525 			cache_release(bp);
2526 			rc_node_clear(npp, 1);
2527 			return (REP_PROTOCOL_FAIL_DELETED);
2528 		}
2529 		/*
2530 		 * grab the lock before dropping the cache bucket, so
2531 		 * that no one else can sneak in
2532 		 */
2533 		(void) pthread_mutex_lock(&nnp->rn_lock);
2534 		cache_release(bp);
2535 
2536 		if (!(nnp->rn_flags & RC_NODE_IN_TX) ||
2537 		    !rc_node_wait_flag(nnp, RC_NODE_IN_TX))
2538 			break;
2539 
2540 		rc_node_rele_locked(nnp);
2541 	}
2542 
2543 	/*
2544 	 * If it is dead, we want to update it so that it will continue to
2545 	 * report being dead.
2546 	 */
2547 	if (nnp->rn_flags & RC_NODE_DEAD) {
2548 		(void) pthread_mutex_unlock(&nnp->rn_lock);
2549 		if (nnp != np && cpg == NULL)
2550 			rc_node_assign(npp, nnp);	/* updated */
2551 		rc_node_rele(nnp);
2552 		return (REP_PROTOCOL_FAIL_DELETED);
2553 	}
2554 
2555 	assert(!(nnp->rn_flags & RC_NODE_OLD));
2556 	(void) pthread_mutex_unlock(&nnp->rn_lock);
2557 
2558 	if (nnp != np && cpg == NULL)
2559 		rc_node_assign(npp, nnp);		/* updated */
2560 
2561 	rc_node_rele(nnp);
2562 
2563 	return ((nnp == np)? REP_PROTOCOL_SUCCESS : REP_PROTOCOL_DONE);
2564 }
2565 
2566 /*
2567  * does a generic modification check, for creation, deletion, and snapshot
2568  * management only.  Property group transactions have different checks.
2569  */
2570 int
2571 rc_node_modify_permission_check(void)
2572 {
2573 	int rc = REP_PROTOCOL_SUCCESS;
2574 	permcheck_t *pcp;
2575 	int granted;
2576 
2577 	if (!client_is_privileged()) {
2578 #ifdef NATIVE_BUILD
2579 		rc = REP_PROTOCOL_FAIL_PERMISSION_DENIED;
2580 #else
2581 		pcp = pc_create();
2582 		if (pcp != NULL) {
2583 			rc = perm_add_enabling(pcp, AUTH_MODIFY);
2584 
2585 			if (rc == REP_PROTOCOL_SUCCESS) {
2586 				granted = perm_granted(pcp);
2587 
2588 				if (granted < 0)
2589 					rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
2590 			}
2591 
2592 			pc_free(pcp);
2593 		} else {
2594 			rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
2595 		}
2596 
2597 		if (rc == REP_PROTOCOL_SUCCESS && !granted)
2598 			rc = REP_PROTOCOL_FAIL_PERMISSION_DENIED;
2599 #endif /* NATIVE_BUILD */
2600 	}
2601 	return (rc);
2602 }
2603 
2604 /*
2605  * Fails with
2606  *   _DELETED - node has been deleted
2607  *   _NOT_SET - npp is reset
2608  *   _NOT_APPLICABLE - type is _PROPERTYGRP
2609  *   _INVALID_TYPE - node is corrupt or type is invalid
2610  *   _TYPE_MISMATCH - node cannot have children of type type
2611  *   _BAD_REQUEST - name is invalid
2612  *		    cannot create children for this type of node
2613  *   _NO_RESOURCES - out of memory, or could not allocate new id
2614  *   _PERMISSION_DENIED
2615  *   _BACKEND_ACCESS
2616  *   _BACKEND_READONLY
2617  *   _EXISTS - child already exists
2618  */
2619 int
2620 rc_node_create_child(rc_node_ptr_t *npp, uint32_t type, const char *name,
2621     rc_node_ptr_t *cpp)
2622 {
2623 	rc_node_t *np;
2624 	rc_node_t *cp = NULL;
2625 	int rc;
2626 
2627 	rc_node_clear(cpp, 0);
2628 
2629 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
2630 
2631 	/*
2632 	 * there is a separate interface for creating property groups
2633 	 */
2634 	if (type == REP_PROTOCOL_ENTITY_PROPERTYGRP) {
2635 		(void) pthread_mutex_unlock(&np->rn_lock);
2636 		return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2637 	}
2638 
2639 	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
2640 		(void) pthread_mutex_unlock(&np->rn_lock);
2641 		np = np->rn_cchain[0];
2642 		RC_NODE_CHECK_AND_LOCK(np);
2643 	}
2644 
2645 	if ((rc = rc_check_parent_child(np->rn_id.rl_type, type)) !=
2646 	    REP_PROTOCOL_SUCCESS) {
2647 		(void) pthread_mutex_unlock(&np->rn_lock);
2648 		return (rc);
2649 	}
2650 	if ((rc = rc_check_type_name(type, name)) != REP_PROTOCOL_SUCCESS) {
2651 		(void) pthread_mutex_unlock(&np->rn_lock);
2652 		return (rc);
2653 	}
2654 
2655 	if ((rc = rc_node_modify_permission_check()) != REP_PROTOCOL_SUCCESS) {
2656 		(void) pthread_mutex_unlock(&np->rn_lock);
2657 		return (rc);
2658 	}
2659 
2660 	HOLD_PTR_FLAG_OR_RETURN(np, npp, RC_NODE_CREATING_CHILD);
2661 	(void) pthread_mutex_unlock(&np->rn_lock);
2662 
2663 	rc = object_create(np, type, name, &cp);
2664 	assert(rc != REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2665 
2666 	if (rc == REP_PROTOCOL_SUCCESS) {
2667 		rc_node_assign(cpp, cp);
2668 		rc_node_rele(cp);
2669 	}
2670 
2671 	(void) pthread_mutex_lock(&np->rn_lock);
2672 	rc_node_rele_flag(np, RC_NODE_CREATING_CHILD);
2673 	(void) pthread_mutex_unlock(&np->rn_lock);
2674 
2675 	return (rc);
2676 }
2677 
2678 int
2679 rc_node_create_child_pg(rc_node_ptr_t *npp, uint32_t type, const char *name,
2680     const char *pgtype, uint32_t flags, rc_node_ptr_t *cpp)
2681 {
2682 	rc_node_t *np;
2683 	rc_node_t *cp;
2684 	int rc;
2685 	permcheck_t *pcp;
2686 	int granted;
2687 
2688 	rc_node_clear(cpp, 0);
2689 
2690 	/* verify flags is valid */
2691 	if (flags & ~SCF_PG_FLAG_NONPERSISTENT)
2692 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
2693 
2694 	RC_NODE_PTR_GET_CHECK_AND_HOLD(np, npp);
2695 
2696 	if (type != REP_PROTOCOL_ENTITY_PROPERTYGRP) {
2697 		rc_node_rele(np);
2698 		return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2699 	}
2700 
2701 	if ((rc = rc_check_parent_child(np->rn_id.rl_type, type)) !=
2702 	    REP_PROTOCOL_SUCCESS) {
2703 		rc_node_rele(np);
2704 		return (rc);
2705 	}
2706 	if ((rc = rc_check_type_name(type, name)) != REP_PROTOCOL_SUCCESS ||
2707 	    (rc = rc_check_pgtype_name(pgtype)) != REP_PROTOCOL_SUCCESS) {
2708 		rc_node_rele(np);
2709 		return (rc);
2710 	}
2711 
2712 	if (!client_is_privileged()) {
2713 #ifdef NATIVE_BUILD
2714 		rc = REP_PROTOCOL_FAIL_PERMISSION_DENIED;
2715 #else
2716 		/* Must have .smf.modify or smf.modify.<type> authorization */
2717 		pcp = pc_create();
2718 		if (pcp != NULL) {
2719 			rc = perm_add_enabling(pcp, AUTH_MODIFY);
2720 
2721 			if (rc == REP_PROTOCOL_SUCCESS) {
2722 				const char * const auth =
2723 				    perm_auth_for_pgtype(pgtype);
2724 
2725 				if (auth != NULL)
2726 					rc = perm_add_enabling(pcp, auth);
2727 			}
2728 
2729 			/*
2730 			 * .manage or $action_authorization can be used to
2731 			 * create the actions pg and the general_ovr pg.
2732 			 */
2733 			if (rc == REP_PROTOCOL_SUCCESS &&
2734 			    (flags & SCF_PG_FLAG_NONPERSISTENT) != 0 &&
2735 			    np->rn_id.rl_type == REP_PROTOCOL_ENTITY_INSTANCE &&
2736 			    ((strcmp(name, AUTH_PG_ACTIONS) == 0 &&
2737 			    strcmp(pgtype, AUTH_PG_ACTIONS_TYPE) == 0) ||
2738 			    (strcmp(name, AUTH_PG_GENERAL_OVR) == 0 &&
2739 			    strcmp(pgtype, AUTH_PG_GENERAL_OVR_TYPE) == 0))) {
2740 				rc = perm_add_enabling(pcp, AUTH_MANAGE);
2741 
2742 				if (rc == REP_PROTOCOL_SUCCESS)
2743 					rc = perm_add_inst_action_auth(pcp, np);
2744 			}
2745 
2746 			if (rc == REP_PROTOCOL_SUCCESS) {
2747 				granted = perm_granted(pcp);
2748 
2749 				if (granted < 0)
2750 					rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
2751 			}
2752 
2753 			pc_free(pcp);
2754 		} else {
2755 			rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
2756 		}
2757 
2758 		if (rc == REP_PROTOCOL_SUCCESS && !granted)
2759 			rc = REP_PROTOCOL_FAIL_PERMISSION_DENIED;
2760 #endif /* NATIVE_BUILD */
2761 
2762 		if (rc != REP_PROTOCOL_SUCCESS) {
2763 			rc_node_rele(np);
2764 			return (rc);
2765 		}
2766 	}
2767 
2768 	(void) pthread_mutex_lock(&np->rn_lock);
2769 	HOLD_PTR_FLAG_OR_RETURN(np, npp, RC_NODE_CREATING_CHILD);
2770 	(void) pthread_mutex_unlock(&np->rn_lock);
2771 
2772 	rc = object_create_pg(np, type, name, pgtype, flags, &cp);
2773 
2774 	if (rc == REP_PROTOCOL_SUCCESS) {
2775 		rc_node_assign(cpp, cp);
2776 		rc_node_rele(cp);
2777 	}
2778 
2779 	(void) pthread_mutex_lock(&np->rn_lock);
2780 	rc_node_rele_flag(np, RC_NODE_CREATING_CHILD);
2781 	(void) pthread_mutex_unlock(&np->rn_lock);
2782 
2783 	return (rc);
2784 }
2785 
2786 static void
2787 rc_pg_notify_fire(rc_node_pg_notify_t *pnp)
2788 {
2789 	assert(MUTEX_HELD(&rc_pg_notify_lock));
2790 
2791 	if (pnp->rnpn_pg != NULL) {
2792 		uu_list_remove(pnp->rnpn_pg->rn_pg_notify_list, pnp);
2793 		(void) close(pnp->rnpn_fd);
2794 
2795 		pnp->rnpn_pg = NULL;
2796 		pnp->rnpn_fd = -1;
2797 	} else {
2798 		assert(pnp->rnpn_fd == -1);
2799 	}
2800 }
2801 
2802 static void
2803 rc_notify_node_delete(rc_notify_delete_t *ndp, rc_node_t *np_arg)
2804 {
2805 	rc_node_t *svc = NULL;
2806 	rc_node_t *inst = NULL;
2807 	rc_node_t *pg = NULL;
2808 	rc_node_t *np = np_arg;
2809 	rc_node_t *nnp;
2810 
2811 	while (svc == NULL) {
2812 		(void) pthread_mutex_lock(&np->rn_lock);
2813 		if (!rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
2814 			(void) pthread_mutex_unlock(&np->rn_lock);
2815 			goto cleanup;
2816 		}
2817 		nnp = np->rn_parent;
2818 		rc_node_hold_locked(np);	/* hold it in place */
2819 
2820 		switch (np->rn_id.rl_type) {
2821 		case REP_PROTOCOL_ENTITY_PROPERTYGRP:
2822 			assert(pg == NULL);
2823 			pg = np;
2824 			break;
2825 		case REP_PROTOCOL_ENTITY_INSTANCE:
2826 			assert(inst == NULL);
2827 			inst = np;
2828 			break;
2829 		case REP_PROTOCOL_ENTITY_SERVICE:
2830 			assert(svc == NULL);
2831 			svc = np;
2832 			break;
2833 		default:
2834 			rc_node_rele_flag(np, RC_NODE_USING_PARENT);
2835 			rc_node_rele_locked(np);
2836 			goto cleanup;
2837 		}
2838 
2839 		(void) pthread_mutex_unlock(&np->rn_lock);
2840 
2841 		np = nnp;
2842 		if (np == NULL)
2843 			goto cleanup;
2844 	}
2845 
2846 	rc_notify_deletion(ndp,
2847 	    svc->rn_name,
2848 	    inst != NULL ? inst->rn_name : NULL,
2849 	    pg != NULL ? pg->rn_name : NULL);
2850 
2851 	ndp = NULL;
2852 
2853 cleanup:
2854 	if (ndp != NULL)
2855 		uu_free(ndp);
2856 
2857 	for (;;) {
2858 		if (svc != NULL) {
2859 			np = svc;
2860 			svc = NULL;
2861 		} else if (inst != NULL) {
2862 			np = inst;
2863 			inst = NULL;
2864 		} else if (pg != NULL) {
2865 			np = pg;
2866 			pg = NULL;
2867 		} else
2868 			break;
2869 
2870 		(void) pthread_mutex_lock(&np->rn_lock);
2871 		rc_node_rele_flag(np, RC_NODE_USING_PARENT);
2872 		rc_node_rele_locked(np);
2873 	}
2874 }
2875 
2876 /*
2877  * N.B.:  this function drops np->rn_lock on the way out.
2878  */
2879 static void
2880 rc_node_delete_hold(rc_node_t *np, int andformer)
2881 {
2882 	rc_node_t *cp;
2883 
2884 again:
2885 	assert(MUTEX_HELD(&np->rn_lock));
2886 	assert((np->rn_flags & RC_NODE_DYING_FLAGS) == RC_NODE_DYING_FLAGS);
2887 
2888 	for (cp = uu_list_first(np->rn_children); cp != NULL;
2889 	    cp = uu_list_next(np->rn_children, cp)) {
2890 		(void) pthread_mutex_lock(&cp->rn_lock);
2891 		(void) pthread_mutex_unlock(&np->rn_lock);
2892 		if (!rc_node_hold_flag(cp, RC_NODE_DYING_FLAGS)) {
2893 			/*
2894 			 * already marked as dead -- can't happen, since that
2895 			 * would require setting RC_NODE_CHILDREN_CHANGING
2896 			 * in np, and we're holding that...
2897 			 */
2898 			abort();
2899 		}
2900 		rc_node_delete_hold(cp, andformer);	/* recurse, drop lock */
2901 
2902 		(void) pthread_mutex_lock(&np->rn_lock);
2903 	}
2904 	if (andformer && (cp = np->rn_former) != NULL) {
2905 		(void) pthread_mutex_lock(&cp->rn_lock);
2906 		(void) pthread_mutex_unlock(&np->rn_lock);
2907 		if (!rc_node_hold_flag(cp, RC_NODE_DYING_FLAGS))
2908 			abort();		/* can't happen, see above */
2909 		np = cp;
2910 		goto again;		/* tail-recurse down rn_former */
2911 	}
2912 	(void) pthread_mutex_unlock(&np->rn_lock);
2913 }
2914 
2915 /*
2916  * N.B.:  this function drops np->rn_lock on the way out.
2917  */
2918 static void
2919 rc_node_delete_rele(rc_node_t *np, int andformer)
2920 {
2921 	rc_node_t *cp;
2922 
2923 again:
2924 	assert(MUTEX_HELD(&np->rn_lock));
2925 	assert((np->rn_flags & RC_NODE_DYING_FLAGS) == RC_NODE_DYING_FLAGS);
2926 
2927 	for (cp = uu_list_first(np->rn_children); cp != NULL;
2928 	    cp = uu_list_next(np->rn_children, cp)) {
2929 		(void) pthread_mutex_lock(&cp->rn_lock);
2930 		(void) pthread_mutex_unlock(&np->rn_lock);
2931 		rc_node_delete_rele(cp, andformer);	/* recurse, drop lock */
2932 		(void) pthread_mutex_lock(&np->rn_lock);
2933 	}
2934 	if (andformer && (cp = np->rn_former) != NULL) {
2935 		(void) pthread_mutex_lock(&cp->rn_lock);
2936 		rc_node_rele_flag(np, RC_NODE_DYING_FLAGS);
2937 		(void) pthread_mutex_unlock(&np->rn_lock);
2938 
2939 		np = cp;
2940 		goto again;		/* tail-recurse down rn_former */
2941 	}
2942 	rc_node_rele_flag(np, RC_NODE_DYING_FLAGS);
2943 	(void) pthread_mutex_unlock(&np->rn_lock);
2944 }
2945 
2946 static void
2947 rc_node_finish_delete(rc_node_t *cp)
2948 {
2949 	cache_bucket_t *bp;
2950 	rc_node_pg_notify_t *pnp;
2951 
2952 	assert(MUTEX_HELD(&cp->rn_lock));
2953 
2954 	if (!(cp->rn_flags & RC_NODE_OLD)) {
2955 		assert(cp->rn_flags & RC_NODE_IN_PARENT);
2956 		if (!rc_node_wait_flag(cp, RC_NODE_USING_PARENT)) {
2957 			abort();		/* can't happen, see above */
2958 		}
2959 		cp->rn_flags &= ~RC_NODE_IN_PARENT;
2960 		cp->rn_parent = NULL;
2961 	}
2962 
2963 	cp->rn_flags |= RC_NODE_DEAD;
2964 
2965 	/*
2966 	 * If this node is not out-dated, we need to remove it from
2967 	 * the notify list and cache hash table.
2968 	 */
2969 	if (!(cp->rn_flags & RC_NODE_OLD)) {
2970 		assert(cp->rn_refs > 0);	/* can't go away yet */
2971 		(void) pthread_mutex_unlock(&cp->rn_lock);
2972 
2973 		(void) pthread_mutex_lock(&rc_pg_notify_lock);
2974 		while ((pnp = uu_list_first(cp->rn_pg_notify_list)) != NULL)
2975 			rc_pg_notify_fire(pnp);
2976 		(void) pthread_mutex_unlock(&rc_pg_notify_lock);
2977 		rc_notify_remove_node(cp);
2978 
2979 		bp = cache_hold(cp->rn_hash);
2980 		(void) pthread_mutex_lock(&cp->rn_lock);
2981 		cache_remove_unlocked(bp, cp);
2982 		cache_release(bp);
2983 	}
2984 }
2985 
2986 /*
2987  * N.B.:  this function drops np->rn_lock and a reference on the way out.
2988  */
2989 static void
2990 rc_node_delete_children(rc_node_t *np, int andformer)
2991 {
2992 	rc_node_t *cp;
2993 
2994 again:
2995 	assert(np->rn_refs > 0);
2996 	assert(MUTEX_HELD(&np->rn_lock));
2997 	assert(np->rn_flags & RC_NODE_DEAD);
2998 
2999 	while ((cp = uu_list_first(np->rn_children)) != NULL) {
3000 		uu_list_remove(np->rn_children, cp);
3001 		(void) pthread_mutex_lock(&cp->rn_lock);
3002 		(void) pthread_mutex_unlock(&np->rn_lock);
3003 		rc_node_hold_locked(cp);	/* hold while we recurse */
3004 		rc_node_finish_delete(cp);
3005 		rc_node_delete_children(cp, andformer);	/* drops lock + ref */
3006 		(void) pthread_mutex_lock(&np->rn_lock);
3007 	}
3008 
3009 	/*
3010 	 * when we drop cp's lock, all the children will be gone, so we
3011 	 * can release DYING_FLAGS.
3012 	 */
3013 	rc_node_rele_flag(np, RC_NODE_DYING_FLAGS);
3014 	if (andformer && (cp = np->rn_former) != NULL) {
3015 		np->rn_former = NULL;		/* unlink */
3016 		(void) pthread_mutex_lock(&cp->rn_lock);
3017 		(void) pthread_mutex_unlock(&np->rn_lock);
3018 		np->rn_flags &= ~RC_NODE_ON_FORMER;
3019 
3020 		rc_node_hold_locked(cp);	/* hold while we loop */
3021 
3022 		rc_node_finish_delete(cp);
3023 
3024 		rc_node_rele(np);		/* drop the old reference */
3025 
3026 		np = cp;
3027 		goto again;		/* tail-recurse down rn_former */
3028 	}
3029 	rc_node_rele_locked(np);
3030 }
3031 
3032 static void
3033 rc_node_unrefed(rc_node_t *np)
3034 {
3035 	int unrefed;
3036 	rc_node_t *pp, *cur;
3037 
3038 	assert(MUTEX_HELD(&np->rn_lock));
3039 	assert(np->rn_refs == 0);
3040 	assert(np->rn_other_refs == 0);
3041 	assert(np->rn_other_refs_held == 0);
3042 
3043 	if (np->rn_flags & RC_NODE_DEAD) {
3044 		(void) pthread_mutex_unlock(&np->rn_lock);
3045 		rc_node_destroy(np);
3046 		return;
3047 	}
3048 
3049 	assert(np->rn_flags & RC_NODE_OLD);
3050 	if (np->rn_flags & RC_NODE_UNREFED) {
3051 		(void) pthread_mutex_unlock(&np->rn_lock);
3052 		return;
3053 	}
3054 	np->rn_flags |= RC_NODE_UNREFED;
3055 
3056 	(void) pthread_mutex_unlock(&np->rn_lock);
3057 
3058 	/*
3059 	 * find the current in-hash object, and grab it's RC_NODE_IN_TX
3060 	 * flag.  That protects the entire rn_former chain.
3061 	 */
3062 	for (;;) {
3063 		pp = cache_lookup(&np->rn_id);
3064 		if (pp == NULL) {
3065 			(void) pthread_mutex_lock(&np->rn_lock);
3066 			if (np->rn_flags & RC_NODE_DEAD)
3067 				goto died;
3068 			/*
3069 			 * We are trying to unreference this node, but the
3070 			 * owner of the former list does not exist.  It must
3071 			 * be the case that another thread is deleting this
3072 			 * entire sub-branch, but has not yet reached us.
3073 			 * We will in short order be deleted.
3074 			 */
3075 			np->rn_flags &= ~RC_NODE_UNREFED;
3076 			(void) pthread_mutex_unlock(&np->rn_lock);
3077 			return;
3078 		}
3079 		if (pp == np) {
3080 			/*
3081 			 * no longer unreferenced
3082 			 */
3083 			(void) pthread_mutex_lock(&np->rn_lock);
3084 			np->rn_flags &= ~RC_NODE_UNREFED;
3085 			rc_node_rele_locked(np);
3086 			return;
3087 		}
3088 		(void) pthread_mutex_lock(&pp->rn_lock);
3089 		if ((pp->rn_flags & RC_NODE_OLD) ||
3090 		    !rc_node_hold_flag(pp, RC_NODE_IN_TX)) {
3091 			rc_node_rele_locked(pp);
3092 			continue;
3093 		}
3094 		if (!(pp->rn_flags & RC_NODE_OLD)) {
3095 			(void) pthread_mutex_unlock(&pp->rn_lock);
3096 			break;
3097 		}
3098 		rc_node_rele_flag(pp, RC_NODE_IN_TX);
3099 		rc_node_rele_locked(pp);
3100 	}
3101 
3102 	(void) pthread_mutex_lock(&np->rn_lock);
3103 	if (!(np->rn_flags & (RC_NODE_OLD | RC_NODE_DEAD)) ||
3104 	    np->rn_refs != 0 || np->rn_other_refs != 0 ||
3105 	    np->rn_other_refs_held != 0) {
3106 		np->rn_flags &= ~RC_NODE_UNREFED;
3107 		(void) pthread_mutex_lock(&pp->rn_lock);
3108 
3109 		rc_node_rele_flag(pp, RC_NODE_IN_TX);
3110 		rc_node_rele_locked(pp);
3111 		return;
3112 	}
3113 
3114 	if (!rc_node_hold_flag(np, RC_NODE_DYING_FLAGS)) {
3115 		(void) pthread_mutex_unlock(&np->rn_lock);
3116 
3117 		rc_node_rele_flag(pp, RC_NODE_IN_TX);
3118 		rc_node_rele_locked(pp);
3119 
3120 		(void) pthread_mutex_lock(&np->rn_lock);
3121 		goto died;
3122 	}
3123 
3124 	rc_node_delete_hold(np, 0);
3125 
3126 	(void) pthread_mutex_lock(&np->rn_lock);
3127 	if (!(np->rn_flags & RC_NODE_OLD) ||
3128 	    np->rn_refs != 0 || np->rn_other_refs != 0 ||
3129 	    np->rn_other_refs_held != 0) {
3130 		np->rn_flags &= ~RC_NODE_UNREFED;
3131 		rc_node_delete_rele(np, 0);
3132 
3133 		(void) pthread_mutex_lock(&pp->rn_lock);
3134 		rc_node_rele_flag(pp, RC_NODE_IN_TX);
3135 		rc_node_rele_locked(pp);
3136 		return;
3137 	}
3138 
3139 	np->rn_flags |= RC_NODE_DEAD;
3140 	rc_node_hold_locked(np);
3141 	rc_node_delete_children(np, 0);
3142 
3143 	/*
3144 	 * It's gone -- remove it from the former chain and destroy it.
3145 	 */
3146 	(void) pthread_mutex_lock(&pp->rn_lock);
3147 	for (cur = pp; cur != NULL && cur->rn_former != np;
3148 	    cur = cur->rn_former)
3149 		;
3150 	assert(cur != NULL && cur != np);
3151 
3152 	cur->rn_former = np->rn_former;
3153 	np->rn_former = NULL;
3154 
3155 	rc_node_rele_flag(pp, RC_NODE_IN_TX);
3156 	rc_node_rele_locked(pp);
3157 
3158 	(void) pthread_mutex_lock(&np->rn_lock);
3159 	assert(np->rn_flags & RC_NODE_ON_FORMER);
3160 	np->rn_flags &= ~(RC_NODE_UNREFED | RC_NODE_ON_FORMER);
3161 	(void) pthread_mutex_unlock(&np->rn_lock);
3162 	rc_node_destroy(np);
3163 	return;
3164 
3165 died:
3166 	np->rn_flags &= ~RC_NODE_UNREFED;
3167 	unrefed = (np->rn_refs == 0 && np->rn_other_refs == 0 &&
3168 	    np->rn_other_refs_held == 0);
3169 	(void) pthread_mutex_unlock(&np->rn_lock);
3170 	if (unrefed)
3171 		rc_node_destroy(np);
3172 }
3173 
3174 /*
3175  * Fails with
3176  *   _NOT_SET
3177  *   _DELETED
3178  *   _BAD_REQUEST
3179  *   _PERMISSION_DENIED
3180  *   _NO_RESOURCES
3181  * and whatever object_delete() fails with.
3182  */
3183 int
3184 rc_node_delete(rc_node_ptr_t *npp)
3185 {
3186 	rc_node_t *np, *np_orig;
3187 	rc_node_t *pp = NULL;
3188 	int rc;
3189 	rc_node_pg_notify_t *pnp;
3190 	cache_bucket_t *bp;
3191 	rc_notify_delete_t *ndp;
3192 	permcheck_t *pcp;
3193 	int granted;
3194 
3195 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
3196 
3197 	switch (np->rn_id.rl_type) {
3198 	case REP_PROTOCOL_ENTITY_SERVICE:
3199 	case REP_PROTOCOL_ENTITY_INSTANCE:
3200 	case REP_PROTOCOL_ENTITY_SNAPSHOT:
3201 		break;			/* deletable */
3202 
3203 	case REP_PROTOCOL_ENTITY_SCOPE:
3204 	case REP_PROTOCOL_ENTITY_SNAPLEVEL:
3205 		/* Scopes and snaplevels are indelible. */
3206 		(void) pthread_mutex_unlock(&np->rn_lock);
3207 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
3208 
3209 	case REP_PROTOCOL_ENTITY_CPROPERTYGRP:
3210 		(void) pthread_mutex_unlock(&np->rn_lock);
3211 		np = np->rn_cchain[0];
3212 		RC_NODE_CHECK_AND_LOCK(np);
3213 		break;
3214 
3215 	case REP_PROTOCOL_ENTITY_PROPERTYGRP:
3216 		if (np->rn_id.rl_ids[ID_SNAPSHOT] == 0)
3217 			break;
3218 
3219 		/* Snapshot property groups are indelible. */
3220 		(void) pthread_mutex_unlock(&np->rn_lock);
3221 		return (REP_PROTOCOL_FAIL_PERMISSION_DENIED);
3222 
3223 	case REP_PROTOCOL_ENTITY_PROPERTY:
3224 		(void) pthread_mutex_unlock(&np->rn_lock);
3225 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
3226 
3227 	default:
3228 		assert(0);
3229 		abort();
3230 		break;
3231 	}
3232 
3233 	np_orig = np;
3234 	rc_node_hold_locked(np);	/* simplifies rest of the code */
3235 
3236 again:
3237 	/*
3238 	 * The following loop is to deal with the fact that snapshots and
3239 	 * property groups are moving targets -- changes to them result
3240 	 * in a new "child" node.  Since we can only delete from the top node,
3241 	 * we have to loop until we have a non-RC_NODE_OLD version.
3242 	 */
3243 	for (;;) {
3244 		if (!rc_node_wait_flag(np,
3245 		    RC_NODE_IN_TX | RC_NODE_USING_PARENT)) {
3246 			rc_node_rele_locked(np);
3247 			return (REP_PROTOCOL_FAIL_DELETED);
3248 		}
3249 
3250 		if (np->rn_flags & RC_NODE_OLD) {
3251 			rc_node_rele_locked(np);
3252 			np = cache_lookup(&np_orig->rn_id);
3253 			assert(np != np_orig);
3254 
3255 			if (np == NULL) {
3256 				rc = REP_PROTOCOL_FAIL_DELETED;
3257 				goto fail;
3258 			}
3259 			(void) pthread_mutex_lock(&np->rn_lock);
3260 			continue;
3261 		}
3262 
3263 		if (!rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
3264 			rc_node_rele_locked(np);
3265 			rc_node_clear(npp, 1);
3266 			return (REP_PROTOCOL_FAIL_DELETED);
3267 		}
3268 
3269 		/*
3270 		 * Mark our parent as children changing.  this call drops our
3271 		 * lock and the RC_NODE_USING_PARENT flag, and returns with
3272 		 * pp's lock held
3273 		 */
3274 		pp = rc_node_hold_parent_flag(np, RC_NODE_CHILDREN_CHANGING);
3275 		if (pp == NULL) {
3276 			/* our parent is gone, we're going next... */
3277 			rc_node_rele(np);
3278 
3279 			rc_node_clear(npp, 1);
3280 			return (REP_PROTOCOL_FAIL_DELETED);
3281 		}
3282 
3283 		rc_node_hold_locked(pp);		/* hold for later */
3284 		(void) pthread_mutex_unlock(&pp->rn_lock);
3285 
3286 		(void) pthread_mutex_lock(&np->rn_lock);
3287 		if (!(np->rn_flags & RC_NODE_OLD))
3288 			break;			/* not old -- we're done */
3289 
3290 		(void) pthread_mutex_unlock(&np->rn_lock);
3291 		(void) pthread_mutex_lock(&pp->rn_lock);
3292 		rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
3293 		rc_node_rele_locked(pp);
3294 		(void) pthread_mutex_lock(&np->rn_lock);
3295 		continue;			/* loop around and try again */
3296 	}
3297 	/*
3298 	 * Everyone out of the pool -- we grab everything but
3299 	 * RC_NODE_USING_PARENT (including RC_NODE_DYING) to keep
3300 	 * any changes from occurring while we are attempting to
3301 	 * delete the node.
3302 	 */
3303 	if (!rc_node_hold_flag(np, RC_NODE_DYING_FLAGS)) {
3304 		(void) pthread_mutex_unlock(&np->rn_lock);
3305 		rc = REP_PROTOCOL_FAIL_DELETED;
3306 		goto fail;
3307 	}
3308 
3309 	assert(!(np->rn_flags & RC_NODE_OLD));
3310 
3311 	if (!client_is_privileged()) {
3312 		/* permission check */
3313 		(void) pthread_mutex_unlock(&np->rn_lock);
3314 
3315 #ifdef NATIVE_BUILD
3316 		rc = REP_PROTOCOL_FAIL_PERMISSION_DENIED;
3317 #else
3318 		pcp = pc_create();
3319 		if (pcp != NULL) {
3320 			rc = perm_add_enabling(pcp, AUTH_MODIFY);
3321 
3322 			/* add .smf.modify.<type> for pgs. */
3323 			if (rc == REP_PROTOCOL_SUCCESS && np->rn_id.rl_type ==
3324 			    REP_PROTOCOL_ENTITY_PROPERTYGRP) {
3325 				const char * const auth =
3326 				    perm_auth_for_pgtype(np->rn_type);
3327 
3328 				if (auth != NULL)
3329 					rc = perm_add_enabling(pcp, auth);
3330 			}
3331 
3332 			if (rc == REP_PROTOCOL_SUCCESS) {
3333 				granted = perm_granted(pcp);
3334 
3335 				if (granted < 0)
3336 					rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
3337 			}
3338 
3339 			pc_free(pcp);
3340 		} else {
3341 			rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
3342 		}
3343 
3344 		if (rc == REP_PROTOCOL_SUCCESS && !granted)
3345 			rc = REP_PROTOCOL_FAIL_PERMISSION_DENIED;
3346 #endif /* NATIVE_BUILD */
3347 
3348 		if (rc != REP_PROTOCOL_SUCCESS) {
3349 			(void) pthread_mutex_lock(&np->rn_lock);
3350 			rc_node_rele_flag(np, RC_NODE_DYING_FLAGS);
3351 			(void) pthread_mutex_unlock(&np->rn_lock);
3352 			goto fail;
3353 		}
3354 
3355 		(void) pthread_mutex_lock(&np->rn_lock);
3356 	}
3357 
3358 	ndp = uu_zalloc(sizeof (*ndp));
3359 	if (ndp == NULL) {
3360 		rc_node_rele_flag(np, RC_NODE_DYING_FLAGS);
3361 		(void) pthread_mutex_unlock(&np->rn_lock);
3362 		rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
3363 		goto fail;
3364 	}
3365 
3366 	rc_node_delete_hold(np, 1);	/* hold entire subgraph, drop lock */
3367 
3368 	rc = object_delete(np);
3369 
3370 	if (rc != REP_PROTOCOL_SUCCESS) {
3371 		(void) pthread_mutex_lock(&np->rn_lock);
3372 		rc_node_delete_rele(np, 1);		/* drops lock */
3373 		uu_free(ndp);
3374 		goto fail;
3375 	}
3376 
3377 	/*
3378 	 * Now, delicately unlink and delete the object.
3379 	 *
3380 	 * Create the delete notification, atomically remove
3381 	 * from the hash table and set the NODE_DEAD flag, and
3382 	 * remove from the parent's children list.
3383 	 */
3384 	rc_notify_node_delete(ndp, np); /* frees or uses ndp */
3385 
3386 	bp = cache_hold(np->rn_hash);
3387 
3388 	(void) pthread_mutex_lock(&np->rn_lock);
3389 	cache_remove_unlocked(bp, np);
3390 	cache_release(bp);
3391 
3392 	np->rn_flags |= RC_NODE_DEAD;
3393 	if (pp != NULL) {
3394 		(void) pthread_mutex_unlock(&np->rn_lock);
3395 
3396 		(void) pthread_mutex_lock(&pp->rn_lock);
3397 		(void) pthread_mutex_lock(&np->rn_lock);
3398 		uu_list_remove(pp->rn_children, np);
3399 		rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
3400 		(void) pthread_mutex_unlock(&pp->rn_lock);
3401 		np->rn_flags &= ~RC_NODE_IN_PARENT;
3402 	}
3403 	/*
3404 	 * finally, propagate death to our children, handle notifications,
3405 	 * and release our hold.
3406 	 */
3407 	rc_node_hold_locked(np);	/* hold for delete */
3408 	rc_node_delete_children(np, 1);	/* drops DYING_FLAGS, lock, ref */
3409 
3410 	rc_node_clear(npp, 1);
3411 
3412 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
3413 	while ((pnp = uu_list_first(np->rn_pg_notify_list)) != NULL)
3414 		rc_pg_notify_fire(pnp);
3415 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
3416 	rc_notify_remove_node(np);
3417 
3418 	rc_node_rele(np);
3419 
3420 	return (rc);
3421 
3422 fail:
3423 	rc_node_rele(np);
3424 	if (rc == REP_PROTOCOL_FAIL_DELETED)
3425 		rc_node_clear(npp, 1);
3426 	if (pp != NULL) {
3427 		(void) pthread_mutex_lock(&pp->rn_lock);
3428 		rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
3429 		rc_node_rele_locked(pp);	/* drop ref and lock */
3430 	}
3431 	return (rc);
3432 }
3433 
3434 int
3435 rc_node_next_snaplevel(rc_node_ptr_t *npp, rc_node_ptr_t *cpp)
3436 {
3437 	rc_node_t *np;
3438 	rc_node_t *cp, *pp;
3439 	int res;
3440 
3441 	rc_node_clear(cpp, 0);
3442 
3443 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
3444 
3445 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPSHOT &&
3446 	    np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPLEVEL) {
3447 		(void) pthread_mutex_unlock(&np->rn_lock);
3448 		return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
3449 	}
3450 
3451 	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_SNAPSHOT) {
3452 		if ((res = rc_node_fill_children(np,
3453 		    REP_PROTOCOL_ENTITY_SNAPLEVEL)) != REP_PROTOCOL_SUCCESS) {
3454 			(void) pthread_mutex_unlock(&np->rn_lock);
3455 			return (res);
3456 		}
3457 
3458 		for (cp = uu_list_first(np->rn_children);
3459 		    cp != NULL;
3460 		    cp = uu_list_next(np->rn_children, cp)) {
3461 			if (cp->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPLEVEL)
3462 				continue;
3463 			rc_node_hold(cp);
3464 			break;
3465 		}
3466 
3467 		(void) pthread_mutex_unlock(&np->rn_lock);
3468 	} else {
3469 		HOLD_PTR_FLAG_OR_RETURN(np, npp, RC_NODE_USING_PARENT);
3470 		/*
3471 		 * mark our parent as children changing.  This call drops our
3472 		 * lock and the RC_NODE_USING_PARENT flag, and returns with
3473 		 * pp's lock held
3474 		 */
3475 		pp = rc_node_hold_parent_flag(np, RC_NODE_CHILDREN_CHANGING);
3476 		if (pp == NULL) {
3477 			/* our parent is gone, we're going next... */
3478 
3479 			rc_node_clear(npp, 1);
3480 			return (REP_PROTOCOL_FAIL_DELETED);
3481 		}
3482 
3483 		/*
3484 		 * find the next snaplevel
3485 		 */
3486 		cp = np;
3487 		while ((cp = uu_list_next(pp->rn_children, cp)) != NULL &&
3488 		    cp->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPLEVEL)
3489 			;
3490 
3491 		/* it must match the snaplevel list */
3492 		assert((cp == NULL && np->rn_snaplevel->rsl_next == NULL) ||
3493 		    (cp != NULL && np->rn_snaplevel->rsl_next ==
3494 		    cp->rn_snaplevel));
3495 
3496 		if (cp != NULL)
3497 			rc_node_hold(cp);
3498 
3499 		rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
3500 
3501 		(void) pthread_mutex_unlock(&pp->rn_lock);
3502 	}
3503 
3504 	rc_node_assign(cpp, cp);
3505 	if (cp != NULL) {
3506 		rc_node_rele(cp);
3507 
3508 		return (REP_PROTOCOL_SUCCESS);
3509 	}
3510 	return (REP_PROTOCOL_FAIL_NOT_FOUND);
3511 }
3512 
3513 /*
3514  * This call takes a snapshot (np) and either:
3515  *	an existing snapid (to be associated with np), or
3516  *	a non-NULL parentp (from which a new snapshot is taken, and associated
3517  *	    with np)
3518  *
3519  * To do the association, np is duplicated, the duplicate is made to
3520  * represent the new snapid, and np is replaced with the new rc_node_t on
3521  * np's parent's child list. np is placed on the new node's rn_former list,
3522  * and replaces np in cache_hash (so rc_node_update() will find the new one).
3523  */
3524 static int
3525 rc_attach_snapshot(rc_node_t *np, uint32_t snapid, rc_node_t *parentp)
3526 {
3527 	rc_node_t *np_orig;
3528 	rc_node_t *nnp, *prev;
3529 	rc_node_t *pp;
3530 	int rc;
3531 
3532 	if (parentp != NULL)
3533 		assert(snapid == 0);
3534 
3535 	assert(MUTEX_HELD(&np->rn_lock));
3536 
3537 	if ((rc = rc_node_modify_permission_check()) != REP_PROTOCOL_SUCCESS) {
3538 		(void) pthread_mutex_unlock(&np->rn_lock);
3539 		return (rc);
3540 	}
3541 
3542 	np_orig = np;
3543 	rc_node_hold_locked(np);		/* simplifies the remainder */
3544 
3545 	/*
3546 	 * get the latest node, holding RC_NODE_IN_TX to keep the rn_former
3547 	 * list from changing.
3548 	 */
3549 	for (;;) {
3550 		if (!(np->rn_flags & RC_NODE_OLD)) {
3551 			if (!rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
3552 				goto again;
3553 			}
3554 			pp = rc_node_hold_parent_flag(np,
3555 			    RC_NODE_CHILDREN_CHANGING);
3556 
3557 			(void) pthread_mutex_lock(&np->rn_lock);
3558 			if (pp == NULL) {
3559 				goto again;
3560 			}
3561 			if (np->rn_flags & RC_NODE_OLD) {
3562 				rc_node_rele_flag(pp,
3563 				    RC_NODE_CHILDREN_CHANGING);
3564 				(void) pthread_mutex_unlock(&pp->rn_lock);
3565 				goto again;
3566 			}
3567 			(void) pthread_mutex_unlock(&pp->rn_lock);
3568 
3569 			if (!rc_node_hold_flag(np, RC_NODE_IN_TX)) {
3570 				/*
3571 				 * Can't happen, since we're holding our
3572 				 * parent's CHILDREN_CHANGING flag...
3573 				 */
3574 				abort();
3575 			}
3576 			break;			/* everything's ready */
3577 		}
3578 again:
3579 		rc_node_rele_locked(np);
3580 		np = cache_lookup(&np_orig->rn_id);
3581 
3582 		if (np == NULL)
3583 			return (REP_PROTOCOL_FAIL_DELETED);
3584 
3585 		(void) pthread_mutex_lock(&np->rn_lock);
3586 	}
3587 
3588 	if (parentp != NULL) {
3589 		if (pp != parentp) {
3590 			rc = REP_PROTOCOL_FAIL_BAD_REQUEST;
3591 			goto fail;
3592 		}
3593 		nnp = NULL;
3594 	} else {
3595 		/*
3596 		 * look for a former node with the snapid we need.
3597 		 */
3598 		if (np->rn_snapshot_id == snapid) {
3599 			rc_node_rele_flag(np, RC_NODE_IN_TX);
3600 			rc_node_rele_locked(np);
3601 
3602 			(void) pthread_mutex_lock(&pp->rn_lock);
3603 			rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
3604 			(void) pthread_mutex_unlock(&pp->rn_lock);
3605 			return (REP_PROTOCOL_SUCCESS);	/* nothing to do */
3606 		}
3607 
3608 		prev = np;
3609 		while ((nnp = prev->rn_former) != NULL) {
3610 			if (nnp->rn_snapshot_id == snapid) {
3611 				rc_node_hold(nnp);
3612 				break;		/* existing node with that id */
3613 			}
3614 			prev = nnp;
3615 		}
3616 	}
3617 
3618 	if (nnp == NULL) {
3619 		prev = NULL;
3620 		nnp = rc_node_alloc();
3621 		if (nnp == NULL) {
3622 			rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
3623 			goto fail;
3624 		}
3625 
3626 		nnp->rn_id = np->rn_id;		/* structure assignment */
3627 		nnp->rn_hash = np->rn_hash;
3628 		nnp->rn_name = strdup(np->rn_name);
3629 		nnp->rn_snapshot_id = snapid;
3630 		nnp->rn_flags = RC_NODE_IN_TX | RC_NODE_USING_PARENT;
3631 
3632 		if (nnp->rn_name == NULL) {
3633 			rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
3634 			goto fail;
3635 		}
3636 	}
3637 
3638 	(void) pthread_mutex_unlock(&np->rn_lock);
3639 
3640 	rc = object_snapshot_attach(&np->rn_id, &snapid, (parentp != NULL));
3641 
3642 	if (parentp != NULL)
3643 		nnp->rn_snapshot_id = snapid;	/* fill in new snapid */
3644 	else
3645 		assert(nnp->rn_snapshot_id == snapid);
3646 
3647 	(void) pthread_mutex_lock(&np->rn_lock);
3648 	if (rc != REP_PROTOCOL_SUCCESS)
3649 		goto fail;
3650 
3651 	/*
3652 	 * fix up the former chain
3653 	 */
3654 	if (prev != NULL) {
3655 		prev->rn_former = nnp->rn_former;
3656 		(void) pthread_mutex_lock(&nnp->rn_lock);
3657 		nnp->rn_flags &= ~RC_NODE_ON_FORMER;
3658 		nnp->rn_former = NULL;
3659 		(void) pthread_mutex_unlock(&nnp->rn_lock);
3660 	}
3661 	np->rn_flags |= RC_NODE_OLD;
3662 	(void) pthread_mutex_unlock(&np->rn_lock);
3663 
3664 	/*
3665 	 * replace np with nnp
3666 	 */
3667 	rc_node_relink_child(pp, np, nnp);
3668 
3669 	rc_node_rele(np);
3670 
3671 	return (REP_PROTOCOL_SUCCESS);
3672 
3673 fail:
3674 	rc_node_rele_flag(np, RC_NODE_IN_TX);
3675 	rc_node_rele_locked(np);
3676 	(void) pthread_mutex_lock(&pp->rn_lock);
3677 	rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
3678 	(void) pthread_mutex_unlock(&pp->rn_lock);
3679 
3680 	if (nnp != NULL) {
3681 		if (prev == NULL)
3682 			rc_node_destroy(nnp);
3683 		else
3684 			rc_node_rele(nnp);
3685 	}
3686 
3687 	return (rc);
3688 }
3689 
3690 int
3691 rc_snapshot_take_new(rc_node_ptr_t *npp, const char *svcname,
3692     const char *instname, const char *name, rc_node_ptr_t *outpp)
3693 {
3694 	rc_node_t *np;
3695 	rc_node_t *outp = NULL;
3696 	int rc;
3697 
3698 	rc_node_clear(outpp, 0);
3699 
3700 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
3701 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_INSTANCE) {
3702 		(void) pthread_mutex_unlock(&np->rn_lock);
3703 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
3704 	}
3705 
3706 	rc = rc_check_type_name(REP_PROTOCOL_ENTITY_SNAPSHOT, name);
3707 	if (rc != REP_PROTOCOL_SUCCESS) {
3708 		(void) pthread_mutex_unlock(&np->rn_lock);
3709 		return (rc);
3710 	}
3711 
3712 	if (svcname != NULL && (rc =
3713 	    rc_check_type_name(REP_PROTOCOL_ENTITY_SERVICE, svcname)) !=
3714 	    REP_PROTOCOL_SUCCESS) {
3715 		(void) pthread_mutex_unlock(&np->rn_lock);
3716 		return (rc);
3717 	}
3718 
3719 	if (instname != NULL && (rc =
3720 	    rc_check_type_name(REP_PROTOCOL_ENTITY_INSTANCE, instname)) !=
3721 	    REP_PROTOCOL_SUCCESS) {
3722 		(void) pthread_mutex_unlock(&np->rn_lock);
3723 		return (rc);
3724 	}
3725 
3726 	if ((rc = rc_node_modify_permission_check()) != REP_PROTOCOL_SUCCESS) {
3727 		(void) pthread_mutex_unlock(&np->rn_lock);
3728 		return (rc);
3729 	}
3730 
3731 	HOLD_PTR_FLAG_OR_RETURN(np, npp, RC_NODE_CREATING_CHILD);
3732 	(void) pthread_mutex_unlock(&np->rn_lock);
3733 
3734 	rc = object_snapshot_take_new(np, svcname, instname, name, &outp);
3735 
3736 	if (rc == REP_PROTOCOL_SUCCESS) {
3737 		rc_node_assign(outpp, outp);
3738 		rc_node_rele(outp);
3739 	}
3740 
3741 	(void) pthread_mutex_lock(&np->rn_lock);
3742 	rc_node_rele_flag(np, RC_NODE_CREATING_CHILD);
3743 	(void) pthread_mutex_unlock(&np->rn_lock);
3744 
3745 	return (rc);
3746 }
3747 
3748 int
3749 rc_snapshot_take_attach(rc_node_ptr_t *npp, rc_node_ptr_t *outpp)
3750 {
3751 	rc_node_t *np, *outp;
3752 
3753 	RC_NODE_PTR_GET_CHECK(np, npp);
3754 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_INSTANCE) {
3755 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
3756 	}
3757 
3758 	RC_NODE_PTR_GET_CHECK_AND_LOCK(outp, outpp);
3759 	if (outp->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPSHOT) {
3760 		(void) pthread_mutex_unlock(&outp->rn_lock);
3761 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
3762 	}
3763 
3764 	return (rc_attach_snapshot(outp, 0, np));	/* drops outp's lock */
3765 }
3766 
3767 int
3768 rc_snapshot_attach(rc_node_ptr_t *npp, rc_node_ptr_t *cpp)
3769 {
3770 	rc_node_t *np;
3771 	rc_node_t *cp;
3772 	uint32_t snapid;
3773 
3774 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
3775 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPSHOT) {
3776 		(void) pthread_mutex_unlock(&np->rn_lock);
3777 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
3778 	}
3779 	snapid = np->rn_snapshot_id;
3780 	(void) pthread_mutex_unlock(&np->rn_lock);
3781 
3782 	RC_NODE_PTR_GET_CHECK_AND_LOCK(cp, cpp);
3783 	if (cp->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPSHOT) {
3784 		(void) pthread_mutex_unlock(&cp->rn_lock);
3785 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
3786 	}
3787 
3788 	return (rc_attach_snapshot(cp, snapid, NULL));	/* drops cp's lock */
3789 }
3790 
3791 /*
3792  * Iteration
3793  */
3794 static int
3795 rc_iter_filter_name(rc_node_t *np, void *s)
3796 {
3797 	const char *name = s;
3798 
3799 	return (strcmp(np->rn_name, name) == 0);
3800 }
3801 
3802 static int
3803 rc_iter_filter_type(rc_node_t *np, void *s)
3804 {
3805 	const char *type = s;
3806 
3807 	return (np->rn_type != NULL && strcmp(np->rn_type, type) == 0);
3808 }
3809 
3810 /*ARGSUSED*/
3811 static int
3812 rc_iter_null_filter(rc_node_t *np, void *s)
3813 {
3814 	return (1);
3815 }
3816 
3817 /*
3818  * Allocate & initialize an rc_node_iter_t structure.  Essentially, ensure
3819  * np->rn_children is populated and call uu_list_walk_start(np->rn_children).
3820  * If successful, leaves a hold on np & increments np->rn_other_refs
3821  *
3822  * If composed is true, then set up for iteration across the top level of np's
3823  * composition chain.  If successful, leaves a hold on np and increments
3824  * rn_other_refs for the top level of np's composition chain.
3825  *
3826  * Fails with
3827  *   _NO_RESOURCES
3828  *   _INVALID_TYPE
3829  *   _TYPE_MISMATCH - np cannot carry type children
3830  *   _DELETED
3831  */
3832 static int
3833 rc_iter_create(rc_node_iter_t **resp, rc_node_t *np, uint32_t type,
3834     rc_iter_filter_func *filter, void *arg, boolean_t composed)
3835 {
3836 	rc_node_iter_t *nip;
3837 	int res;
3838 
3839 	assert(*resp == NULL);
3840 
3841 	nip = uu_zalloc(sizeof (*nip));
3842 	if (nip == NULL)
3843 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
3844 
3845 	/* np is held by the client's rc_node_ptr_t */
3846 	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP)
3847 		composed = 1;
3848 
3849 	if (!composed) {
3850 		(void) pthread_mutex_lock(&np->rn_lock);
3851 
3852 		if ((res = rc_node_fill_children(np, type)) !=
3853 		    REP_PROTOCOL_SUCCESS) {
3854 			(void) pthread_mutex_unlock(&np->rn_lock);
3855 			uu_free(nip);
3856 			return (res);
3857 		}
3858 
3859 		nip->rni_clevel = -1;
3860 
3861 		nip->rni_iter = uu_list_walk_start(np->rn_children,
3862 		    UU_WALK_ROBUST);
3863 		if (nip->rni_iter != NULL) {
3864 			nip->rni_iter_node = np;
3865 			rc_node_hold_other(np);
3866 		} else {
3867 			(void) pthread_mutex_unlock(&np->rn_lock);
3868 			uu_free(nip);
3869 			return (REP_PROTOCOL_FAIL_NO_RESOURCES);
3870 		}
3871 		(void) pthread_mutex_unlock(&np->rn_lock);
3872 	} else {
3873 		rc_node_t *ent;
3874 
3875 		if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_SNAPSHOT) {
3876 			/* rn_cchain isn't valid until children are loaded. */
3877 			(void) pthread_mutex_lock(&np->rn_lock);
3878 			res = rc_node_fill_children(np,
3879 			    REP_PROTOCOL_ENTITY_SNAPLEVEL);
3880 			(void) pthread_mutex_unlock(&np->rn_lock);
3881 			if (res != REP_PROTOCOL_SUCCESS) {
3882 				uu_free(nip);
3883 				return (res);
3884 			}
3885 
3886 			/* Check for an empty snapshot. */
3887 			if (np->rn_cchain[0] == NULL)
3888 				goto empty;
3889 		}
3890 
3891 		/* Start at the top of the composition chain. */
3892 		for (nip->rni_clevel = 0; ; ++nip->rni_clevel) {
3893 			if (nip->rni_clevel >= COMPOSITION_DEPTH) {
3894 				/* Empty composition chain. */
3895 empty:
3896 				nip->rni_clevel = -1;
3897 				nip->rni_iter = NULL;
3898 				/* It's ok, iter_next() will return _DONE. */
3899 				goto out;
3900 			}
3901 
3902 			ent = np->rn_cchain[nip->rni_clevel];
3903 			assert(ent != NULL);
3904 
3905 			if (rc_node_check_and_lock(ent) == REP_PROTOCOL_SUCCESS)
3906 				break;
3907 
3908 			/* Someone deleted it, so try the next one. */
3909 		}
3910 
3911 		res = rc_node_fill_children(ent, type);
3912 
3913 		if (res == REP_PROTOCOL_SUCCESS) {
3914 			nip->rni_iter = uu_list_walk_start(ent->rn_children,
3915 			    UU_WALK_ROBUST);
3916 
3917 			if (nip->rni_iter == NULL)
3918 				res = REP_PROTOCOL_FAIL_NO_RESOURCES;
3919 			else {
3920 				nip->rni_iter_node = ent;
3921 				rc_node_hold_other(ent);
3922 			}
3923 		}
3924 
3925 		if (res != REP_PROTOCOL_SUCCESS) {
3926 			(void) pthread_mutex_unlock(&ent->rn_lock);
3927 			uu_free(nip);
3928 			return (res);
3929 		}
3930 
3931 		(void) pthread_mutex_unlock(&ent->rn_lock);
3932 	}
3933 
3934 out:
3935 	rc_node_hold(np);		/* released by rc_iter_end() */
3936 	nip->rni_parent = np;
3937 	nip->rni_type = type;
3938 	nip->rni_filter = (filter != NULL)? filter : rc_iter_null_filter;
3939 	nip->rni_filter_arg = arg;
3940 	*resp = nip;
3941 	return (REP_PROTOCOL_SUCCESS);
3942 }
3943 
3944 static void
3945 rc_iter_end(rc_node_iter_t *iter)
3946 {
3947 	rc_node_t *np = iter->rni_parent;
3948 
3949 	if (iter->rni_clevel >= 0)
3950 		np = np->rn_cchain[iter->rni_clevel];
3951 
3952 	assert(MUTEX_HELD(&np->rn_lock));
3953 	if (iter->rni_iter != NULL)
3954 		uu_list_walk_end(iter->rni_iter);
3955 	iter->rni_iter = NULL;
3956 
3957 	(void) pthread_mutex_unlock(&np->rn_lock);
3958 	rc_node_rele(iter->rni_parent);
3959 	if (iter->rni_iter_node != NULL)
3960 		rc_node_rele_other(iter->rni_iter_node);
3961 }
3962 
3963 /*
3964  * Fails with
3965  *   _NOT_SET - npp is reset
3966  *   _DELETED - npp's node has been deleted
3967  *   _NOT_APPLICABLE - npp's node is not a property
3968  *   _NO_RESOURCES - out of memory
3969  */
3970 static int
3971 rc_node_setup_value_iter(rc_node_ptr_t *npp, rc_node_iter_t **iterp)
3972 {
3973 	rc_node_t *np;
3974 
3975 	rc_node_iter_t *nip;
3976 
3977 	assert(*iterp == NULL);
3978 
3979 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
3980 
3981 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTY) {
3982 		(void) pthread_mutex_unlock(&np->rn_lock);
3983 		return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
3984 	}
3985 
3986 	nip = uu_zalloc(sizeof (*nip));
3987 	if (nip == NULL) {
3988 		(void) pthread_mutex_unlock(&np->rn_lock);
3989 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
3990 	}
3991 
3992 	nip->rni_parent = np;
3993 	nip->rni_iter = NULL;
3994 	nip->rni_clevel = -1;
3995 	nip->rni_type = REP_PROTOCOL_ENTITY_VALUE;
3996 	nip->rni_offset = 0;
3997 	nip->rni_last_offset = 0;
3998 
3999 	rc_node_hold_locked(np);
4000 
4001 	*iterp = nip;
4002 	(void) pthread_mutex_unlock(&np->rn_lock);
4003 
4004 	return (REP_PROTOCOL_SUCCESS);
4005 }
4006 
4007 /*
4008  * Returns:
4009  *   _NOT_SET - npp is reset
4010  *   _DELETED - npp's node has been deleted
4011  *   _TYPE_MISMATCH - npp's node is not a property
4012  *   _NOT_FOUND - property has no values
4013  *   _TRUNCATED - property has >1 values (first is written into out)
4014  *   _SUCCESS - property has 1 value (which is written into out)
4015  *
4016  * We shorten *sz_out to not include anything after the final '\0'.
4017  */
4018 int
4019 rc_node_get_property_value(rc_node_ptr_t *npp,
4020     struct rep_protocol_value_response *out, size_t *sz_out)
4021 {
4022 	rc_node_t *np;
4023 	size_t w;
4024 	int ret;
4025 
4026 	assert(*sz_out == sizeof (*out));
4027 
4028 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
4029 
4030 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTY) {
4031 		(void) pthread_mutex_unlock(&np->rn_lock);
4032 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
4033 	}
4034 
4035 	if (np->rn_values_size == 0) {
4036 		(void) pthread_mutex_unlock(&np->rn_lock);
4037 		return (REP_PROTOCOL_FAIL_NOT_FOUND);
4038 	}
4039 	out->rpr_type = np->rn_valtype;
4040 	w = strlcpy(out->rpr_value, &np->rn_values[0],
4041 	    sizeof (out->rpr_value));
4042 
4043 	if (w >= sizeof (out->rpr_value))
4044 		backend_panic("value too large");
4045 
4046 	*sz_out = offsetof(struct rep_protocol_value_response,
4047 	    rpr_value[w + 1]);
4048 
4049 	ret = (np->rn_values_count != 1)? REP_PROTOCOL_FAIL_TRUNCATED :
4050 	    REP_PROTOCOL_SUCCESS;
4051 	(void) pthread_mutex_unlock(&np->rn_lock);
4052 	return (ret);
4053 }
4054 
4055 int
4056 rc_iter_next_value(rc_node_iter_t *iter,
4057     struct rep_protocol_value_response *out, size_t *sz_out, int repeat)
4058 {
4059 	rc_node_t *np = iter->rni_parent;
4060 	const char *vals;
4061 	size_t len;
4062 
4063 	size_t start;
4064 	size_t w;
4065 
4066 	rep_protocol_responseid_t result;
4067 
4068 	assert(*sz_out == sizeof (*out));
4069 
4070 	(void) memset(out, '\0', *sz_out);
4071 
4072 	if (iter->rni_type != REP_PROTOCOL_ENTITY_VALUE)
4073 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4074 
4075 	RC_NODE_CHECK_AND_LOCK(np);
4076 
4077 	vals = np->rn_values;
4078 	len = np->rn_values_size;
4079 
4080 	out->rpr_type = np->rn_valtype;
4081 
4082 	start = (repeat)? iter->rni_last_offset : iter->rni_offset;
4083 
4084 	if (len == 0 || start >= len) {
4085 		result = REP_PROTOCOL_DONE;
4086 		*sz_out -= sizeof (out->rpr_value);
4087 	} else {
4088 		w = strlcpy(out->rpr_value, &vals[start],
4089 		    sizeof (out->rpr_value));
4090 
4091 		if (w >= sizeof (out->rpr_value))
4092 			backend_panic("value too large");
4093 
4094 		*sz_out = offsetof(struct rep_protocol_value_response,
4095 		    rpr_value[w + 1]);
4096 
4097 		/*
4098 		 * update the offsets if we're not repeating
4099 		 */
4100 		if (!repeat) {
4101 			iter->rni_last_offset = iter->rni_offset;
4102 			iter->rni_offset += (w + 1);
4103 		}
4104 
4105 		result = REP_PROTOCOL_SUCCESS;
4106 	}
4107 
4108 	(void) pthread_mutex_unlock(&np->rn_lock);
4109 	return (result);
4110 }
4111 
4112 /*
4113  * Entry point for ITER_START from client.c.  Validate the arguments & call
4114  * rc_iter_create().
4115  *
4116  * Fails with
4117  *   _NOT_SET
4118  *   _DELETED
4119  *   _TYPE_MISMATCH - np cannot carry type children
4120  *   _BAD_REQUEST - flags is invalid
4121  *		    pattern is invalid
4122  *   _NO_RESOURCES
4123  *   _INVALID_TYPE
4124  *   _TYPE_MISMATCH - *npp cannot have children of type
4125  *   _BACKEND_ACCESS
4126  */
4127 int
4128 rc_node_setup_iter(rc_node_ptr_t *npp, rc_node_iter_t **iterp,
4129     uint32_t type, uint32_t flags, const char *pattern)
4130 {
4131 	rc_node_t *np;
4132 	rc_iter_filter_func *f = NULL;
4133 	int rc;
4134 
4135 	RC_NODE_PTR_GET_CHECK(np, npp);
4136 
4137 	if (pattern != NULL && pattern[0] == '\0')
4138 		pattern = NULL;
4139 
4140 	if (type == REP_PROTOCOL_ENTITY_VALUE) {
4141 		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTY)
4142 			return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
4143 		if (flags != RP_ITER_START_ALL || pattern != NULL)
4144 			return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4145 
4146 		rc = rc_node_setup_value_iter(npp, iterp);
4147 		assert(rc != REP_PROTOCOL_FAIL_NOT_APPLICABLE);
4148 		return (rc);
4149 	}
4150 
4151 	if ((rc = rc_check_parent_child(np->rn_id.rl_type, type)) !=
4152 	    REP_PROTOCOL_SUCCESS)
4153 		return (rc);
4154 
4155 	if (((flags & RP_ITER_START_FILT_MASK) == RP_ITER_START_ALL) ^
4156 	    (pattern == NULL))
4157 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4158 
4159 	/* Composition only works for instances & snapshots. */
4160 	if ((flags & RP_ITER_START_COMPOSED) &&
4161 	    (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_INSTANCE &&
4162 	    np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPSHOT))
4163 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4164 
4165 	if (pattern != NULL) {
4166 		if ((rc = rc_check_type_name(type, pattern)) !=
4167 		    REP_PROTOCOL_SUCCESS)
4168 			return (rc);
4169 		pattern = strdup(pattern);
4170 		if (pattern == NULL)
4171 			return (REP_PROTOCOL_FAIL_NO_RESOURCES);
4172 	}
4173 
4174 	switch (flags & RP_ITER_START_FILT_MASK) {
4175 	case RP_ITER_START_ALL:
4176 		f = NULL;
4177 		break;
4178 	case RP_ITER_START_EXACT:
4179 		f = rc_iter_filter_name;
4180 		break;
4181 	case RP_ITER_START_PGTYPE:
4182 		if (type != REP_PROTOCOL_ENTITY_PROPERTYGRP) {
4183 			free((void *)pattern);
4184 			return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4185 		}
4186 		f = rc_iter_filter_type;
4187 		break;
4188 	default:
4189 		free((void *)pattern);
4190 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4191 	}
4192 
4193 	rc = rc_iter_create(iterp, np, type, f, (void *)pattern,
4194 	    flags & RP_ITER_START_COMPOSED);
4195 	if (rc != REP_PROTOCOL_SUCCESS && pattern != NULL)
4196 		free((void *)pattern);
4197 
4198 	return (rc);
4199 }
4200 
/*
 * Advance iter and hand the next matching child back through out.
 *
 * Do uu_list_walk_next(iter->rni_iter) until we find a child which has the
 * requested type and matches the filter.
 * For composed iterators, we then check to see if there's an overlapping
 * entity (see embedded comments).  If we reach the end of the list, start
 * over at the next level.
 *
 * The lock of the node being walked is taken here and is dropped again
 * before returning; it is also dropped temporarily whenever another
 * composition level has to be consulted.
 *
 * Returns
 *   _BAD_REQUEST - iter walks values
 *   _TYPE_MISMATCH - iter does not walk type entities
 *   _DELETED - parent was deleted
 *   _NO_RESOURCES
 *   _INVALID_TYPE - type is invalid
 *   _DONE
 *   _SUCCESS
 *
 * For composed property group iterators, can also return
 *   _TYPE_MISMATCH - parent cannot have type children
 */
int
rc_iter_next(rc_node_iter_t *iter, rc_node_ptr_t *out, uint32_t type)
{
	rc_node_t *np = iter->rni_parent;
	rc_node_t *res;
	int rc;

	/* Value iterators are not handled by this function. */
	if (iter->rni_type == REP_PROTOCOL_ENTITY_VALUE)
		return (REP_PROTOCOL_FAIL_BAD_REQUEST);

	/* No walker is active, so there is nothing (more) to return. */
	if (iter->rni_iter == NULL) {
		rc_node_clear(out, 0);
		return (REP_PROTOCOL_DONE);
	}

	/* The caller must ask for the type the iterator was set up with. */
	if (iter->rni_type != type) {
		rc_node_clear(out, 0);
		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
	}

	(void) pthread_mutex_lock(&np->rn_lock);  /* held by _iter_create() */

	/*
	 * NOTE(review): the deleted paths pass 1 as the second argument of
	 * rc_node_clear() while the other paths pass 0; presumably this marks
	 * out as referring to a deleted entity -- confirm against
	 * rc_node_clear().
	 */
	if (!rc_node_wait_flag(np, RC_NODE_CHILDREN_CHANGING)) {
		(void) pthread_mutex_unlock(&np->rn_lock);
		rc_node_clear(out, 1);
		return (REP_PROTOCOL_FAIL_DELETED);
	}

	if (iter->rni_clevel >= 0) {
		/* Composed iterator.  Iterate over appropriate level. */
		(void) pthread_mutex_unlock(&np->rn_lock);
		np = np->rn_cchain[iter->rni_clevel];
		/*
		 * If iter->rni_parent is an instance or a snapshot, np must
		 * be valid since iter holds iter->rni_parent & possible
		 * levels (service, instance, snaplevel) cannot be destroyed
		 * while rni_parent is held.  If iter->rni_parent is
		 * a composed property group then rc_node_setup_cpg() put
		 * a hold on np.
		 */

		(void) pthread_mutex_lock(&np->rn_lock);

		if (!rc_node_wait_flag(np, RC_NODE_CHILDREN_CHANGING)) {
			(void) pthread_mutex_unlock(&np->rn_lock);
			rc_node_clear(out, 1);
			return (REP_PROTOCOL_FAIL_DELETED);
		}
	}

	/* From here on np is the node whose children are being walked. */
	assert(np->rn_flags & RC_NODE_HAS_CHILDREN);

	for (;;) {
		res = uu_list_walk_next(iter->rni_iter);
		if (res == NULL) {
			rc_node_t *parent = iter->rni_parent;

#if COMPOSITION_DEPTH == 2
			/*
			 * A non-composed iterator, or a composed one which
			 * has just exhausted its last level, is finished.
			 * We leave the loop with res == NULL.
			 */
			if (iter->rni_clevel < 0 || iter->rni_clevel == 1) {
				/* release walker and lock */
				rc_iter_end(iter);
				break;
			}

			/* Stop walking current level. */
			uu_list_walk_end(iter->rni_iter);
			iter->rni_iter = NULL;
			(void) pthread_mutex_unlock(&np->rn_lock);
			rc_node_rele_other(iter->rni_iter_node);
			iter->rni_iter_node = NULL;

			/* Start walking next level. */
			++iter->rni_clevel;
			np = parent->rn_cchain[iter->rni_clevel];
			assert(np != NULL);
#else
#error This code must be updated.
#endif

			(void) pthread_mutex_lock(&np->rn_lock);

			/* Load the new level's children before walking them. */
			rc = rc_node_fill_children(np, iter->rni_type);

			if (rc == REP_PROTOCOL_SUCCESS) {
				iter->rni_iter =
				    uu_list_walk_start(np->rn_children,
					UU_WALK_ROBUST);

				if (iter->rni_iter == NULL)
					rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
				else {
					/* Remember (and hold) the walked node. */
					iter->rni_iter_node = np;
					rc_node_hold_other(np);
				}
			}

			if (rc != REP_PROTOCOL_SUCCESS) {
				(void) pthread_mutex_unlock(&np->rn_lock);
				rc_node_clear(out, 0);
				return (rc);
			}

			continue;
		}

		/* Skip children of other types or rejected by the filter. */
		if (res->rn_id.rl_type != type ||
		    !iter->rni_filter(res, iter->rni_filter_arg))
			continue;

		/*
		 * If we're composed and not at the top level, check to see if
		 * there's an entity at a higher level with the same name.  If
		 * so, skip this one.
		 */
		if (iter->rni_clevel > 0) {
			rc_node_t *ent = iter->rni_parent->rn_cchain[0];
			rc_node_t *pg;

#if COMPOSITION_DEPTH == 2
			assert(iter->rni_clevel == 1);

			/* Swap np's lock for ent's while we look there. */
			(void) pthread_mutex_unlock(&np->rn_lock);
			(void) pthread_mutex_lock(&ent->rn_lock);
			rc = rc_node_find_named_child(ent, res->rn_name, type,
			    &pg);
			/*
			 * Only the existence of pg matters below, so the hold
			 * is dropped right away; pg is not dereferenced again.
			 */
			if (rc == REP_PROTOCOL_SUCCESS && pg != NULL)
				rc_node_rele(pg);
			(void) pthread_mutex_unlock(&ent->rn_lock);
			if (rc != REP_PROTOCOL_SUCCESS) {
				rc_node_clear(out, 0);
				return (rc);
			}
			(void) pthread_mutex_lock(&np->rn_lock);

			/* Make sure np isn't being deleted all of a sudden. */
			if (!rc_node_wait_flag(np, RC_NODE_DYING)) {
				(void) pthread_mutex_unlock(&np->rn_lock);
				rc_node_clear(out, 1);
				return (REP_PROTOCOL_FAIL_DELETED);
			}

			if (pg != NULL)
				/* Keep going. */
				continue;
#else
#error This code must be updated.
#endif
		}

		/*
		 * If we're composed, iterating over property groups, and not
		 * at the bottom level, check to see if there's a pg at lower
		 * level with the same name.  If so, return a cpg.
		 */
		if (iter->rni_clevel >= 0 &&
		    type == REP_PROTOCOL_ENTITY_PROPERTYGRP &&
		    iter->rni_clevel < COMPOSITION_DEPTH - 1) {
#if COMPOSITION_DEPTH == 2
			rc_node_t *pg;
			rc_node_t *ent = iter->rni_parent->rn_cchain[1];

			rc_node_hold(res);	/* While we drop np->rn_lock */

			(void) pthread_mutex_unlock(&np->rn_lock);
			(void) pthread_mutex_lock(&ent->rn_lock);
			rc = rc_node_find_named_child(ent, res->rn_name, type,
			    &pg);
			/* holds pg if not NULL */
			(void) pthread_mutex_unlock(&ent->rn_lock);
			if (rc != REP_PROTOCOL_SUCCESS) {
				rc_node_rele(res);
				rc_node_clear(out, 0);
				return (rc);
			}

			/* Retake np's lock and make sure np is still alive. */
			(void) pthread_mutex_lock(&np->rn_lock);
			if (!rc_node_wait_flag(np, RC_NODE_DYING)) {
				(void) pthread_mutex_unlock(&np->rn_lock);
				rc_node_rele(res);
				if (pg != NULL)
					rc_node_rele(pg);
				rc_node_clear(out, 1);
				return (REP_PROTOCOL_FAIL_DELETED);
			}

			if (pg == NULL) {
				/* No lower-level pg: return res itself. */
				rc_node_rele(res);
			} else {
				rc_node_t *cpg;

				/* Keep res held for rc_node_setup_cpg(). */

				cpg = rc_node_alloc();
				if (cpg == NULL) {
					(void) pthread_mutex_unlock(
					    &np->rn_lock);
					rc_node_rele(res);
					rc_node_rele(pg);
					rc_node_clear(out, 0);
					return (REP_PROTOCOL_FAIL_NO_RESOURCES);
				}

				switch (rc_node_setup_cpg(cpg, res, pg)) {
				case REP_PROTOCOL_SUCCESS:
					/* Return the composed pg instead. */
					res = cpg;
					break;

				case REP_PROTOCOL_FAIL_TYPE_MISMATCH:
					/* Nevermind. */
					rc_node_destroy(cpg);
					rc_node_rele(pg);
					rc_node_rele(res);
					break;

				case REP_PROTOCOL_FAIL_NO_RESOURCES:
					rc_node_destroy(cpg);
					(void) pthread_mutex_unlock(
					    &np->rn_lock);
					rc_node_rele(res);
					rc_node_rele(pg);
					rc_node_clear(out, 0);
					return (REP_PROTOCOL_FAIL_NO_RESOURCES);

				default:
					assert(0);
					abort();
				}
			}
#else
#error This code must be updated.
#endif
		}

		/* Hold the result so it stays valid once np's lock is gone. */
		rc_node_hold(res);
		(void) pthread_mutex_unlock(&np->rn_lock);
		break;
	}
	/* res is NULL here only if the walk ended via rc_iter_end() above. */
	rc_node_assign(out, res);

	if (res == NULL)
		return (REP_PROTOCOL_DONE);
	/*
	 * Drop the hold taken just before leaving the loop.
	 * NOTE(review): presumably rc_node_assign() took its own reference on
	 * behalf of out -- confirm.
	 */
	rc_node_rele(res);
	return (REP_PROTOCOL_SUCCESS);
}
4464 
4465 void
4466 rc_iter_destroy(rc_node_iter_t **nipp)
4467 {
4468 	rc_node_iter_t *nip = *nipp;
4469 	rc_node_t *np;
4470 
4471 	if (nip == NULL)
4472 		return;				/* already freed */
4473 
4474 	np = nip->rni_parent;
4475 
4476 	if (nip->rni_filter_arg != NULL)
4477 		free(nip->rni_filter_arg);
4478 	nip->rni_filter_arg = NULL;
4479 
4480 	if (nip->rni_type == REP_PROTOCOL_ENTITY_VALUE ||
4481 	    nip->rni_iter != NULL) {
4482 		if (nip->rni_clevel < 0)
4483 			(void) pthread_mutex_lock(&np->rn_lock);
4484 		else
4485 			(void) pthread_mutex_lock(
4486 			    &np->rn_cchain[nip->rni_clevel]->rn_lock);
4487 		rc_iter_end(nip);		/* release walker and lock */
4488 	}
4489 	nip->rni_parent = NULL;
4490 
4491 	uu_free(nip);
4492 	*nipp = NULL;
4493 }
4494 
/*
 * Point the transaction pointer txp at the property group designated by npp,
 * after checking that the client may start a transaction on it.
 *
 * If npp designates a composed property group, its rn_cchain[0] member is
 * used instead.  Privileged clients skip the authorization checks.  For
 * other clients a permission check is assembled from the authorizations
 * that apply to this property group (always denied in a NATIVE_BUILD).
 *
 * txp->rnp_authorized is set to 1 only for an instance's AUTH_PG_ACTIONS
 * property group; rc_tx_commit() then skips its own check.  In every other
 * case (including privileged clients) it is set to 0.
 *
 * Returns
 *   _TYPE_MISMATCH - the node is not a property group
 *   _PERMISSION_DENIED - the property group has a nonzero snapshot id, or
 *	the client is not authorized
 *   _NO_RESOURCES
 *   _DELETED
 *   _SUCCESS
 *
 * NOTE(review): RC_NODE_PTR_GET_CHECK_AND_HOLD() and RC_NODE_CHECK_AND_HOLD()
 * presumably return from this function themselves when the node is not
 * usable -- confirm against their definitions.
 */
int
rc_node_setup_tx(rc_node_ptr_t *npp, rc_node_ptr_t *txp)
{
	rc_node_t *np;
	permcheck_t *pcp;
	int ret;
	int authorized = 0;

	RC_NODE_PTR_GET_CHECK_AND_HOLD(np, npp);

	/* For a composed pg, operate on its first composition member. */
	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
		rc_node_rele(np);
		np = np->rn_cchain[0];
		RC_NODE_CHECK_AND_HOLD(np);
	}

	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTYGRP) {
		rc_node_rele(np);
		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
	}

	/* Property groups with a snapshot id may not be modified. */
	if (np->rn_id.rl_ids[ID_SNAPSHOT] != 0) {
		rc_node_rele(np);
		return (REP_PROTOCOL_FAIL_PERMISSION_DENIED);
	}

	if (client_is_privileged())
		goto skip_checks;

#ifdef NATIVE_BUILD
	rc_node_rele(np);
	return (REP_PROTOCOL_FAIL_PERMISSION_DENIED);
#else
	/* permission check */
	pcp = pc_create();
	if (pcp == NULL) {
		rc_node_rele(np);
		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
	}

	if (np->rn_id.rl_ids[ID_INSTANCE] != 0 &&	/* instance pg */
	    ((strcmp(np->rn_name, AUTH_PG_ACTIONS) == 0 &&
	    strcmp(np->rn_type, AUTH_PG_ACTIONS_TYPE) == 0) ||
	    (strcmp(np->rn_name, AUTH_PG_GENERAL_OVR) == 0 &&
	    strcmp(np->rn_type, AUTH_PG_GENERAL_OVR_TYPE) == 0))) {
		rc_node_t *instn;

		/* solaris.smf.manage can be used. */
		ret = perm_add_enabling(pcp, AUTH_MANAGE);

		if (ret != REP_PROTOCOL_SUCCESS) {
			pc_free(pcp);
			rc_node_rele(np);
			return (ret);
		}

		/* general/action_authorization values can be used. */
		ret = rc_node_parent(np, &instn);
		if (ret != REP_PROTOCOL_SUCCESS) {
			assert(ret == REP_PROTOCOL_FAIL_DELETED);
			rc_node_rele(np);
			pc_free(pcp);
			return (REP_PROTOCOL_FAIL_DELETED);
		}

		assert(instn->rn_id.rl_type == REP_PROTOCOL_ENTITY_INSTANCE);

		ret = perm_add_inst_action_auth(pcp, instn);
		rc_node_rele(instn);
		switch (ret) {
		case REP_PROTOCOL_SUCCESS:
			break;

		case REP_PROTOCOL_FAIL_DELETED:
		case REP_PROTOCOL_FAIL_NO_RESOURCES:
			rc_node_rele(np);
			pc_free(pcp);
			return (ret);

		default:
			bad_error("perm_add_inst_action_auth", ret);
		}

		if (strcmp(np->rn_name, AUTH_PG_ACTIONS) == 0)
			authorized = 1;		/* Don't check on commit. */
	} else {
		/*
		 * Any other property group: collect the authorizations
		 * one after another, stopping at the first failure.
		 */
		ret = perm_add_enabling(pcp, AUTH_MODIFY);

		if (ret == REP_PROTOCOL_SUCCESS) {
			/* propertygroup-type-specific authorization */
			/* no locking because rn_type won't change anyway */
			const char * const auth =
			    perm_auth_for_pgtype(np->rn_type);

			if (auth != NULL)
				ret = perm_add_enabling(pcp, auth);
		}

		if (ret == REP_PROTOCOL_SUCCESS)
			/* propertygroup/transaction-type-specific auths */
			ret =
			    perm_add_enabling_values(pcp, np, AUTH_PROP_VALUE);

		if (ret == REP_PROTOCOL_SUCCESS)
			ret =
			    perm_add_enabling_values(pcp, np, AUTH_PROP_MODIFY);

		/* AUTH_MANAGE can manipulate general/AUTH_PROP_ACTION */
		if (ret == REP_PROTOCOL_SUCCESS &&
		    strcmp(np->rn_name, AUTH_PG_GENERAL) == 0 &&
		    strcmp(np->rn_type, AUTH_PG_GENERAL_TYPE) == 0)
			ret = perm_add_enabling(pcp, AUTH_MANAGE);

		if (ret != REP_PROTOCOL_SUCCESS) {
			pc_free(pcp);
			rc_node_rele(np);
			return (ret);
		}
	}

	/*
	 * perm_granted() yields 1 when access is granted and 0 when it is
	 * denied; any other value is reported as a resource failure.
	 */
	ret = perm_granted(pcp);
	if (ret != 1) {
		pc_free(pcp);
		rc_node_rele(np);
		return (ret == 0 ? REP_PROTOCOL_FAIL_PERMISSION_DENIED :
		    REP_PROTOCOL_FAIL_NO_RESOURCES);
	}

	pc_free(pcp);
#endif /* NATIVE_BUILD */

skip_checks:
	rc_node_assign(txp, np);
	txp->rnp_authorized = authorized;

	/* Drop our own hold now that txp has been assigned the node. */
	rc_node_rele(np);
	return (REP_PROTOCOL_SUCCESS);
}
4633 
4634 /*
4635  * Return 1 if the given transaction commands only modify the values of
4636  * properties other than "modify_authorization".  Return -1 if any of the
4637  * commands are invalid, and 0 otherwise.
4638  */
4639 static int
4640 tx_allow_value(const void *cmds_arg, size_t cmds_sz, rc_node_t *pg)
4641 {
4642 	const struct rep_protocol_transaction_cmd *cmds;
4643 	uintptr_t loc;
4644 	uint32_t sz;
4645 	rc_node_t *prop;
4646 	boolean_t ok;
4647 
4648 	assert(!MUTEX_HELD(&pg->rn_lock));
4649 
4650 	loc = (uintptr_t)cmds_arg;
4651 
4652 	while (cmds_sz > 0) {
4653 		cmds = (struct rep_protocol_transaction_cmd *)loc;
4654 
4655 		if (cmds_sz <= REP_PROTOCOL_TRANSACTION_CMD_MIN_SIZE)
4656 			return (-1);
4657 
4658 		sz = cmds->rptc_size;
4659 		if (sz <= REP_PROTOCOL_TRANSACTION_CMD_MIN_SIZE)
4660 			return (-1);
4661 
4662 		sz = TX_SIZE(sz);
4663 		if (sz > cmds_sz)
4664 			return (-1);
4665 
4666 		switch (cmds[0].rptc_action) {
4667 		case REP_PROTOCOL_TX_ENTRY_CLEAR:
4668 			break;
4669 
4670 		case REP_PROTOCOL_TX_ENTRY_REPLACE:
4671 			/* Check type */
4672 			(void) pthread_mutex_lock(&pg->rn_lock);
4673 			if (rc_node_find_named_child(pg,
4674 			    (const char *)cmds[0].rptc_data,
4675 			    REP_PROTOCOL_ENTITY_PROPERTY, &prop) ==
4676 			    REP_PROTOCOL_SUCCESS) {
4677 				ok = (prop != NULL &&
4678 				    prop->rn_valtype == cmds[0].rptc_type);
4679 			} else {
4680 				/* Return more particular error? */
4681 				ok = B_FALSE;
4682 			}
4683 			(void) pthread_mutex_unlock(&pg->rn_lock);
4684 			if (ok)
4685 				break;
4686 			return (0);
4687 
4688 		default:
4689 			return (0);
4690 		}
4691 
4692 		if (strcmp((const char *)cmds[0].rptc_data, AUTH_PROP_MODIFY)
4693 		    == 0)
4694 			return (0);
4695 
4696 		loc += sz;
4697 		cmds_sz -= sz;
4698 	}
4699 
4700 	return (1);
4701 }
4702 
4703 /*
4704  * Return 1 if any of the given transaction commands affect
4705  * "action_authorization".  Return -1 if any of the commands are invalid and
4706  * 0 in all other cases.
4707  */
4708 static int
4709 tx_modifies_action(const void *cmds_arg, size_t cmds_sz)
4710 {
4711 	const struct rep_protocol_transaction_cmd *cmds;
4712 	uintptr_t loc;
4713 	uint32_t sz;
4714 
4715 	loc = (uintptr_t)cmds_arg;
4716 
4717 	while (cmds_sz > 0) {
4718 		cmds = (struct rep_protocol_transaction_cmd *)loc;
4719 
4720 		if (cmds_sz <= REP_PROTOCOL_TRANSACTION_CMD_MIN_SIZE)
4721 			return (-1);
4722 
4723 		sz = cmds->rptc_size;
4724 		if (sz <= REP_PROTOCOL_TRANSACTION_CMD_MIN_SIZE)
4725 			return (-1);
4726 
4727 		sz = TX_SIZE(sz);
4728 		if (sz > cmds_sz)
4729 			return (-1);
4730 
4731 		if (strcmp((const char *)cmds[0].rptc_data, AUTH_PROP_ACTION)
4732 		    == 0)
4733 			return (1);
4734 
4735 		loc += sz;
4736 		cmds_sz -= sz;
4737 	}
4738 
4739 	return (0);
4740 }
4741 
4742 /*
4743  * Returns 1 if the transaction commands only modify properties named
4744  * 'enabled'.
4745  */
4746 static int
4747 tx_only_enabled(const void *cmds_arg, size_t cmds_sz)
4748 {
4749 	const struct rep_protocol_transaction_cmd *cmd;
4750 	uintptr_t loc;
4751 	uint32_t sz;
4752 
4753 	loc = (uintptr_t)cmds_arg;
4754 
4755 	while (cmds_sz > 0) {
4756 		cmd = (struct rep_protocol_transaction_cmd *)loc;
4757 
4758 		if (cmds_sz <= REP_PROTOCOL_TRANSACTION_CMD_MIN_SIZE)
4759 			return (-1);
4760 
4761 		sz = cmd->rptc_size;
4762 		if (sz <= REP_PROTOCOL_TRANSACTION_CMD_MIN_SIZE)
4763 			return (-1);
4764 
4765 		sz = TX_SIZE(sz);
4766 		if (sz > cmds_sz)
4767 			return (-1);
4768 
4769 		if (strcmp((const char *)cmd->rptc_data, AUTH_PROP_ENABLED)
4770 		    != 0)
4771 			return (0);
4772 
4773 		loc += sz;
4774 		cmds_sz -= sz;
4775 	}
4776 
4777 	return (1);
4778 }
4779 
/*
 * Commit the transaction commands in cmds (cmds_sz bytes) to the property
 * group that txp refers to.
 *
 * Unless the client is privileged or txp->rnp_authorized is set, an
 * authorization check whose details depend on the property group and on the
 * contents of the transaction is performed first (always denied in a
 * NATIVE_BUILD).  A replacement node (nnp) carrying the property group's
 * identity is then allocated, the commands are handed to object_tx_commit(),
 * and on success the old node's pg notification waiters are fired, the old
 * node is marked RC_NODE_OLD, and nnp takes its place under the parent.
 *
 * txp is cleared once object_tx_commit() has been attempted, whether it
 * succeeded or failed; earlier failures leave txp untouched.
 *
 * Returns
 *   _PERMISSION_DENIED
 *   _BAD_REQUEST - invalid commands found while checking authorization
 *   _NO_RESOURCES
 *   _DELETED
 *   _NOT_LATEST - the property group node has been superseded
 *   _SUCCESS - also when object_tx_commit() reports _DONE (empty tx)
 *   or any other failure from rc_node_fill_children() or object_tx_commit()
 *
 * NOTE(review): RC_NODE_CHECK() presumably returns from this function itself
 * when np is not usable -- confirm against its definition.
 */
int
rc_tx_commit(rc_node_ptr_t *txp, const void *cmds, size_t cmds_sz)
{
	rc_node_t *np = txp->rnp_node;
	rc_node_t *pp;
	rc_node_t *nnp;
	rc_node_pg_notify_t *pnp;
	int rc;
	permcheck_t *pcp;
	int granted, normal;

	RC_NODE_CHECK(np);

	if (!client_is_privileged() && !txp->rnp_authorized) {
#ifdef NATIVE_BUILD
		return (REP_PROTOCOL_FAIL_PERMISSION_DENIED);
#else
		/* permission check: depends on contents of transaction */
		pcp = pc_create();
		if (pcp == NULL)
			return (REP_PROTOCOL_FAIL_NO_RESOURCES);

		/* If normal is cleared, we won't do the normal checks. */
		normal = 1;
		rc = REP_PROTOCOL_SUCCESS;

		if (strcmp(np->rn_name, AUTH_PG_GENERAL) == 0 &&
		    strcmp(np->rn_type, AUTH_PG_GENERAL_TYPE) == 0) {
			/* Touching general[framework]/action_authorization? */
			rc = tx_modifies_action(cmds, cmds_sz);
			if (rc == -1) {
				pc_free(pcp);
				return (REP_PROTOCOL_FAIL_BAD_REQUEST);
			}

			if (rc) {
				/* Yes: only AUTH_MANAGE can be used. */
				rc = perm_add_enabling(pcp, AUTH_MANAGE);
				normal = 0;
			} else {
				rc = REP_PROTOCOL_SUCCESS;
			}
		} else if (np->rn_id.rl_ids[ID_INSTANCE] != 0 &&
		    strcmp(np->rn_name, AUTH_PG_GENERAL_OVR) == 0 &&
		    strcmp(np->rn_type, AUTH_PG_GENERAL_OVR_TYPE) == 0) {
			rc_node_t *instn;

			/*
			 * If only 'enabled' is being changed, the instance's
			 * action authorizations are accepted in addition to
			 * the normal ones added below.
			 */
			rc = tx_only_enabled(cmds, cmds_sz);
			if (rc == -1) {
				pc_free(pcp);
				return (REP_PROTOCOL_FAIL_BAD_REQUEST);
			}

			if (rc) {
				rc = rc_node_parent(np, &instn);
				if (rc != REP_PROTOCOL_SUCCESS) {
					assert(rc == REP_PROTOCOL_FAIL_DELETED);
					pc_free(pcp);
					return (rc);
				}

				assert(instn->rn_id.rl_type ==
				    REP_PROTOCOL_ENTITY_INSTANCE);

				rc = perm_add_inst_action_auth(pcp, instn);
				rc_node_rele(instn);
				switch (rc) {
				case REP_PROTOCOL_SUCCESS:
					break;

				case REP_PROTOCOL_FAIL_DELETED:
				case REP_PROTOCOL_FAIL_NO_RESOURCES:
					pc_free(pcp);
					return (rc);

				default:
					bad_error("perm_add_inst_action_auth",
					    rc);
				}
			} else {
				rc = REP_PROTOCOL_SUCCESS;
			}
		}

		if (rc == REP_PROTOCOL_SUCCESS && normal) {
			rc = perm_add_enabling(pcp, AUTH_MODIFY);

			if (rc == REP_PROTOCOL_SUCCESS) {
				/* Add pgtype-specific authorization. */
				const char * const auth =
				    perm_auth_for_pgtype(np->rn_type);

				if (auth != NULL)
					rc = perm_add_enabling(pcp, auth);
			}

			/* Add pg-specific modify_authorization auths. */
			if (rc == REP_PROTOCOL_SUCCESS)
				rc = perm_add_enabling_values(pcp, np,
				    AUTH_PROP_MODIFY);

			/* If value_authorization values are ok, add them. */
			if (rc == REP_PROTOCOL_SUCCESS) {
				rc = tx_allow_value(cmds, cmds_sz, np);
				if (rc == -1)
					rc = REP_PROTOCOL_FAIL_BAD_REQUEST;
				else if (rc)
					rc = perm_add_enabling_values(pcp, np,
					    AUTH_PROP_VALUE);
			}
		}

		/*
		 * granted is only assigned when rc is still _SUCCESS, and it
		 * is only read below after rc has been checked again.
		 */
		if (rc == REP_PROTOCOL_SUCCESS) {
			granted = perm_granted(pcp);
			if (granted < 0)
				rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
		}

		pc_free(pcp);

		if (rc != REP_PROTOCOL_SUCCESS)
			return (rc);

		if (!granted)
			return (REP_PROTOCOL_FAIL_PERMISSION_DENIED);
#endif /* NATIVE_BUILD */
	}

	/* Build the node which will replace np once the commit succeeds. */
	nnp = rc_node_alloc();
	if (nnp == NULL)
		return (REP_PROTOCOL_FAIL_NO_RESOURCES);

	nnp->rn_id = np->rn_id;			/* structure assignment */
	nnp->rn_hash = np->rn_hash;
	nnp->rn_name = strdup(np->rn_name);
	nnp->rn_type = strdup(np->rn_type);
	nnp->rn_pgflags = np->rn_pgflags;

	nnp->rn_flags = RC_NODE_IN_TX | RC_NODE_USING_PARENT;

	/* Either strdup() above may have failed. */
	if (nnp->rn_name == NULL || nnp->rn_type == NULL) {
		rc_node_destroy(nnp);
		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
	}

	(void) pthread_mutex_lock(&np->rn_lock);
	/*
	 * We must have all of the old properties in the cache, or the
	 * database deletions could cause inconsistencies.
	 */
	if ((rc = rc_node_fill_children(np, REP_PROTOCOL_ENTITY_PROPERTY)) !=
	    REP_PROTOCOL_SUCCESS) {
		(void) pthread_mutex_unlock(&np->rn_lock);
		rc_node_destroy(nnp);
		return (rc);
	}

	if (!rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
		(void) pthread_mutex_unlock(&np->rn_lock);
		rc_node_destroy(nnp);
		return (REP_PROTOCOL_FAIL_DELETED);
	}

	/* np has already been superseded by an earlier commit. */
	if (np->rn_flags & RC_NODE_OLD) {
		rc_node_rele_flag(np, RC_NODE_USING_PARENT);
		(void) pthread_mutex_unlock(&np->rn_lock);
		rc_node_destroy(nnp);
		return (REP_PROTOCOL_FAIL_NOT_LATEST);
	}

	/*
	 * NOTE(review): judging by the lock calls that follow,
	 * rc_node_hold_parent_flag() returns with np->rn_lock dropped and,
	 * on success, with pp->rn_lock held -- confirm against its
	 * definition.
	 */
	pp = rc_node_hold_parent_flag(np, RC_NODE_CHILDREN_CHANGING);
	if (pp == NULL) {
		/* our parent is gone, we're going next... */
		rc_node_destroy(nnp);
		(void) pthread_mutex_lock(&np->rn_lock);
		if (np->rn_flags & RC_NODE_OLD) {
			(void) pthread_mutex_unlock(&np->rn_lock);
			return (REP_PROTOCOL_FAIL_NOT_LATEST);
		}
		(void) pthread_mutex_unlock(&np->rn_lock);
		return (REP_PROTOCOL_FAIL_DELETED);
	}
	(void) pthread_mutex_unlock(&pp->rn_lock);

	/*
	 * prepare for the transaction
	 */
	(void) pthread_mutex_lock(&np->rn_lock);
	if (!rc_node_hold_flag(np, RC_NODE_IN_TX)) {
		(void) pthread_mutex_unlock(&np->rn_lock);
		(void) pthread_mutex_lock(&pp->rn_lock);
		rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
		(void) pthread_mutex_unlock(&pp->rn_lock);
		rc_node_destroy(nnp);
		return (REP_PROTOCOL_FAIL_DELETED);
	}
	nnp->rn_gen_id = np->rn_gen_id;
	(void) pthread_mutex_unlock(&np->rn_lock);

	/* Sets nnp->rn_gen_id on success. */
	rc = object_tx_commit(&np->rn_id, cmds, cmds_sz, &nnp->rn_gen_id);

	(void) pthread_mutex_lock(&np->rn_lock);
	if (rc != REP_PROTOCOL_SUCCESS) {
		/* Undo the flag holds, discard nnp, and end the tx. */
		rc_node_rele_flag(np, RC_NODE_IN_TX);
		(void) pthread_mutex_unlock(&np->rn_lock);
		(void) pthread_mutex_lock(&pp->rn_lock);
		rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
		(void) pthread_mutex_unlock(&pp->rn_lock);
		rc_node_destroy(nnp);
		rc_node_clear(txp, 0);
		if (rc == REP_PROTOCOL_DONE)
			rc = REP_PROTOCOL_SUCCESS; /* successful empty tx */
		return (rc);
	}

	/*
	 * Notify waiters
	 *
	 * The loop relies on rc_pg_notify_fire() taking pnp off
	 * np->rn_pg_notify_list; otherwise it would never terminate.
	 */
	(void) pthread_mutex_lock(&rc_pg_notify_lock);
	while ((pnp = uu_list_first(np->rn_pg_notify_list)) != NULL)
		rc_pg_notify_fire(pnp);
	(void) pthread_mutex_unlock(&rc_pg_notify_lock);

	/* From now on np is stale; later users get _NOT_LATEST. */
	np->rn_flags |= RC_NODE_OLD;
	(void) pthread_mutex_unlock(&np->rn_lock);

	rc_notify_remove_node(np);

	/*
	 * replace np with nnp
	 */
	rc_node_relink_child(pp, np, nnp);

	/*
	 * all done -- clear the transaction.
	 */
	rc_node_clear(txp, 0);

	return (REP_PROTOCOL_SUCCESS);
}
5021 
/*
 * Initialize pnp as an unregistered property group notification: its list
 * node is set up for rc_pg_notify_pool, and it has no property group and
 * no file descriptor (-1).
 */
void
rc_pg_notify_init(rc_node_pg_notify_t *pnp)
{
	uu_list_node_init(pnp, &pnp->rnpn_node, rc_pg_notify_pool);
	pnp->rnpn_pg = NULL;
	pnp->rnpn_fd = -1;
}
5029 
5030 int
5031 rc_pg_notify_setup(rc_node_pg_notify_t *pnp, rc_node_ptr_t *npp, int fd)
5032 {
5033 	rc_node_t *np;
5034 
5035 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
5036 
5037 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTYGRP) {
5038 		(void) pthread_mutex_unlock(&np->rn_lock);
5039 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
5040 	}
5041 
5042 	/*
5043 	 * wait for any transaction in progress to complete
5044 	 */
5045 	if (!rc_node_wait_flag(np, RC_NODE_IN_TX)) {
5046 		(void) pthread_mutex_unlock(&np->rn_lock);
5047 		return (REP_PROTOCOL_FAIL_DELETED);
5048 	}
5049 
5050 	if (np->rn_flags & RC_NODE_OLD) {
5051 		(void) pthread_mutex_unlock(&np->rn_lock);
5052 		return (REP_PROTOCOL_FAIL_NOT_LATEST);
5053 	}
5054 
5055 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
5056 	rc_pg_notify_fire(pnp);
5057 	pnp->rnpn_pg = np;
5058 	pnp->rnpn_fd = fd;
5059 	(void) uu_list_insert_after(np->rn_pg_notify_list, NULL, pnp);
5060 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5061 
5062 	(void) pthread_mutex_unlock(&np->rn_lock);
5063 	return (REP_PROTOCOL_SUCCESS);
5064 }
5065 
/*
 * Finish with pnp: fire it under rc_pg_notify_lock, then tear down the list
 * node that rc_pg_notify_init() set up.
 */
void
rc_pg_notify_fini(rc_node_pg_notify_t *pnp)
{
	(void) pthread_mutex_lock(&rc_pg_notify_lock);
	rc_pg_notify_fire(pnp);
	(void) pthread_mutex_unlock(&rc_pg_notify_lock);

	uu_list_node_fini(pnp, &pnp->rnpn_node, rc_pg_notify_pool);
}
5075 
5076 void
5077 rc_notify_info_init(rc_notify_info_t *rnip)
5078 {
5079 	int i;
5080 
5081 	uu_list_node_init(rnip, &rnip->rni_list_node, rc_notify_info_pool);
5082 	uu_list_node_init(&rnip->rni_notify, &rnip->rni_notify.rcn_list_node,
5083 	    rc_notify_pool);
5084 
5085 	rnip->rni_notify.rcn_node = NULL;
5086 	rnip->rni_notify.rcn_info = rnip;
5087 
5088 	bzero(rnip->rni_namelist, sizeof (rnip->rni_namelist));
5089 	bzero(rnip->rni_typelist, sizeof (rnip->rni_typelist));
5090 
5091 	(void) pthread_cond_init(&rnip->rni_cv, NULL);
5092 
5093 	for (i = 0; i < RC_NOTIFY_MAX_NAMES; i++) {
5094 		rnip->rni_namelist[i] = NULL;
5095 		rnip->rni_typelist[i] = NULL;
5096 	}
5097 }
5098 
/*
 * Activate rnip: mark it RC_NOTIFY_ACTIVE and put it on rc_notify_info_list,
 * and put its embedded rc_notify_t on rc_notify_list.
 *
 * The caller must hold rc_pg_notify_lock, and rnip must not be active yet.
 */
static void
rc_notify_info_insert_locked(rc_notify_info_t *rnip)
{
	assert(MUTEX_HELD(&rc_pg_notify_lock));

	assert(!(rnip->rni_flags & RC_NOTIFY_ACTIVE));

	rnip->rni_flags |= RC_NOTIFY_ACTIVE;
	(void) uu_list_insert_after(rc_notify_info_list, NULL, rnip);
	(void) uu_list_insert_before(rc_notify_list, NULL, &rnip->rni_notify);
}
5110 
/*
 * Deactivate rnip: take it off rc_notify_info_list and rc_notify_list, and
 * wait until every thread waiting on its behalf has left.
 *
 * The caller must hold rc_pg_notify_lock; the lock is released while
 * sleeping in pthread_cond_wait().  rnip must be active and must not
 * already be draining.  On return both RC_NOTIFY_ACTIVE and RC_NOTIFY_DRAIN
 * are clear.
 */
static void
rc_notify_info_remove_locked(rc_notify_info_t *rnip)
{
	rc_notify_t *me = &rnip->rni_notify;
	rc_notify_t *np;

	assert(MUTEX_HELD(&rc_pg_notify_lock));

	assert(rnip->rni_flags & RC_NOTIFY_ACTIVE);

	/*
	 * Setting RC_NOTIFY_DRAIN makes the loop condition in
	 * rc_notify_info_wait() false, so wake anyone sleeping there.
	 */
	assert(!(rnip->rni_flags & RC_NOTIFY_DRAIN));
	rnip->rni_flags |= RC_NOTIFY_DRAIN;
	(void) pthread_cond_broadcast(&rnip->rni_cv);

	(void) uu_list_remove(rc_notify_info_list, rnip);

	/*
	 * clean up any notifications at the beginning of the list
	 *
	 * Only entries without an rcn_info are removed; an entry with
	 * rcn_info set is another client's own entry and ends the cleanup.
	 */
	if (uu_list_first(rc_notify_list) == me) {
		while ((np = uu_list_next(rc_notify_list, me)) != NULL &&
		    np->rcn_info == NULL)
			rc_notify_remove_locked(np);
	}
	(void) uu_list_remove(rc_notify_list, me);

	/*
	 * Waiters may be sleeping on either rc_pg_notify_cv or rni_cv, so
	 * poke both, then sleep on rni_cv until rni_waiters drops to zero.
	 */
	while (rnip->rni_waiters) {
		(void) pthread_cond_broadcast(&rc_pg_notify_cv);
		(void) pthread_cond_broadcast(&rnip->rni_cv);
		(void) pthread_cond_wait(&rnip->rni_cv, &rc_pg_notify_lock);
	}

	rnip->rni_flags &= ~(RC_NOTIFY_DRAIN | RC_NOTIFY_ACTIVE);
}
5145 
5146 static int
5147 rc_notify_info_add_watch(rc_notify_info_t *rnip, const char **arr,
5148     const char *name)
5149 {
5150 	int i;
5151 	int rc;
5152 	char *f;
5153 
5154 	rc = rc_check_type_name(REP_PROTOCOL_ENTITY_PROPERTYGRP, name);
5155 	if (rc != REP_PROTOCOL_SUCCESS)
5156 		return (rc);
5157 
5158 	f = strdup(name);
5159 	if (f == NULL)
5160 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
5161 
5162 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
5163 
5164 	while (rnip->rni_flags & RC_NOTIFY_EMPTYING)
5165 		(void) pthread_cond_wait(&rnip->rni_cv, &rc_pg_notify_lock);
5166 
5167 	for (i = 0; i < RC_NOTIFY_MAX_NAMES; i++)
5168 		if (arr[i] == NULL)
5169 			break;
5170 
5171 	if (i == RC_NOTIFY_MAX_NAMES) {
5172 		(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5173 		free(f);
5174 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
5175 	}
5176 
5177 	arr[i] = f;
5178 	if (!(rnip->rni_flags & RC_NOTIFY_ACTIVE))
5179 		rc_notify_info_insert_locked(rnip);
5180 
5181 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5182 	return (REP_PROTOCOL_SUCCESS);
5183 }
5184 
/*
 * Add name to rnip's list of watched names.  See rc_notify_info_add_watch()
 * for the possible return values.
 */
int
rc_notify_info_add_name(rc_notify_info_t *rnip, const char *name)
{
	return (rc_notify_info_add_watch(rnip, rnip->rni_namelist, name));
}
5190 
/*
 * Add type to rnip's list of watched types.  See rc_notify_info_add_watch()
 * for the possible return values; note that type is validated there with
 * the same check as a property group name.
 */
int
rc_notify_info_add_type(rc_notify_info_t *rnip, const char *type)
{
	return (rc_notify_info_add_watch(rnip, rnip->rni_typelist, type));
}
5196 
/*
 * Wait for and report an event of interest to rnip, a notification client.
 *
 * rnip's embedded rni_notify entry marks its position in rc_notify_list:
 * the search for the next event starts right after it, and once an event is
 * found the entry is moved to just after that event.
 *
 * outp (sz bytes) is set to the empty string on entry when sz > 0.
 *
 * Returns
 *   _SUCCESS - an event was found.  For a deletion, the deleted entity's
 *	FMRI is copied into outp (truncated to sz) and out is cleared;
 *	otherwise out is assigned the affected node and outp is left as
 *	set on entry.
 *   _DONE - rnip is not active, or is being drained.
 */
int
rc_notify_info_wait(rc_notify_info_t *rnip, rc_node_ptr_t *out,
    char *outp, size_t sz)
{
	rc_notify_t *np;
	rc_notify_t *me = &rnip->rni_notify;
	rc_node_t *nnp;
	rc_notify_delete_t *ndp;

	int am_first_info;

	if (sz > 0)
		outp[0] = 0;

	(void) pthread_mutex_lock(&rc_pg_notify_lock);

	/* Keep going only while rnip is active and not being drained. */
	while ((rnip->rni_flags & (RC_NOTIFY_ACTIVE | RC_NOTIFY_DRAIN)) ==
	    RC_NOTIFY_ACTIVE) {
		/*
		 * If I'm first on the notify list, it is my job to
		 * clean up any notifications I pass by.  I can't do that
		 * if someone is blocking the list from removals, so I
		 * have to wait until they have all drained.
		 */
		am_first_info = (uu_list_first(rc_notify_list) == me);
		if (am_first_info && rc_notify_in_use) {
			rnip->rni_waiters++;
			(void) pthread_cond_wait(&rc_pg_notify_cv,
			    &rc_pg_notify_lock);
			rnip->rni_waiters--;
			continue;
		}

		/*
		 * Search the list for a node of interest.
		 */
		np = uu_list_next(rc_notify_list, me);
		while (np != NULL && !rc_notify_info_interested(rnip, np)) {
			/* Fetch the successor before np may be removed. */
			rc_notify_t *next = uu_list_next(rc_notify_list, np);

			if (am_first_info) {
				if (np->rcn_info) {
					/*
					 * Passing another client -- stop
					 * cleaning up notifications
					 */
					am_first_info = 0;
				} else {
					rc_notify_remove_locked(np);
				}
			}
			np = next;
		}

		/*
		 * Nothing of interest -- wait for notification
		 */
		if (np == NULL) {
			rnip->rni_waiters++;
			(void) pthread_cond_wait(&rnip->rni_cv,
			    &rc_pg_notify_lock);
			rnip->rni_waiters--;
			continue;
		}

		/*
		 * found something to report -- move myself after the
		 * notification and process it.
		 */
		(void) uu_list_remove(rc_notify_list, me);
		(void) uu_list_insert_after(rc_notify_list, np, me);

		/* Deletions are reported by FMRI rather than by node. */
		if ((ndp = np->rcn_delete) != NULL) {
			(void) strlcpy(outp, ndp->rnd_fmri, sz);
			if (am_first_info)
				rc_notify_remove_locked(np);
			(void) pthread_mutex_unlock(&rc_pg_notify_lock);
			rc_node_clear(out, 0);
			return (REP_PROTOCOL_SUCCESS);
		}

		nnp = np->rcn_node;
		assert(nnp != NULL);

		/*
		 * We can't bump nnp's reference count without grabbing its
		 * lock, and rc_pg_notify_lock is a leaf lock.  So we
		 * temporarily block all removals to keep nnp from
		 * disappearing.
		 */
		rc_notify_in_use++;
		assert(rc_notify_in_use > 0);
		(void) pthread_mutex_unlock(&rc_pg_notify_lock);

		rc_node_assign(out, nnp);

		(void) pthread_mutex_lock(&rc_pg_notify_lock);
		assert(rc_notify_in_use > 0);
		rc_notify_in_use--;
		if (am_first_info)
			rc_notify_remove_locked(np);
		/* Let threads waiting for removals to be unblocked proceed. */
		if (rc_notify_in_use == 0)
			(void) pthread_cond_broadcast(&rc_pg_notify_cv);
		(void) pthread_mutex_unlock(&rc_pg_notify_lock);

		return (REP_PROTOCOL_SUCCESS);
	}
	/*
	 * If we're the last one out, let people know it's clear.
	 */
	if (rnip->rni_waiters == 0)
		(void) pthread_cond_broadcast(&rnip->rni_cv);
	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
	return (REP_PROTOCOL_DONE);
}
5315 
5316 static void
5317 rc_notify_info_reset(rc_notify_info_t *rnip)
5318 {
5319 	int i;
5320 
5321 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
5322 	if (rnip->rni_flags & RC_NOTIFY_ACTIVE)
5323 		rc_notify_info_remove_locked(rnip);
5324 	assert(!(rnip->rni_flags & (RC_NOTIFY_DRAIN | RC_NOTIFY_EMPTYING)));
5325 	rnip->rni_flags |= RC_NOTIFY_EMPTYING;
5326 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5327 
5328 	for (i = 0; i < RC_NOTIFY_MAX_NAMES; i++) {
5329 		if (rnip->rni_namelist[i] != NULL) {
5330 			free((void *)rnip->rni_namelist[i]);
5331 			rnip->rni_namelist[i] = NULL;
5332 		}
5333 		if (rnip->rni_typelist[i] != NULL) {
5334 			free((void *)rnip->rni_typelist[i]);
5335 			rnip->rni_typelist[i] = NULL;
5336 		}
5337 	}
5338 
5339 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
5340 	rnip->rni_flags &= ~RC_NOTIFY_EMPTYING;
5341 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5342 }
5343 
/*
 * Tear down rnip: reset it (deactivating it and freeing its watch lists),
 * then finalize the two list nodes set up by rc_notify_info_init().
 */
void
rc_notify_info_fini(rc_notify_info_t *rnip)
{
	rc_notify_info_reset(rnip);

	uu_list_node_fini(rnip, &rnip->rni_list_node, rc_notify_info_pool);
	uu_list_node_fini(&rnip->rni_notify, &rnip->rni_notify.rcn_list_node,
	    rc_notify_pool);
}
5353