xref: /titanic_44/usr/src/cmd/svc/configd/rc_node.c (revision 2a9459bdd821c1cf59590a7a9069ac9c591e8a6b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * rc_node.c - object management primitives
31  *
32  * This layer manages entities, their data structure, its locking, iterators,
33  * transactions, and change notification requests.  Entities (scopes,
34  * services, instances, snapshots, snaplevels, property groups, "composed"
35  * property groups (see composition below), and properties) are represented by
36  * rc_node_t's and are kept in the cache_hash hash table.  (Property values
37  * are kept in the rn_values member of the respective property -- not as
38  * separate objects.)  Iterators are represented by rc_node_iter_t's.
39  * Transactions are represented by rc_node_tx_t's and are only allocated as
40  * part of repcache_tx_t's in the client layer (client.c).  Change
41  * notification requests are represented by rc_notify_t structures and are
42  * described below.
43  *
44  * The entity tree is rooted at rc_scope, which rc_node_init() initializes to
45  * the "localhost" scope.  The tree is filled in from the database on-demand
46  * by rc_node_fill_children(), usually from rc_iter_create() since iterators
47  * are the only way to find the children of an entity.
48  *
49  * Each rc_node_t is protected by its rn_lock member.  Operations which can
50  * take too long, however, should serialize on an RC_NODE_WAITING_FLAGS bit in
51  * rn_flags with the rc_node_{hold,rele}_flag() functions.  And since pointers
52  * to rc_node_t's are allowed, rn_refs is a reference count maintained by
53  * rc_node_{hold,rele}().  See configd.h for locking order information.
54  *
55  * When a node (property group or snapshot) is updated, a new node takes the
56  * place of the old node in the global hash, and the old node is hung off of
57  * the rn_former list of the new node.  At the same time, all of its children
58  * have their rn_parent_ref pointer set, and any holds they have are reflected
59  * in the old node's rn_other_refs count.  This is automatically kept up
60  * to date, until the final reference to the subgraph is dropped, at which
61  * point the node is unrefed and destroyed, along with all of its children.
62  *
63  * Locking rules: To dereference an rc_node_t * (usually to lock it), you must
64  * have a hold (rc_node_hold()) on it or otherwise be sure that it hasn't been
65  * rc_node_destroy()ed (hold a lock on its parent or child, hold a flag,
66  * etc.).  Once you have locked an rc_node_t you must check its rn_flags for
67  * RC_NODE_DEAD before you can use it.  This is usually done with the
68  * rc_node_{wait,hold}_flag() functions (often via the rc_node_check_*()
69  * functions & RC_NODE_*() macros), which fail if the object has died.
70  *
71  * Because name service lookups may take a long time and, more importantly,
72  * may trigger additional accesses to the repository, perm_granted() must be
73  * called without holding any locks.
74  *
75  * An ITER_START for a non-ENTITY_VALUE induces an rc_node_fill_children()
76  * call via rc_node_setup_iter() to populate the rn_children uu_list of the
77  * rc_node_t * in question and a call to uu_list_walk_start() on that list.  For
78  * ITER_READ, rc_iter_next() uses uu_list_walk_next() to find the next
79  * appropriate child.
80  *
81  * An ITER_START for an ENTITY_VALUE makes sure the node has its values
82  * filled, and sets up the iterator.  An ITER_READ_VALUE just copies out
83  * the proper values and updates the offset information.
84  *
85  * When a property group gets changed by a transaction, it sticks around as
86  * a child of its replacement property group, but is removed from the parent.
87  *
88  * To allow aliases, snapshots are implemented with a level of indirection.
89  * A snapshot rc_node_t has a snapid which refers to an rc_snapshot_t in
90  * snapshot.c which contains the authoritative snaplevel information.  The
91  * snapid is "assigned" by rc_attach_snapshot().
92  *
93  * We provide the client layer with rc_node_ptr_t's to reference objects.
94  * Objects referred to by them are automatically held & released by
95  * rc_node_assign() & rc_node_clear().  The RC_NODE_PTR_*() macros are used at
96  * client.c entry points to read the pointers.  They fetch the pointer to the
97  * object, return (from the function) if it is dead, and lock, hold, or hold
98  * a flag of the object.
99  */
100 
101 /*
102  * Permission checking is authorization-based: some operations may only
103  * proceed if the user has been assigned at least one of a set of
104  * authorization strings.  The set of enabling authorizations depends on the
105  * operation and the target object.  The set of authorizations assigned to
106  * a user is determined by reading /etc/security/policy.conf, querying the
107  * user_attr database, and possibly querying the prof_attr database, as per
108  * chkauthattr() in libsecdb.
109  *
110  * The fastest way to decide whether the two sets intersect is by entering the
111  * strings into a hash table and detecting collisions, which takes linear time
112  * in the total size of the sets.  Except for the authorization patterns which
113  * may be assigned to users, which without advanced pattern-matching
114  * algorithms will take O(n) in the number of enabling authorizations, per
115  * pattern.
116  *
117  * We can achieve some practical speed-ups by noting that if we enter all of
118  * the authorizations from one of the sets into the hash table we can merely
119  * check the elements of the second set for existence without adding them.
120  * This reduces memory requirements and hash table clutter.  The enabling set
121  * is well suited for this because it is internal to configd (for now, at
122  * least).  Combine this with short-circuiting and we can even minimize the
123  * number of queries to the security databases (user_attr & prof_attr).
124  *
125  * To force this usage onto clients we provide functions for adding
126  * authorizations to the enabling set of a permission context structure
127  * (perm_add_*()) and one to decide whether the user associated with the
128  * current door call client possesses any of them (perm_granted()).
129  *
130  * At some point, a generic version of this should move to libsecdb.
131  */
132 
133 /*
134  * Composition is the combination of sets of properties.  The sets are ordered
135  * and properties in higher sets obscure properties of the same name in lower
136  * sets.  Here we present a composed view of an instance's properties as the
137  * union of its properties and its service's properties.  Similarly the
138  * properties of snaplevels are combined to form a composed view of the
139  * properties of a snapshot (which should match the composed view of the
140  * properties of the instance when the snapshot was taken).
141  *
142  * In terms of the client interface, the client may request that a property
143  * group iterator for an instance or snapshot be composed.  Property groups
144  * traversed by such an iterator may not have the target entity as a parent.
145  * Similarly, the properties traversed by a property iterator for those
146  * property groups may not have the property groups iterated as parents.
147  *
148  * Implementation requires that iterators for instances and snapshots be
149  * composition-savvy, and that we have a "composed property group" entity
150  * which represents the composition of a number of property groups.  Iteration
151  * over "composed property groups" yields properties which may have different
152  * parents, but for all other operations a composed property group behaves
153  * like the top-most property group it represents.
154  *
155  * The implementation is based on the rn_cchain[] array of rc_node_t pointers
156  * in rc_node_t.  For instances, the pointers point to the instance and its
157  * parent service.  For snapshots they point to the child snaplevels, and for
158  * composed property groups they point to property groups.  A composed
159  * iterator carries an index into rn_cchain[].  Thus most of the magic ends up
160  * in the rc_iter_*() code.
161  */
162 
163 #include <assert.h>
164 #include <atomic.h>
165 #include <errno.h>
166 #include <libuutil.h>
167 #include <libscf.h>
168 #include <libscf_priv.h>
169 #include <prof_attr.h>
170 #include <pthread.h>
171 #include <stdio.h>
172 #include <stdlib.h>
173 #include <strings.h>
174 #include <sys/types.h>
175 #include <unistd.h>
176 #include <user_attr.h>
177 
178 #include "configd.h"
179 
/*
 * Authorization strings.  AUTH_MANAGE, AUTH_MODIFY and AUTH_MODIFY_PREFIX
 * are built from AUTH_PREFIX by string-literal concatenation, so they
 * expand to "solaris.smf.manage", "solaris.smf.modify" and
 * "solaris.smf.modify." respectively.
 */
#define	AUTH_PREFIX		"solaris.smf."
#define	AUTH_MANAGE		AUTH_PREFIX "manage"
#define	AUTH_MODIFY		AUTH_PREFIX "modify"
#define	AUTH_MODIFY_PREFIX	AUTH_MODIFY "."
/* Local aliases for property group names and types defined by libscf. */
#define	AUTH_PG_ACTIONS		SCF_PG_RESTARTER_ACTIONS
#define	AUTH_PG_ACTIONS_TYPE	SCF_PG_RESTARTER_ACTIONS_TYPE
#define	AUTH_PG_GENERAL		SCF_PG_GENERAL
#define	AUTH_PG_GENERAL_TYPE	SCF_PG_GENERAL_TYPE
#define	AUTH_PG_GENERAL_OVR	SCF_PG_GENERAL_OVR
#define	AUTH_PG_GENERAL_OVR_TYPE  SCF_PG_GENERAL_OVR_TYPE
/* Property names. */
#define	AUTH_PROP_ACTION	"action_authorization"
#define	AUTH_PROP_ENABLED	"enabled"
#define	AUTH_PROP_MODIFY	"modify_authorization"
#define	AUTH_PROP_VALUE		"value_authorization"
#define	AUTH_PROP_READ		"read_authorization"
/* libsecdb should take care of this. */
/* Delimiter handed to strtok_r() when splitting authorization/profile lists. */
#define	RBAC_AUTH_SEP		","
197 
/* Capacity of rt_valid_children[]; unused slots are left zero-initialized. */
#define	MAX_VALID_CHILDREN 3

/*
 * Static description of one entity type.  One of these exists per entry of
 * the rc_types[] table.
 */
typedef struct rc_type_info {
	uint32_t	rt_type;		/* matches array index */
	/* number of rl_ids[] slots hashed/compared for this type */
	uint32_t	rt_num_ids;
	/* flags handed to uu_check_name() by rc_check_type_name() */
	uint32_t	rt_name_flags;
	/* child types accepted by rc_check_parent_child() */
	uint32_t	rt_valid_children[MAX_VALID_CHILDREN];
} rc_type_info_t;
206 
207 #define	RT_NO_NAME	-1U
208 
209 static rc_type_info_t rc_types[] = {
210 	{REP_PROTOCOL_ENTITY_NONE, 0, RT_NO_NAME},
211 	{REP_PROTOCOL_ENTITY_SCOPE, 0, 0,
212 	    {REP_PROTOCOL_ENTITY_SERVICE, REP_PROTOCOL_ENTITY_SCOPE}},
213 	{REP_PROTOCOL_ENTITY_SERVICE, 0, UU_NAME_DOMAIN | UU_NAME_PATH,
214 	    {REP_PROTOCOL_ENTITY_INSTANCE, REP_PROTOCOL_ENTITY_PROPERTYGRP}},
215 	{REP_PROTOCOL_ENTITY_INSTANCE, 1, UU_NAME_DOMAIN,
216 	    {REP_PROTOCOL_ENTITY_SNAPSHOT, REP_PROTOCOL_ENTITY_PROPERTYGRP}},
217 	{REP_PROTOCOL_ENTITY_SNAPSHOT, 2, UU_NAME_DOMAIN,
218 	    {REP_PROTOCOL_ENTITY_SNAPLEVEL, REP_PROTOCOL_ENTITY_PROPERTYGRP}},
219 	{REP_PROTOCOL_ENTITY_SNAPLEVEL, 4, RT_NO_NAME,
220 	    {REP_PROTOCOL_ENTITY_PROPERTYGRP}},
221 	{REP_PROTOCOL_ENTITY_PROPERTYGRP, 5, UU_NAME_DOMAIN,
222 	    {REP_PROTOCOL_ENTITY_PROPERTY}},
223 	{REP_PROTOCOL_ENTITY_CPROPERTYGRP, 0, UU_NAME_DOMAIN,
224 	    {REP_PROTOCOL_ENTITY_PROPERTY}},
225 	{REP_PROTOCOL_ENTITY_PROPERTY, 7, UU_NAME_DOMAIN},
226 	{-1UL}
227 };
228 #define	NUM_TYPES	((sizeof (rc_types) / sizeof (*rc_types)))
229 
/*
 * Element of a permcheck_t hash table.  pce_auth is declared with one
 * byte, but pc_add() allocates each element with room for the whole
 * NUL-terminated authorization string.
 */
struct pc_elt {
	struct pc_elt	*pce_next;	/* next element in the same bucket */
	char		pce_auth[1];	/* authorization string */
};

/*
 * An authorization set hash table.  Buckets are selected with
 * hash & (pc_bnum - 1), see pc_exists() and pc_add().
 */
typedef struct {
	struct pc_elt	**pc_buckets;
	uint_t		pc_bnum;		/* number of buckets */
	uint_t		pc_enum;		/* number of elements */
} permcheck_t;
242 
/* libuutil list pools used by this file. */
static uu_list_pool_t *rc_children_pool;
static uu_list_pool_t *rc_pg_notify_pool;
static uu_list_pool_t *rc_notify_pool;
static uu_list_pool_t *rc_notify_info_pool;

/* Root of the entity tree (see the comment at the top of this file). */
static rc_node_t *rc_scope;

/* Lock and condition variable guarding the notification lists. */
static pthread_mutex_t	rc_pg_notify_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t	rc_pg_notify_cv = PTHREAD_COND_INITIALIZER;
static uint_t		rc_notify_in_use;	/* blocks removals */

/* Serializes calls to _get_auth_policy() in perm_granted(). */
static pthread_mutex_t	perm_lock = PTHREAD_MUTEX_INITIALIZER;

/* Forward declaration; used by the hold/release routines below. */
static void rc_node_unrefed(rc_node_t *np);
257 
258 /*
259  * We support an arbitrary number of clients interested in events for certain
260  * types of changes.  Each client is represented by an rc_notify_info_t, and
261  * all clients are chained onto the rc_notify_info_list.
262  *
263  * The rc_notify_list is the global notification list.  Each entry is of
264  * type rc_notify_t, which is embedded in one of three other structures:
265  *
266  *	rc_node_t		property group update notification
267  *	rc_notify_delete_t	object deletion notification
268  *	rc_notify_info_t	notification clients
269  *
270  * Which type of object is determined by which pointer in the rc_notify_t is
271  * non-NULL.
272  *
273  * New notifications and clients are added to the end of the list.
274  * Notifications no-one is interested in are never added to the list.
275  *
276  * Clients use their position in the list to track which notifications they
277  * have not yet reported.  As they process notifications, they move forward
278  * in the list past them.  There is always a client at the beginning of the
279  * list -- as he moves past notifications, he removes them from the list and
280  * cleans them up.
281  *
282  * The rc_pg_notify_lock protects all notification state.  The rc_pg_notify_cv
283  * is used for global signalling, and each client has a cv which he waits for
284  * events of interest on.
285  */
static uu_list_t	*rc_notify_info_list;	/* notification clients */
static uu_list_t	*rc_notify_list;	/* global notification list */
288 
/*
 * Size of the node cache hash table.  HASH_MASK is HASH_SIZE - 1 and is
 * applied with a bitwise AND, so HASH_SIZE must remain a power of two.
 */
#define	HASH_SIZE	512
#define	HASH_MASK	(HASH_SIZE - 1)

/* NOTE(review): presumably requests 64-byte alignment of the table. */
#pragma align 64(cache_hash)
static cache_bucket_t cache_hash[HASH_SIZE];

/* Map a hash value to the address of its bucket in cache_hash[]. */
#define	CACHE_BUCKET(h)		(&cache_hash[(h) & HASH_MASK])
296 
297 static uint32_t
298 rc_node_hash(rc_node_lookup_t *lp)
299 {
300 	uint32_t type = lp->rl_type;
301 	uint32_t backend = lp->rl_backend;
302 	uint32_t mainid = lp->rl_main_id;
303 	uint32_t *ids = lp->rl_ids;
304 
305 	rc_type_info_t *tp = &rc_types[type];
306 	uint32_t num_ids;
307 	uint32_t left;
308 	uint32_t hash;
309 
310 	assert(backend == BACKEND_TYPE_NORMAL ||
311 	    backend == BACKEND_TYPE_NONPERSIST);
312 
313 	assert(type > 0 && type < NUM_TYPES);
314 	num_ids = tp->rt_num_ids;
315 
316 	left = MAX_IDS - num_ids;
317 	assert(num_ids <= MAX_IDS);
318 
319 	hash = type * 7 + mainid * 5 + backend;
320 
321 	while (num_ids-- > 0)
322 		hash = hash * 11 + *ids++ * 7;
323 
324 	/*
325 	 * the rest should be zeroed
326 	 */
327 	while (left-- > 0)
328 		assert(*ids++ == 0);
329 
330 	return (hash);
331 }
332 
333 static int
334 rc_node_match(rc_node_t *np, rc_node_lookup_t *l)
335 {
336 	rc_node_lookup_t *r = &np->rn_id;
337 	rc_type_info_t *tp;
338 	uint32_t type;
339 	uint32_t num_ids;
340 
341 	if (r->rl_main_id != l->rl_main_id)
342 		return (0);
343 
344 	type = r->rl_type;
345 	if (type != l->rl_type)
346 		return (0);
347 
348 	assert(type > 0 && type < NUM_TYPES);
349 
350 	tp = &rc_types[r->rl_type];
351 	num_ids = tp->rt_num_ids;
352 
353 	assert(num_ids <= MAX_IDS);
354 	while (num_ids-- > 0)
355 		if (r->rl_ids[num_ids] != l->rl_ids[num_ids])
356 			return (0);
357 
358 	return (1);
359 }
360 
361 /*
362  * the "other" references on a node are maintained in an atomically
363  * updated refcount, rn_other_refs.  This can be bumped from arbitrary
364  * context, and tracks references to a possibly out-of-date node's children.
365  *
366  * To prevent the node from disappearing between the final drop of
367  * rn_other_refs and the unref handling, rn_other_refs_held is bumped on
368  * 0->1 transitions and decremented (with the node lock held) on 1->0
369  * transitions.
370  */
371 static void
372 rc_node_hold_other(rc_node_t *np)
373 {
374 	if (atomic_add_32_nv(&np->rn_other_refs, 1) == 1) {
375 		atomic_add_32(&np->rn_other_refs_held, 1);
376 		assert(np->rn_other_refs_held > 0);
377 	}
378 	assert(np->rn_other_refs > 0);
379 }
380 
381 /*
382  * No node locks may be held
383  */
384 static void
385 rc_node_rele_other(rc_node_t *np)
386 {
387 	assert(np->rn_other_refs > 0);
388 	if (atomic_add_32_nv(&np->rn_other_refs, -1) == 0) {
389 		(void) pthread_mutex_lock(&np->rn_lock);
390 		assert(np->rn_other_refs_held > 0);
391 		if (atomic_add_32_nv(&np->rn_other_refs_held, -1) == 0 &&
392 		    np->rn_refs == 0 && (np->rn_flags & RC_NODE_OLD))
393 			rc_node_unrefed(np);
394 		else
395 			(void) pthread_mutex_unlock(&np->rn_lock);
396 	}
397 }
398 
399 static void
400 rc_node_hold_locked(rc_node_t *np)
401 {
402 	assert(MUTEX_HELD(&np->rn_lock));
403 
404 	if (np->rn_refs == 0 && (np->rn_flags & RC_NODE_PARENT_REF))
405 		rc_node_hold_other(np->rn_parent_ref);
406 	np->rn_refs++;
407 	assert(np->rn_refs > 0);
408 }
409 
410 static void
411 rc_node_hold(rc_node_t *np)
412 {
413 	(void) pthread_mutex_lock(&np->rn_lock);
414 	rc_node_hold_locked(np);
415 	(void) pthread_mutex_unlock(&np->rn_lock);
416 }
417 
418 static void
419 rc_node_rele_locked(rc_node_t *np)
420 {
421 	int unref = 0;
422 	rc_node_t *par_ref = NULL;
423 
424 	assert(MUTEX_HELD(&np->rn_lock));
425 	assert(np->rn_refs > 0);
426 
427 	if (--np->rn_refs == 0) {
428 		if (np->rn_flags & RC_NODE_PARENT_REF)
429 			par_ref = np->rn_parent_ref;
430 
431 		/*
432 		 * Composed property groups are only as good as their
433 		 * references.
434 		 */
435 		if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP)
436 			np->rn_flags |= RC_NODE_DEAD;
437 
438 		if ((np->rn_flags & (RC_NODE_DEAD|RC_NODE_OLD)) &&
439 		    np->rn_other_refs == 0 && np->rn_other_refs_held == 0)
440 			unref = 1;
441 	}
442 
443 	if (unref)
444 		rc_node_unrefed(np);
445 	else
446 		(void) pthread_mutex_unlock(&np->rn_lock);
447 
448 	if (par_ref != NULL)
449 		rc_node_rele_other(par_ref);
450 }
451 
452 void
453 rc_node_rele(rc_node_t *np)
454 {
455 	(void) pthread_mutex_lock(&np->rn_lock);
456 	rc_node_rele_locked(np);
457 }
458 
459 static cache_bucket_t *
460 cache_hold(uint32_t h)
461 {
462 	cache_bucket_t *bp = CACHE_BUCKET(h);
463 	(void) pthread_mutex_lock(&bp->cb_lock);
464 	return (bp);
465 }
466 
467 static void
468 cache_release(cache_bucket_t *bp)
469 {
470 	(void) pthread_mutex_unlock(&bp->cb_lock);
471 }
472 
473 static rc_node_t *
474 cache_lookup_unlocked(cache_bucket_t *bp, rc_node_lookup_t *lp)
475 {
476 	uint32_t h = rc_node_hash(lp);
477 	rc_node_t *np;
478 
479 	assert(MUTEX_HELD(&bp->cb_lock));
480 	assert(bp == CACHE_BUCKET(h));
481 
482 	for (np = bp->cb_head; np != NULL; np = np->rn_hash_next) {
483 		if (np->rn_hash == h && rc_node_match(np, lp)) {
484 			rc_node_hold(np);
485 			return (np);
486 		}
487 	}
488 
489 	return (NULL);
490 }
491 
492 static rc_node_t *
493 cache_lookup(rc_node_lookup_t *lp)
494 {
495 	uint32_t h;
496 	cache_bucket_t *bp;
497 	rc_node_t *np;
498 
499 	h = rc_node_hash(lp);
500 	bp = cache_hold(h);
501 
502 	np = cache_lookup_unlocked(bp, lp);
503 
504 	cache_release(bp);
505 
506 	return (np);
507 }
508 
509 static void
510 cache_insert_unlocked(cache_bucket_t *bp, rc_node_t *np)
511 {
512 	assert(MUTEX_HELD(&bp->cb_lock));
513 	assert(np->rn_hash == rc_node_hash(&np->rn_id));
514 	assert(bp == CACHE_BUCKET(np->rn_hash));
515 
516 	assert(np->rn_hash_next == NULL);
517 
518 	np->rn_hash_next = bp->cb_head;
519 	bp->cb_head = np;
520 }
521 
522 static void
523 cache_remove_unlocked(cache_bucket_t *bp, rc_node_t *np)
524 {
525 	rc_node_t **npp;
526 
527 	assert(MUTEX_HELD(&bp->cb_lock));
528 	assert(np->rn_hash == rc_node_hash(&np->rn_id));
529 	assert(bp == CACHE_BUCKET(np->rn_hash));
530 
531 	for (npp = &bp->cb_head; *npp != NULL; npp = &(*npp)->rn_hash_next)
532 		if (*npp == np)
533 			break;
534 
535 	assert(*npp == np);
536 	*npp = np->rn_hash_next;
537 	np->rn_hash_next = NULL;
538 }
539 
540 /*
541  * verify that the 'parent' type can have a child typed 'child'
542  * Fails with
543  *   _INVALID_TYPE - argument is invalid
544  *   _TYPE_MISMATCH - parent type cannot have children of type child
545  */
546 static int
547 rc_check_parent_child(uint32_t parent, uint32_t child)
548 {
549 	int idx;
550 	uint32_t type;
551 
552 	if (parent == 0 || parent >= NUM_TYPES ||
553 	    child == 0 || child >= NUM_TYPES)
554 		return (REP_PROTOCOL_FAIL_INVALID_TYPE); /* invalid types */
555 
556 	for (idx = 0; idx < MAX_VALID_CHILDREN; idx++) {
557 		type = rc_types[parent].rt_valid_children[idx];
558 		if (type == child)
559 			return (REP_PROTOCOL_SUCCESS);
560 	}
561 
562 	return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
563 }
564 
565 /*
566  * Fails with
567  *   _INVALID_TYPE - type is invalid
568  *   _BAD_REQUEST - name is an invalid name for a node of type type
569  */
570 int
571 rc_check_type_name(uint32_t type, const char *name)
572 {
573 	if (type == 0 || type >= NUM_TYPES)
574 		return (REP_PROTOCOL_FAIL_INVALID_TYPE); /* invalid types */
575 
576 	if (uu_check_name(name, rc_types[type].rt_name_flags) == -1)
577 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
578 
579 	return (REP_PROTOCOL_SUCCESS);
580 }
581 
582 static int
583 rc_check_pgtype_name(const char *name)
584 {
585 	if (uu_check_name(name, UU_NAME_DOMAIN) == -1)
586 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
587 
588 	return (REP_PROTOCOL_SUCCESS);
589 }
590 
591 static int
592 rc_notify_info_interested(rc_notify_info_t *rnip, rc_notify_t *np)
593 {
594 	rc_node_t *nnp = np->rcn_node;
595 	int i;
596 
597 	assert(MUTEX_HELD(&rc_pg_notify_lock));
598 
599 	if (np->rcn_delete != NULL) {
600 		assert(np->rcn_info == NULL && np->rcn_node == NULL);
601 		return (1);		/* everyone likes deletes */
602 	}
603 	if (np->rcn_node == NULL) {
604 		assert(np->rcn_info != NULL || np->rcn_delete != NULL);
605 		return (0);
606 	}
607 	assert(np->rcn_info == NULL);
608 
609 	for (i = 0; i < RC_NOTIFY_MAX_NAMES; i++) {
610 		if (rnip->rni_namelist[i] != NULL) {
611 			if (strcmp(nnp->rn_name, rnip->rni_namelist[i]) == 0)
612 				return (1);
613 		}
614 		if (rnip->rni_typelist[i] != NULL) {
615 			if (strcmp(nnp->rn_type, rnip->rni_typelist[i]) == 0)
616 				return (1);
617 		}
618 	}
619 	return (0);
620 }
621 
622 static void
623 rc_notify_insert_node(rc_node_t *nnp)
624 {
625 	rc_notify_t *np = &nnp->rn_notify;
626 	rc_notify_info_t *nip;
627 	int found = 0;
628 
629 	assert(np->rcn_info == NULL);
630 
631 	if (nnp->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTYGRP)
632 		return;
633 
634 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
635 	np->rcn_node = nnp;
636 	for (nip = uu_list_first(rc_notify_info_list); nip != NULL;
637 	    nip = uu_list_next(rc_notify_info_list, nip)) {
638 		if (rc_notify_info_interested(nip, np)) {
639 			(void) pthread_cond_broadcast(&nip->rni_cv);
640 			found++;
641 		}
642 	}
643 	if (found)
644 		(void) uu_list_insert_before(rc_notify_list, NULL, np);
645 	else
646 		np->rcn_node = NULL;
647 
648 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
649 }
650 
651 static void
652 rc_notify_deletion(rc_notify_delete_t *ndp, const char *service,
653     const char *instance, const char *pg)
654 {
655 	rc_notify_info_t *nip;
656 
657 	uu_list_node_init(&ndp->rnd_notify, &ndp->rnd_notify.rcn_list_node,
658 	    rc_notify_pool);
659 	ndp->rnd_notify.rcn_delete = ndp;
660 
661 	(void) snprintf(ndp->rnd_fmri, sizeof (ndp->rnd_fmri),
662 	    "svc:/%s%s%s%s%s", service,
663 	    (instance != NULL)? ":" : "", (instance != NULL)? instance : "",
664 	    (pg != NULL)? "/:properties/" : "", (pg != NULL)? pg : "");
665 
666 	/*
667 	 * add to notification list, notify watchers
668 	 */
669 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
670 	for (nip = uu_list_first(rc_notify_info_list); nip != NULL;
671 	    nip = uu_list_next(rc_notify_info_list, nip))
672 		(void) pthread_cond_broadcast(&nip->rni_cv);
673 	(void) uu_list_insert_before(rc_notify_list, NULL, ndp);
674 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
675 }
676 
677 static void
678 rc_notify_remove_node(rc_node_t *nnp)
679 {
680 	rc_notify_t *np = &nnp->rn_notify;
681 
682 	assert(np->rcn_info == NULL);
683 	assert(!MUTEX_HELD(&nnp->rn_lock));
684 
685 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
686 	while (np->rcn_node != NULL) {
687 		if (rc_notify_in_use) {
688 			(void) pthread_cond_wait(&rc_pg_notify_cv,
689 			    &rc_pg_notify_lock);
690 			continue;
691 		}
692 		(void) uu_list_remove(rc_notify_list, np);
693 		np->rcn_node = NULL;
694 		break;
695 	}
696 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
697 }
698 
699 static void
700 rc_notify_remove_locked(rc_notify_t *np)
701 {
702 	assert(MUTEX_HELD(&rc_pg_notify_lock));
703 	assert(rc_notify_in_use == 0);
704 
705 	(void) uu_list_remove(rc_notify_list, np);
706 	if (np->rcn_node) {
707 		np->rcn_node = NULL;
708 	} else if (np->rcn_delete) {
709 		uu_free(np->rcn_delete);
710 	} else {
711 		assert(0);	/* CAN'T HAPPEN */
712 	}
713 }
714 
715 /*
716  * Permission checking functions.  See comment atop this file.
717  */
718 #ifndef NATIVE_BUILD
719 static permcheck_t *
720 pc_create()
721 {
722 	permcheck_t *p;
723 
724 	p = uu_zalloc(sizeof (*p));
725 	if (p == NULL)
726 		return (NULL);
727 	p->pc_bnum = 8;			/* Normal case will only have 2 elts. */
728 	p->pc_buckets = uu_zalloc(sizeof (*p->pc_buckets) * p->pc_bnum);
729 	if (p->pc_buckets == NULL) {
730 		uu_free(p);
731 		return (NULL);
732 	}
733 
734 	p->pc_enum = 0;
735 	return (p);
736 }
737 
738 static void
739 pc_free(permcheck_t *pcp)
740 {
741 	uint_t i;
742 	struct pc_elt *ep, *next;
743 
744 	for (i = 0; i < pcp->pc_bnum; ++i) {
745 		for (ep = pcp->pc_buckets[i]; ep != NULL; ep = next) {
746 			next = ep->pce_next;
747 			free(ep);
748 		}
749 	}
750 
751 	free(pcp->pc_buckets);
752 	free(pcp);
753 }
754 
/*
 * Hash a NUL-terminated authorization string to a 32-bit value.  The empty
 * string hashes to 0.
 */
static uint32_t
pc_hash(const char *auth)
{
	uint32_t hash = 0;
	uint32_t top;

	/*
	 * Generic hash function from uts/common/os/modhash.c.
	 */
	while (*auth != '\0') {
		hash = (hash << 4) + *auth++;
		top = hash & 0xf0000000;
		if (top != 0) {
			/* fold the top nibble back in, then clear it */
			hash ^= (top >> 24) ^ top;
		}
	}

	return (hash);
}
775 
776 static int
777 pc_exists(const permcheck_t *pcp, const char *auth)
778 {
779 	uint32_t h;
780 	struct pc_elt *ep;
781 
782 	h = pc_hash(auth);
783 	for (ep = pcp->pc_buckets[h & (pcp->pc_bnum - 1)];
784 	    ep != NULL;
785 	    ep = ep->pce_next) {
786 		if (strcmp(auth, ep->pce_auth) == 0)
787 			return (1);
788 	}
789 
790 	return (0);
791 }
792 
793 static int
794 pc_match(const permcheck_t *pcp, const char *pattern)
795 {
796 	uint_t i;
797 	struct pc_elt *ep;
798 
799 	for (i = 0; i < pcp->pc_bnum; ++i) {
800 		for (ep = pcp->pc_buckets[i]; ep != NULL; ep = ep->pce_next) {
801 			if (_auth_match(pattern, ep->pce_auth))
802 				return (1);
803 		}
804 	}
805 
806 	return (0);
807 }
808 
809 static int
810 pc_grow(permcheck_t *pcp)
811 {
812 	uint_t new_bnum, i, j;
813 	struct pc_elt **new_buckets;
814 	struct pc_elt *ep, *next;
815 
816 	new_bnum = pcp->pc_bnum * 2;
817 	if (new_bnum < pcp->pc_bnum)
818 		/* Homey don't play that. */
819 		return (-1);
820 
821 	new_buckets = uu_zalloc(sizeof (*new_buckets) * new_bnum);
822 	if (new_buckets == NULL)
823 		return (-1);
824 
825 	for (i = 0; i < pcp->pc_bnum; ++i) {
826 		for (ep = pcp->pc_buckets[i]; ep != NULL; ep = next) {
827 			next = ep->pce_next;
828 			j = pc_hash(ep->pce_auth) & (new_bnum - 1);
829 			ep->pce_next = new_buckets[j];
830 			new_buckets[j] = ep;
831 		}
832 	}
833 
834 	uu_free(pcp->pc_buckets);
835 	pcp->pc_buckets = new_buckets;
836 	pcp->pc_bnum = new_bnum;
837 
838 	return (0);
839 }
840 
841 static int
842 pc_add(permcheck_t *pcp, const char *auth)
843 {
844 	struct pc_elt *ep;
845 	uint_t i;
846 
847 	ep = uu_zalloc(offsetof(struct pc_elt, pce_auth) + strlen(auth) + 1);
848 	if (ep == NULL)
849 		return (-1);
850 
851 	/* Grow if pc_enum / pc_bnum > 3/4. */
852 	if (pcp->pc_enum * 4 > 3 * pcp->pc_bnum)
853 		/* Failure is not a stopper; we'll try again next time. */
854 		(void) pc_grow(pcp);
855 
856 	(void) strcpy(ep->pce_auth, auth);
857 
858 	i = pc_hash(auth) & (pcp->pc_bnum - 1);
859 	ep->pce_next = pcp->pc_buckets[i];
860 	pcp->pc_buckets[i] = ep;
861 
862 	++pcp->pc_enum;
863 
864 	return (0);
865 }
866 
867 /*
868  * For the type of a property group, return the authorization which may be
869  * used to modify it.
870  */
871 static const char *
872 perm_auth_for_pgtype(const char *pgtype)
873 {
874 	if (strcmp(pgtype, SCF_GROUP_METHOD) == 0)
875 		return (AUTH_MODIFY_PREFIX "method");
876 	else if (strcmp(pgtype, SCF_GROUP_DEPENDENCY) == 0)
877 		return (AUTH_MODIFY_PREFIX "dependency");
878 	else if (strcmp(pgtype, SCF_GROUP_APPLICATION) == 0)
879 		return (AUTH_MODIFY_PREFIX "application");
880 	else if (strcmp(pgtype, SCF_GROUP_FRAMEWORK) == 0)
881 		return (AUTH_MODIFY_PREFIX "framework");
882 	else
883 		return (NULL);
884 }
885 
886 /*
887  * Fails with
888  *   _NO_RESOURCES - out of memory
889  */
890 static int
891 perm_add_enabling(permcheck_t *pcp, const char *auth)
892 {
893 	return (pc_add(pcp, auth) == 0 ? REP_PROTOCOL_SUCCESS :
894 	    REP_PROTOCOL_FAIL_NO_RESOURCES);
895 }
896 
897 /* Note that perm_add_enabling_values() is defined below. */
898 
899 /*
900  * perm_granted() returns 1 if the current door caller has one of the enabling
901  * authorizations in pcp, 0 if it doesn't, and -1 if an error (usually lack of
902  * memory) occurs.  check_auth_list() checks an RBAC_AUTH_SEP-separated list
903  * of authorizations for existence in pcp, and check_prof_list() checks the
904  * authorizations granted to an RBAC_AUTH_SEP-separated list of profiles.
905  */
906 static int
907 check_auth_list(const permcheck_t *pcp, char *authlist)
908 {
909 	char *auth, *lasts;
910 	int ret;
911 
912 	for (auth = (char *)strtok_r(authlist, RBAC_AUTH_SEP, &lasts);
913 	    auth != NULL;
914 	    auth = (char *)strtok_r(NULL, RBAC_AUTH_SEP, &lasts)) {
915 		if (strchr(auth, KV_WILDCHAR) == NULL)
916 			ret = pc_exists(pcp, auth);
917 		else
918 			ret = pc_match(pcp, auth);
919 
920 		if (ret)
921 			return (ret);
922 	}
923 
924 	return (0);
925 }
926 
927 static int
928 check_prof_list(const permcheck_t *pcp, char *proflist)
929 {
930 	char *prof, *lasts, *authlist, *subproflist;
931 	profattr_t *pap;
932 	int ret = 0;
933 
934 	for (prof = strtok_r(proflist, RBAC_AUTH_SEP, &lasts);
935 	    prof != NULL;
936 	    prof = strtok_r(NULL, RBAC_AUTH_SEP, &lasts)) {
937 		pap = getprofnam(prof);
938 		if (pap == NULL)
939 			continue;
940 
941 		authlist = kva_match(pap->attr, PROFATTR_AUTHS_KW);
942 		if (authlist != NULL)
943 			ret = check_auth_list(pcp, authlist);
944 
945 		if (!ret) {
946 			subproflist = kva_match(pap->attr, PROFATTR_PROFS_KW);
947 			if (subproflist != NULL)
948 				/* depth check to avoid invinite recursion? */
949 				ret = check_prof_list(pcp, subproflist);
950 		}
951 
952 		free_profattr(pap);
953 		if (ret)
954 			return (ret);
955 	}
956 
957 	return (ret);
958 }
959 
960 static int
961 perm_granted(const permcheck_t *pcp)
962 {
963 	ucred_t *uc;
964 
965 	int ret = 0;
966 	uid_t uid;
967 	userattr_t *uap;
968 	char *authlist, *userattr_authlist, *proflist, *def_prof = NULL;
969 
970 	/*
971 	 * Get generic authorizations from policy.conf
972 	 *
973 	 * Note that _get_auth_policy is not threadsafe, so we single-thread
974 	 * access to it.
975 	 */
976 	(void) pthread_mutex_lock(&perm_lock);
977 	ret = _get_auth_policy(&authlist, &def_prof);
978 	(void) pthread_mutex_unlock(&perm_lock);
979 
980 	if (ret != 0)
981 		return (-1);
982 
983 	if (authlist != NULL) {
984 		ret = check_auth_list(pcp, authlist);
985 
986 		if (ret) {
987 			_free_auth_policy(authlist, def_prof);
988 			return (ret);
989 		}
990 	}
991 
992 	/*
993 	 * Put off checking def_prof for later in an attempt to consolidate
994 	 * prof_attr accesses.
995 	 */
996 
997 	/* Get the uid */
998 	if ((uc = get_ucred()) == NULL) {
999 		_free_auth_policy(authlist, def_prof);
1000 
1001 		if (errno == EINVAL) {
1002 			/*
1003 			 * Client is no longer waiting for our response (e.g.,
1004 			 * it received a signal & resumed with EINTR).
1005 			 * Punting with door_return() would be nice but we
1006 			 * need to release all of the locks & references we
1007 			 * hold.  And we must report failure to the client
1008 			 * layer to keep it from ignoring retries as
1009 			 * already-done (idempotency & all that).  None of the
1010 			 * error codes fit very well, so we might as well
1011 			 * force the return of _PERMISSION_DENIED since we
1012 			 * couldn't determine the user.
1013 			 */
1014 			return (0);
1015 		}
1016 		assert(0);
1017 		abort();
1018 	}
1019 
1020 	uid = ucred_geteuid(uc);
1021 	assert(uid != (uid_t)-1);
1022 
1023 	uap = getuseruid(uid);
1024 	if (uap != NULL) {
1025 		/* Get the authorizations from user_attr. */
1026 		userattr_authlist = kva_match(uap->attr, USERATTR_AUTHS_KW);
1027 		if (userattr_authlist != NULL)
1028 			ret = check_auth_list(pcp, userattr_authlist);
1029 	}
1030 
1031 	if (!ret && def_prof != NULL) {
1032 		/* Check generic profiles. */
1033 		ret = check_prof_list(pcp, def_prof);
1034 	}
1035 
1036 	if (!ret && uap != NULL) {
1037 		proflist = kva_match(uap->attr, USERATTR_PROFILES_KW);
1038 		if (proflist != NULL)
1039 			ret = check_prof_list(pcp, proflist);
1040 	}
1041 
1042 	_free_auth_policy(authlist, def_prof);
1043 	if (uap != NULL)
1044 		free_userattr(uap);
1045 
1046 	return (ret);
1047 }
1048 #endif /* NATIVE_BUILD */
1049 
1050 /*
1051  * flags in RC_NODE_WAITING_FLAGS are broadcast when unset, and are used to
1052  * serialize certain actions, and to wait for certain operations to complete
1053  *
1054  * The waiting flags are:
1055  *	RC_NODE_CHILDREN_CHANGING
1056  *		The child list is being built or changed (due to creation
1057  *		or deletion).  All iterators pause.
1058  *
1059  *	RC_NODE_USING_PARENT
1060  *		Someone is actively using the parent pointer, so we can't
1061  *		be removed from the parent list.
1062  *
1063  *	RC_NODE_CREATING_CHILD
1064  *		A child is being created -- locks out other creations, to
1065  *		prevent insert-insert races.
1066  *
1067  *	RC_NODE_IN_TX
1068  *		This object is running a transaction.
1069  *
1070  *	RC_NODE_DYING
1071  *		This node might be dying.  Always set as a set, using
1072  *		RC_NODE_DYING_FLAGS (which is everything but
1073  *		RC_NODE_USING_PARENT)
1074  */
1075 static int
1076 rc_node_hold_flag(rc_node_t *np, uint32_t flag)
1077 {
1078 	assert(MUTEX_HELD(&np->rn_lock));
1079 	assert((flag & ~RC_NODE_WAITING_FLAGS) == 0);
1080 
1081 	while (!(np->rn_flags & RC_NODE_DEAD) && (np->rn_flags & flag)) {
1082 		(void) pthread_cond_wait(&np->rn_cv, &np->rn_lock);
1083 	}
1084 	if (np->rn_flags & RC_NODE_DEAD)
1085 		return (0);
1086 
1087 	np->rn_flags |= flag;
1088 	return (1);
1089 }
1090 
1091 static void
1092 rc_node_rele_flag(rc_node_t *np, uint32_t flag)
1093 {
1094 	assert((flag & ~RC_NODE_WAITING_FLAGS) == 0);
1095 	assert(MUTEX_HELD(&np->rn_lock));
1096 	assert((np->rn_flags & flag) == flag);
1097 	np->rn_flags &= ~flag;
1098 	(void) pthread_cond_broadcast(&np->rn_cv);
1099 }
1100 
1101 /*
1102  * wait until a particular flag has cleared.  Fails if the object dies.
1103  */
1104 static int
1105 rc_node_wait_flag(rc_node_t *np, uint32_t flag)
1106 {
1107 	assert(MUTEX_HELD(&np->rn_lock));
1108 	while (!(np->rn_flags & RC_NODE_DEAD) && (np->rn_flags & flag))
1109 		(void) pthread_cond_wait(&np->rn_cv, &np->rn_lock);
1110 
1111 	return (!(np->rn_flags & RC_NODE_DEAD));
1112 }
1113 
1114 /*
1115  * On entry, np's lock must be held, and this thread must be holding
1116  * RC_NODE_USING_PARENT.  On return, both of them are released.
1117  *
1118  * If the return value is NULL, np either does not have a parent, or
1119  * the parent has been marked DEAD.
1120  *
1121  * If the return value is non-NULL, it is the parent of np, and both
1122  * its lock and the requested flags are held.
1123  */
1124 static rc_node_t *
1125 rc_node_hold_parent_flag(rc_node_t *np, uint32_t flag)
1126 {
1127 	rc_node_t *pp;
1128 
1129 	assert(MUTEX_HELD(&np->rn_lock));
1130 	assert(np->rn_flags & RC_NODE_USING_PARENT);
1131 
1132 	if ((pp = np->rn_parent) == NULL) {
1133 		rc_node_rele_flag(np, RC_NODE_USING_PARENT);
1134 		(void) pthread_mutex_unlock(&np->rn_lock);
1135 		return (NULL);
1136 	}
1137 	(void) pthread_mutex_unlock(&np->rn_lock);
1138 
1139 	(void) pthread_mutex_lock(&pp->rn_lock);
1140 	(void) pthread_mutex_lock(&np->rn_lock);
1141 	rc_node_rele_flag(np, RC_NODE_USING_PARENT);
1142 	(void) pthread_mutex_unlock(&np->rn_lock);
1143 
1144 	if (!rc_node_hold_flag(pp, flag)) {
1145 		(void) pthread_mutex_unlock(&pp->rn_lock);
1146 		return (NULL);
1147 	}
1148 	return (pp);
1149 }
1150 
1151 rc_node_t *
1152 rc_node_alloc(void)
1153 {
1154 	rc_node_t *np = uu_zalloc(sizeof (*np));
1155 
1156 	if (np == NULL)
1157 		return (NULL);
1158 
1159 	(void) pthread_mutex_init(&np->rn_lock, NULL);
1160 	(void) pthread_cond_init(&np->rn_cv, NULL);
1161 
1162 	np->rn_children = uu_list_create(rc_children_pool, np, 0);
1163 	np->rn_pg_notify_list = uu_list_create(rc_pg_notify_pool, np, 0);
1164 
1165 	uu_list_node_init(np, &np->rn_sibling_node, rc_children_pool);
1166 
1167 	uu_list_node_init(&np->rn_notify, &np->rn_notify.rcn_list_node,
1168 	    rc_notify_pool);
1169 
1170 	return (np);
1171 }
1172 
1173 void
1174 rc_node_destroy(rc_node_t *np)
1175 {
1176 	int i;
1177 
1178 	if (np->rn_flags & RC_NODE_UNREFED)
1179 		return;				/* being handled elsewhere */
1180 
1181 	assert(np->rn_refs == 0 && np->rn_other_refs == 0);
1182 	assert(np->rn_former == NULL);
1183 
1184 	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
1185 		/* Release the holds from rc_iter_next(). */
1186 		for (i = 0; i < COMPOSITION_DEPTH; ++i) {
1187 			/* rn_cchain[i] may be NULL for empty snapshots. */
1188 			if (np->rn_cchain[i] != NULL)
1189 				rc_node_rele(np->rn_cchain[i]);
1190 		}
1191 	}
1192 
1193 	if (np->rn_name != NULL)
1194 		free((void *)np->rn_name);
1195 	np->rn_name = NULL;
1196 	if (np->rn_type != NULL)
1197 		free((void *)np->rn_type);
1198 	np->rn_type = NULL;
1199 	if (np->rn_values != NULL)
1200 		object_free_values(np->rn_values, np->rn_valtype,
1201 		    np->rn_values_count, np->rn_values_size);
1202 	np->rn_values = NULL;
1203 
1204 	if (np->rn_snaplevel != NULL)
1205 		rc_snaplevel_rele(np->rn_snaplevel);
1206 	np->rn_snaplevel = NULL;
1207 
1208 	uu_list_node_fini(np, &np->rn_sibling_node, rc_children_pool);
1209 
1210 	uu_list_node_fini(&np->rn_notify, &np->rn_notify.rcn_list_node,
1211 	    rc_notify_pool);
1212 
1213 	assert(uu_list_first(np->rn_children) == NULL);
1214 	uu_list_destroy(np->rn_children);
1215 	uu_list_destroy(np->rn_pg_notify_list);
1216 
1217 	(void) pthread_mutex_destroy(&np->rn_lock);
1218 	(void) pthread_cond_destroy(&np->rn_cv);
1219 
1220 	uu_free(np);
1221 }
1222 
1223 /*
1224  * Link in a child node.
1225  *
1226  * Because of the lock ordering, cp has to already be in the hash table with
1227  * its lock dropped before we get it.  To prevent anyone from noticing that
1228  * it is parentless, the creation code sets the RC_NODE_USING_PARENT.  Once
1229  * we've linked it in, we release the flag.
1230  */
1231 static void
1232 rc_node_link_child(rc_node_t *np, rc_node_t *cp)
1233 {
1234 	assert(!MUTEX_HELD(&np->rn_lock));
1235 	assert(!MUTEX_HELD(&cp->rn_lock));
1236 
1237 	(void) pthread_mutex_lock(&np->rn_lock);
1238 	(void) pthread_mutex_lock(&cp->rn_lock);
1239 	assert(!(cp->rn_flags & RC_NODE_IN_PARENT) &&
1240 	    (cp->rn_flags & RC_NODE_USING_PARENT));
1241 
1242 	assert(rc_check_parent_child(np->rn_id.rl_type, cp->rn_id.rl_type) ==
1243 	    REP_PROTOCOL_SUCCESS);
1244 
1245 	cp->rn_parent = np;
1246 	cp->rn_flags |= RC_NODE_IN_PARENT;
1247 	(void) uu_list_insert_before(np->rn_children, NULL, cp);
1248 
1249 	(void) pthread_mutex_unlock(&np->rn_lock);
1250 
1251 	rc_node_rele_flag(cp, RC_NODE_USING_PARENT);
1252 	(void) pthread_mutex_unlock(&cp->rn_lock);
1253 }
1254 
1255 /*
1256  * Sets the rn_parent_ref field of all the children of np to pp -- always
1257  * initially invoked as rc_node_setup_parent_ref(np, np), we then recurse.
1258  *
1259  * This is used when we mark a node RC_NODE_OLD, so that when the object and
1260  * its children are no longer referenced, they will all be deleted as a unit.
1261  */
1262 static void
1263 rc_node_setup_parent_ref(rc_node_t *np, rc_node_t *pp)
1264 {
1265 	rc_node_t *cp;
1266 
1267 	assert(MUTEX_HELD(&np->rn_lock));
1268 
1269 	for (cp = uu_list_first(np->rn_children); cp != NULL;
1270 	    cp = uu_list_next(np->rn_children, cp)) {
1271 		(void) pthread_mutex_lock(&cp->rn_lock);
1272 		if (cp->rn_flags & RC_NODE_PARENT_REF) {
1273 			assert(cp->rn_parent_ref == pp);
1274 		} else {
1275 			assert(cp->rn_parent_ref == NULL);
1276 
1277 			cp->rn_flags |= RC_NODE_PARENT_REF;
1278 			cp->rn_parent_ref = pp;
1279 			if (cp->rn_refs != 0)
1280 				rc_node_hold_other(pp);
1281 		}
1282 		rc_node_setup_parent_ref(cp, pp);		/* recurse */
1283 		(void) pthread_mutex_unlock(&cp->rn_lock);
1284 	}
1285 }
1286 
1287 /*
1288  * Atomically replace 'np' with 'newp', with a parent of 'pp'.
1289  *
1290  * Requirements:
1291  *	*no* node locks may be held.
1292  *	pp must be held with RC_NODE_CHILDREN_CHANGING
1293  *	newp and np must be held with RC_NODE_IN_TX
1294  *	np must be marked RC_NODE_IN_PARENT, newp must not be
1295  *	np must be marked RC_NODE_OLD
1296  *
1297  * Afterwards:
1298  *	pp's RC_NODE_CHILDREN_CHANGING is dropped
1299  *	newp and np's RC_NODE_IN_TX is dropped
1300  *	newp->rn_former = np;
1301  *	newp is RC_NODE_IN_PARENT, np is not.
1302  *	interested notify subscribers have been notified of newp's new status.
1303  */
1304 static void
1305 rc_node_relink_child(rc_node_t *pp, rc_node_t *np, rc_node_t *newp)
1306 {
1307 	cache_bucket_t *bp;
1308 	/*
1309 	 * First, swap np and nnp in the cache.  newp's RC_NODE_IN_TX flag
1310 	 * keeps rc_node_update() from seeing it until we are done.
1311 	 */
1312 	bp = cache_hold(newp->rn_hash);
1313 	cache_remove_unlocked(bp, np);
1314 	cache_insert_unlocked(bp, newp);
1315 	cache_release(bp);
1316 
1317 	/*
1318 	 * replace np with newp in pp's list, and attach it to newp's rn_former
1319 	 * link.
1320 	 */
1321 	(void) pthread_mutex_lock(&pp->rn_lock);
1322 	assert(pp->rn_flags & RC_NODE_CHILDREN_CHANGING);
1323 
1324 	(void) pthread_mutex_lock(&newp->rn_lock);
1325 	assert(!(newp->rn_flags & RC_NODE_IN_PARENT));
1326 	assert(newp->rn_flags & RC_NODE_IN_TX);
1327 
1328 	(void) pthread_mutex_lock(&np->rn_lock);
1329 	assert(np->rn_flags & RC_NODE_IN_PARENT);
1330 	assert(np->rn_flags & RC_NODE_OLD);
1331 	assert(np->rn_flags & RC_NODE_IN_TX);
1332 
1333 	newp->rn_parent = pp;
1334 	newp->rn_flags |= RC_NODE_IN_PARENT;
1335 
1336 	/*
1337 	 * Note that we carefully add newp before removing np -- this
1338 	 * keeps iterators on the list from missing us.
1339 	 */
1340 	(void) uu_list_insert_after(pp->rn_children, np, newp);
1341 	(void) uu_list_remove(pp->rn_children, np);
1342 
1343 	/*
1344 	 * re-set np
1345 	 */
1346 	newp->rn_former = np;
1347 	np->rn_parent = NULL;
1348 	np->rn_flags &= ~RC_NODE_IN_PARENT;
1349 	np->rn_flags |= RC_NODE_ON_FORMER;
1350 
1351 	rc_notify_insert_node(newp);
1352 
1353 	rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
1354 	(void) pthread_mutex_unlock(&pp->rn_lock);
1355 	rc_node_rele_flag(newp, RC_NODE_USING_PARENT | RC_NODE_IN_TX);
1356 	(void) pthread_mutex_unlock(&newp->rn_lock);
1357 	rc_node_setup_parent_ref(np, np);
1358 	rc_node_rele_flag(np, RC_NODE_IN_TX);
1359 	(void) pthread_mutex_unlock(&np->rn_lock);
1360 }
1361 
1362 /*
1363  * makes sure a node with lookup 'nip', name 'name', and parent 'pp' exists.
1364  * 'cp' is used (and returned) if the node does not yet exist.  If it does
1365  * exist, 'cp' is freed, and the existent node is returned instead.
1366  */
1367 rc_node_t *
1368 rc_node_setup(rc_node_t *cp, rc_node_lookup_t *nip, const char *name,
1369     rc_node_t *pp)
1370 {
1371 	rc_node_t *np;
1372 	cache_bucket_t *bp;
1373 	uint32_t h = rc_node_hash(nip);
1374 
1375 	assert(cp->rn_refs == 0);
1376 
1377 	bp = cache_hold(h);
1378 	if ((np = cache_lookup_unlocked(bp, nip)) != NULL) {
1379 		cache_release(bp);
1380 
1381 		/*
1382 		 * make sure it matches our expectations
1383 		 */
1384 		(void) pthread_mutex_lock(&np->rn_lock);
1385 		if (rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
1386 			assert(np->rn_parent == pp);
1387 			assert(memcmp(&np->rn_id, nip, sizeof (*nip)) == 0);
1388 			assert(strcmp(np->rn_name, name) == 0);
1389 			assert(np->rn_type == NULL);
1390 			assert(np->rn_flags & RC_NODE_IN_PARENT);
1391 			rc_node_rele_flag(np, RC_NODE_USING_PARENT);
1392 		}
1393 		(void) pthread_mutex_unlock(&np->rn_lock);
1394 
1395 		rc_node_destroy(cp);
1396 		return (np);
1397 	}
1398 
1399 	/*
1400 	 * No one is there -- create a new node.
1401 	 */
1402 	np = cp;
1403 	rc_node_hold(np);
1404 	np->rn_id = *nip;
1405 	np->rn_hash = h;
1406 	np->rn_name = strdup(name);
1407 
1408 	np->rn_flags |= RC_NODE_USING_PARENT;
1409 
1410 	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_INSTANCE) {
1411 #if COMPOSITION_DEPTH == 2
1412 		np->rn_cchain[0] = np;
1413 		np->rn_cchain[1] = pp;
1414 #else
1415 #error This code must be updated.
1416 #endif
1417 	}
1418 
1419 	cache_insert_unlocked(bp, np);
1420 	cache_release(bp);		/* we are now visible */
1421 
1422 	rc_node_link_child(pp, np);
1423 
1424 	return (np);
1425 }
1426 
1427 /*
1428  * makes sure a snapshot with lookup 'nip', name 'name', and parent 'pp' exists.
1429  * 'cp' is used (and returned) if the node does not yet exist.  If it does
1430  * exist, 'cp' is freed, and the existent node is returned instead.
1431  */
1432 rc_node_t *
1433 rc_node_setup_snapshot(rc_node_t *cp, rc_node_lookup_t *nip, const char *name,
1434     uint32_t snap_id, rc_node_t *pp)
1435 {
1436 	rc_node_t *np;
1437 	cache_bucket_t *bp;
1438 	uint32_t h = rc_node_hash(nip);
1439 
1440 	assert(cp->rn_refs == 0);
1441 
1442 	bp = cache_hold(h);
1443 	if ((np = cache_lookup_unlocked(bp, nip)) != NULL) {
1444 		cache_release(bp);
1445 
1446 		/*
1447 		 * make sure it matches our expectations
1448 		 */
1449 		(void) pthread_mutex_lock(&np->rn_lock);
1450 		if (rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
1451 			assert(np->rn_parent == pp);
1452 			assert(memcmp(&np->rn_id, nip, sizeof (*nip)) == 0);
1453 			assert(strcmp(np->rn_name, name) == 0);
1454 			assert(np->rn_type == NULL);
1455 			assert(np->rn_flags & RC_NODE_IN_PARENT);
1456 			rc_node_rele_flag(np, RC_NODE_USING_PARENT);
1457 		}
1458 		(void) pthread_mutex_unlock(&np->rn_lock);
1459 
1460 		rc_node_destroy(cp);
1461 		return (np);
1462 	}
1463 
1464 	/*
1465 	 * No one is there -- create a new node.
1466 	 */
1467 	np = cp;
1468 	rc_node_hold(np);
1469 	np->rn_id = *nip;
1470 	np->rn_hash = h;
1471 	np->rn_name = strdup(name);
1472 	np->rn_snapshot_id = snap_id;
1473 
1474 	np->rn_flags |= RC_NODE_USING_PARENT;
1475 
1476 	cache_insert_unlocked(bp, np);
1477 	cache_release(bp);		/* we are now visible */
1478 
1479 	rc_node_link_child(pp, np);
1480 
1481 	return (np);
1482 }
1483 
1484 /*
1485  * makes sure a snaplevel with lookup 'nip' and parent 'pp' exists.  'cp' is
1486  * used (and returned) if the node does not yet exist.  If it does exist, 'cp'
1487  * is freed, and the existent node is returned instead.
1488  */
1489 rc_node_t *
1490 rc_node_setup_snaplevel(rc_node_t *cp, rc_node_lookup_t *nip,
1491     rc_snaplevel_t *lvl, rc_node_t *pp)
1492 {
1493 	rc_node_t *np;
1494 	cache_bucket_t *bp;
1495 	uint32_t h = rc_node_hash(nip);
1496 
1497 	assert(cp->rn_refs == 0);
1498 
1499 	bp = cache_hold(h);
1500 	if ((np = cache_lookup_unlocked(bp, nip)) != NULL) {
1501 		cache_release(bp);
1502 
1503 		/*
1504 		 * make sure it matches our expectations
1505 		 */
1506 		(void) pthread_mutex_lock(&np->rn_lock);
1507 		if (rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
1508 			assert(np->rn_parent == pp);
1509 			assert(memcmp(&np->rn_id, nip, sizeof (*nip)) == 0);
1510 			assert(np->rn_name == NULL);
1511 			assert(np->rn_type == NULL);
1512 			assert(np->rn_flags & RC_NODE_IN_PARENT);
1513 			rc_node_rele_flag(np, RC_NODE_USING_PARENT);
1514 		}
1515 		(void) pthread_mutex_unlock(&np->rn_lock);
1516 
1517 		rc_node_destroy(cp);
1518 		return (np);
1519 	}
1520 
1521 	/*
1522 	 * No one is there -- create a new node.
1523 	 */
1524 	np = cp;
1525 	rc_node_hold(np);	/* released in snapshot_fill_children() */
1526 	np->rn_id = *nip;
1527 	np->rn_hash = h;
1528 
1529 	rc_snaplevel_hold(lvl);
1530 	np->rn_snaplevel = lvl;
1531 
1532 	np->rn_flags |= RC_NODE_USING_PARENT;
1533 
1534 	cache_insert_unlocked(bp, np);
1535 	cache_release(bp);		/* we are now visible */
1536 
1537 	/* Add this snaplevel to the snapshot's composition chain. */
1538 	assert(pp->rn_cchain[lvl->rsl_level_num - 1] == NULL);
1539 	pp->rn_cchain[lvl->rsl_level_num - 1] = np;
1540 
1541 	rc_node_link_child(pp, np);
1542 
1543 	return (np);
1544 }
1545 
1546 /*
1547  * Returns NULL if strdup() fails.
1548  */
1549 rc_node_t *
1550 rc_node_setup_pg(rc_node_t *cp, rc_node_lookup_t *nip, const char *name,
1551     const char *type, uint32_t flags, uint32_t gen_id, rc_node_t *pp)
1552 {
1553 	rc_node_t *np;
1554 	cache_bucket_t *bp;
1555 
1556 	uint32_t h = rc_node_hash(nip);
1557 	bp = cache_hold(h);
1558 	if ((np = cache_lookup_unlocked(bp, nip)) != NULL) {
1559 		cache_release(bp);
1560 
1561 		/*
1562 		 * make sure it matches our expectations (don't check
1563 		 * the generation number or parent, since someone could
1564 		 * have gotten a transaction through while we weren't
1565 		 * looking)
1566 		 */
1567 		(void) pthread_mutex_lock(&np->rn_lock);
1568 		if (rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
1569 			assert(memcmp(&np->rn_id, nip, sizeof (*nip)) == 0);
1570 			assert(strcmp(np->rn_name, name) == 0);
1571 			assert(strcmp(np->rn_type, type) == 0);
1572 			assert(np->rn_pgflags == flags);
1573 			assert(np->rn_flags & RC_NODE_IN_PARENT);
1574 			rc_node_rele_flag(np, RC_NODE_USING_PARENT);
1575 		}
1576 		(void) pthread_mutex_unlock(&np->rn_lock);
1577 
1578 		rc_node_destroy(cp);
1579 		return (np);
1580 	}
1581 
1582 	np = cp;
1583 	rc_node_hold(np);		/* released in fill_pg_callback() */
1584 	np->rn_id = *nip;
1585 	np->rn_hash = h;
1586 	np->rn_name = strdup(name);
1587 	if (np->rn_name == NULL) {
1588 		rc_node_rele(np);
1589 		return (NULL);
1590 	}
1591 	np->rn_type = strdup(type);
1592 	if (np->rn_type == NULL) {
1593 		free((void *)np->rn_name);
1594 		rc_node_rele(np);
1595 		return (NULL);
1596 	}
1597 	np->rn_pgflags = flags;
1598 	np->rn_gen_id = gen_id;
1599 
1600 	np->rn_flags |= RC_NODE_USING_PARENT;
1601 
1602 	cache_insert_unlocked(bp, np);
1603 	cache_release(bp);		/* we are now visible */
1604 
1605 	rc_node_link_child(pp, np);
1606 
1607 	return (np);
1608 }
1609 
1610 #if COMPOSITION_DEPTH == 2
1611 /*
1612  * Initialize a "composed property group" which represents the composition of
1613  * property groups pg1 & pg2.  It is ephemeral: once created & returned for an
1614  * ITER_READ request, keeping it out of cache_hash and any child lists
1615  * prevents it from being looked up.  Operations besides iteration are passed
1616  * through to pg1.
1617  *
1618  * pg1 & pg2 should be held before entering this function.  They will be
1619  * released in rc_node_destroy().
1620  */
1621 static int
1622 rc_node_setup_cpg(rc_node_t *cpg, rc_node_t *pg1, rc_node_t *pg2)
1623 {
1624 	if (strcmp(pg1->rn_type, pg2->rn_type) != 0)
1625 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
1626 
1627 	cpg->rn_id.rl_type = REP_PROTOCOL_ENTITY_CPROPERTYGRP;
1628 	cpg->rn_name = strdup(pg1->rn_name);
1629 	if (cpg->rn_name == NULL)
1630 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
1631 
1632 	cpg->rn_cchain[0] = pg1;
1633 	cpg->rn_cchain[1] = pg2;
1634 
1635 	return (REP_PROTOCOL_SUCCESS);
1636 }
1637 #else
1638 #error This code must be updated.
1639 #endif
1640 
1641 /*
1642  * Fails with _NO_RESOURCES.
1643  */
1644 int
1645 rc_node_create_property(rc_node_t *pp, rc_node_lookup_t *nip,
1646     const char *name, rep_protocol_value_type_t type,
1647     const char *vals, size_t count, size_t size)
1648 {
1649 	rc_node_t *np;
1650 	cache_bucket_t *bp;
1651 
1652 	uint32_t h = rc_node_hash(nip);
1653 	bp = cache_hold(h);
1654 	if ((np = cache_lookup_unlocked(bp, nip)) != NULL) {
1655 		cache_release(bp);
1656 		/*
1657 		 * make sure it matches our expectations
1658 		 */
1659 		(void) pthread_mutex_lock(&np->rn_lock);
1660 		if (rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
1661 			assert(np->rn_parent == pp);
1662 			assert(memcmp(&np->rn_id, nip, sizeof (*nip)) == 0);
1663 			assert(strcmp(np->rn_name, name) == 0);
1664 			assert(np->rn_valtype == type);
1665 			assert(np->rn_values_count == count);
1666 			assert(np->rn_values_size == size);
1667 			assert(vals == NULL ||
1668 			    memcmp(np->rn_values, vals, size) == 0);
1669 			assert(np->rn_flags & RC_NODE_IN_PARENT);
1670 			rc_node_rele_flag(np, RC_NODE_USING_PARENT);
1671 		}
1672 		rc_node_rele_locked(np);
1673 		object_free_values(vals, type, count, size);
1674 		return (REP_PROTOCOL_SUCCESS);
1675 	}
1676 
1677 	/*
1678 	 * No one is there -- create a new node.
1679 	 */
1680 	np = rc_node_alloc();
1681 	if (np == NULL) {
1682 		cache_release(bp);
1683 		object_free_values(vals, type, count, size);
1684 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
1685 	}
1686 	np->rn_id = *nip;
1687 	np->rn_hash = h;
1688 	np->rn_name = strdup(name);
1689 	if (np->rn_name == NULL) {
1690 		cache_release(bp);
1691 		object_free_values(vals, type, count, size);
1692 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
1693 	}
1694 
1695 	np->rn_valtype = type;
1696 	np->rn_values = vals;
1697 	np->rn_values_count = count;
1698 	np->rn_values_size = size;
1699 
1700 	np->rn_flags |= RC_NODE_USING_PARENT;
1701 
1702 	cache_insert_unlocked(bp, np);
1703 	cache_release(bp);		/* we are now visible */
1704 
1705 	rc_node_link_child(pp, np);
1706 
1707 	return (REP_PROTOCOL_SUCCESS);
1708 }
1709 
1710 int
1711 rc_node_init(void)
1712 {
1713 	rc_node_t *np;
1714 	cache_bucket_t *bp;
1715 
1716 	rc_children_pool = uu_list_pool_create("rc_children_pool",
1717 	    sizeof (rc_node_t), offsetof(rc_node_t, rn_sibling_node),
1718 	    NULL, UU_LIST_POOL_DEBUG);
1719 
1720 	rc_pg_notify_pool = uu_list_pool_create("rc_pg_notify_pool",
1721 	    sizeof (rc_node_pg_notify_t),
1722 	    offsetof(rc_node_pg_notify_t, rnpn_node),
1723 	    NULL, UU_LIST_POOL_DEBUG);
1724 
1725 	rc_notify_pool = uu_list_pool_create("rc_notify_pool",
1726 	    sizeof (rc_notify_t), offsetof(rc_notify_t, rcn_list_node),
1727 	    NULL, UU_LIST_POOL_DEBUG);
1728 
1729 	rc_notify_info_pool = uu_list_pool_create("rc_notify_info_pool",
1730 	    sizeof (rc_notify_info_t),
1731 	    offsetof(rc_notify_info_t, rni_list_node),
1732 	    NULL, UU_LIST_POOL_DEBUG);
1733 
1734 	if (rc_children_pool == NULL || rc_pg_notify_pool == NULL ||
1735 	    rc_notify_pool == NULL || rc_notify_info_pool == NULL)
1736 		uu_die("out of memory");
1737 
1738 	rc_notify_list = uu_list_create(rc_notify_pool,
1739 	    &rc_notify_list, 0);
1740 
1741 	rc_notify_info_list = uu_list_create(rc_notify_info_pool,
1742 	    &rc_notify_info_list, 0);
1743 
1744 	if (rc_notify_list == NULL || rc_notify_info_list == NULL)
1745 		uu_die("out of memory");
1746 
1747 	if ((np = rc_node_alloc()) == NULL)
1748 		uu_die("out of memory");
1749 
1750 	rc_node_hold(np);
1751 	np->rn_id.rl_type = REP_PROTOCOL_ENTITY_SCOPE;
1752 	np->rn_id.rl_backend = BACKEND_TYPE_NORMAL;
1753 	np->rn_hash = rc_node_hash(&np->rn_id);
1754 	np->rn_name = "localhost";
1755 
1756 	bp = cache_hold(np->rn_hash);
1757 	cache_insert_unlocked(bp, np);
1758 	cache_release(bp);
1759 
1760 	rc_scope = np;
1761 	return (1);
1762 }
1763 
1764 /*
1765  * Fails with
1766  *   _INVALID_TYPE - type is invalid
1767  *   _TYPE_MISMATCH - np doesn't carry children of type type
1768  *   _DELETED - np has been deleted
1769  *   _NO_RESOURCES
1770  */
1771 static int
1772 rc_node_fill_children(rc_node_t *np, uint32_t type)
1773 {
1774 	int rc;
1775 
1776 	assert(MUTEX_HELD(&np->rn_lock));
1777 
1778 	if ((rc = rc_check_parent_child(np->rn_id.rl_type, type)) !=
1779 	    REP_PROTOCOL_SUCCESS)
1780 		return (rc);
1781 
1782 	if (!rc_node_hold_flag(np, RC_NODE_CHILDREN_CHANGING))
1783 		return (REP_PROTOCOL_FAIL_DELETED);
1784 
1785 	if (np->rn_flags & RC_NODE_HAS_CHILDREN) {
1786 		rc_node_rele_flag(np, RC_NODE_CHILDREN_CHANGING);
1787 		return (REP_PROTOCOL_SUCCESS);
1788 	}
1789 
1790 	(void) pthread_mutex_unlock(&np->rn_lock);
1791 	rc = object_fill_children(np);
1792 	(void) pthread_mutex_lock(&np->rn_lock);
1793 
1794 	if (rc == REP_PROTOCOL_SUCCESS) {
1795 		np->rn_flags |= RC_NODE_HAS_CHILDREN;
1796 	}
1797 	rc_node_rele_flag(np, RC_NODE_CHILDREN_CHANGING);
1798 
1799 	return (rc);
1800 }
1801 
1802 /*
1803  * Returns
1804  *   _INVALID_TYPE - type is invalid
1805  *   _TYPE_MISMATCH - np doesn't carry children of type type
1806  *   _DELETED - np has been deleted
1807  *   _NO_RESOURCES
1808  *   _SUCCESS - if *cpp is not NULL, it is held
1809  */
1810 static int
1811 rc_node_find_named_child(rc_node_t *np, const char *name, uint32_t type,
1812     rc_node_t **cpp)
1813 {
1814 	int ret;
1815 	rc_node_t *cp;
1816 
1817 	assert(MUTEX_HELD(&np->rn_lock));
1818 	assert(np->rn_id.rl_type != REP_PROTOCOL_ENTITY_CPROPERTYGRP);
1819 
1820 	ret = rc_node_fill_children(np, type);
1821 	if (ret != REP_PROTOCOL_SUCCESS)
1822 		return (ret);
1823 
1824 	for (cp = uu_list_first(np->rn_children);
1825 	    cp != NULL;
1826 	    cp = uu_list_next(np->rn_children, cp)) {
1827 		if (cp->rn_id.rl_type == type && strcmp(cp->rn_name, name) == 0)
1828 			break;
1829 	}
1830 
1831 	if (cp != NULL)
1832 		rc_node_hold(cp);
1833 	*cpp = cp;
1834 
1835 	return (REP_PROTOCOL_SUCCESS);
1836 }
1837 
1838 static int rc_node_parent(rc_node_t *, rc_node_t **);
1839 
1840 /*
1841  * Returns
1842  *   _INVALID_TYPE - type is invalid
1843  *   _DELETED - np or an ancestor has been deleted
1844  *   _NOT_FOUND - no ancestor of specified type exists
1845  *   _SUCCESS - *app is held
1846  */
1847 static int
1848 rc_node_find_ancestor(rc_node_t *np, uint32_t type, rc_node_t **app)
1849 {
1850 	int ret;
1851 	rc_node_t *parent, *np_orig;
1852 
1853 	if (type >= REP_PROTOCOL_ENTITY_MAX)
1854 		return (REP_PROTOCOL_FAIL_INVALID_TYPE);
1855 
1856 	np_orig = np;
1857 
1858 	while (np->rn_id.rl_type > type) {
1859 		ret = rc_node_parent(np, &parent);
1860 		if (np != np_orig)
1861 			rc_node_rele(np);
1862 		if (ret != REP_PROTOCOL_SUCCESS)
1863 			return (ret);
1864 		np = parent;
1865 	}
1866 
1867 	if (np->rn_id.rl_type == type) {
1868 		*app = parent;
1869 		return (REP_PROTOCOL_SUCCESS);
1870 	}
1871 
1872 	return (REP_PROTOCOL_FAIL_NOT_FOUND);
1873 }
1874 
1875 #ifndef NATIVE_BUILD
1876 /*
1877  * If the propname property exists in pg, and it is of type string, add its
1878  * values as authorizations to pcp.  pg must not be locked on entry, and it is
1879  * returned unlocked.  Returns
1880  *   _DELETED - pg was deleted
1881  *   _NO_RESOURCES
1882  *   _NOT_FOUND - pg has no property named propname
1883  *   _SUCCESS
1884  */
1885 static int
1886 perm_add_pg_prop_values(permcheck_t *pcp, rc_node_t *pg, const char *propname)
1887 {
1888 	rc_node_t *prop;
1889 	int result;
1890 
1891 	uint_t count;
1892 	const char *cp;
1893 
1894 	assert(!MUTEX_HELD(&pg->rn_lock));
1895 	assert(pg->rn_id.rl_type == REP_PROTOCOL_ENTITY_PROPERTYGRP);
1896 
1897 	(void) pthread_mutex_lock(&pg->rn_lock);
1898 	result = rc_node_find_named_child(pg, propname,
1899 	    REP_PROTOCOL_ENTITY_PROPERTY, &prop);
1900 	(void) pthread_mutex_unlock(&pg->rn_lock);
1901 	if (result != REP_PROTOCOL_SUCCESS) {
1902 		switch (result) {
1903 		case REP_PROTOCOL_FAIL_DELETED:
1904 		case REP_PROTOCOL_FAIL_NO_RESOURCES:
1905 			return (result);
1906 
1907 		case REP_PROTOCOL_FAIL_INVALID_TYPE:
1908 		case REP_PROTOCOL_FAIL_TYPE_MISMATCH:
1909 		default:
1910 			bad_error("rc_node_find_named_child", result);
1911 		}
1912 	}
1913 
1914 	if (prop == NULL)
1915 		return (REP_PROTOCOL_FAIL_NOT_FOUND);
1916 
1917 	/* rn_valtype is immutable, so no locking. */
1918 	if (prop->rn_valtype != REP_PROTOCOL_TYPE_STRING) {
1919 		rc_node_rele(prop);
1920 		return (REP_PROTOCOL_SUCCESS);
1921 	}
1922 
1923 	(void) pthread_mutex_lock(&prop->rn_lock);
1924 	for (count = prop->rn_values_count, cp = prop->rn_values;
1925 	    count > 0;
1926 	    --count) {
1927 		result = perm_add_enabling(pcp, cp);
1928 		if (result != REP_PROTOCOL_SUCCESS)
1929 			break;
1930 
1931 		cp = strchr(cp, '\0') + 1;
1932 	}
1933 
1934 	rc_node_rele_locked(prop);
1935 
1936 	return (result);
1937 }
1938 
1939 /*
1940  * Assuming that ent is a service or instance node, if the pgname property
1941  * group has type pgtype, and it has a propname property with string type, add
1942  * its values as authorizations to pcp.  If pgtype is NULL, it is not checked.
1943  * Returns
1944  *   _SUCCESS
1945  *   _DELETED - ent was deleted
1946  *   _NO_RESOURCES - no resources
1947  *   _NOT_FOUND - ent does not have pgname pg or propname property
1948  */
1949 static int
1950 perm_add_ent_prop_values(permcheck_t *pcp, rc_node_t *ent, const char *pgname,
1951     const char *pgtype, const char *propname)
1952 {
1953 	int r;
1954 	rc_node_t *pg;
1955 
1956 	assert(!MUTEX_HELD(&ent->rn_lock));
1957 
1958 	(void) pthread_mutex_lock(&ent->rn_lock);
1959 	r = rc_node_find_named_child(ent, pgname,
1960 	    REP_PROTOCOL_ENTITY_PROPERTYGRP, &pg);
1961 	(void) pthread_mutex_unlock(&ent->rn_lock);
1962 
1963 	switch (r) {
1964 	case REP_PROTOCOL_SUCCESS:
1965 		break;
1966 
1967 	case REP_PROTOCOL_FAIL_DELETED:
1968 	case REP_PROTOCOL_FAIL_NO_RESOURCES:
1969 		return (r);
1970 
1971 	default:
1972 		bad_error("rc_node_find_named_child", r);
1973 	}
1974 
1975 	if (pg == NULL)
1976 		return (REP_PROTOCOL_FAIL_NOT_FOUND);
1977 
1978 	if (pgtype == NULL || strcmp(pg->rn_type, pgtype) == 0) {
1979 		r = perm_add_pg_prop_values(pcp, pg, propname);
1980 		switch (r) {
1981 		case REP_PROTOCOL_FAIL_DELETED:
1982 			r = REP_PROTOCOL_FAIL_NOT_FOUND;
1983 			break;
1984 
1985 		case REP_PROTOCOL_FAIL_NO_RESOURCES:
1986 		case REP_PROTOCOL_SUCCESS:
1987 		case REP_PROTOCOL_FAIL_NOT_FOUND:
1988 			break;
1989 
1990 		default:
1991 			bad_error("perm_add_pg_prop_values", r);
1992 		}
1993 	}
1994 
1995 	rc_node_rele(pg);
1996 
1997 	return (r);
1998 }
1999 
2000 /*
2001  * If pg has a property named propname, and is string typed, add its values as
2002  * authorizations to pcp.  If pg has no such property, and its parent is an
2003  * instance, walk up to the service and try doing the same with the property
2004  * of the same name from the property group of the same name.  Returns
2005  *   _SUCCESS
2006  *   _NO_RESOURCES
2007  *   _DELETED - pg (or an ancestor) was deleted
2008  */
2009 static int
2010 perm_add_enabling_values(permcheck_t *pcp, rc_node_t *pg, const char *propname)
2011 {
2012 	int r;
2013 	char pgname[REP_PROTOCOL_NAME_LEN + 1];
2014 	rc_node_t *svc;
2015 	size_t sz;
2016 
2017 	r = perm_add_pg_prop_values(pcp, pg, propname);
2018 
2019 	if (r != REP_PROTOCOL_FAIL_NOT_FOUND)
2020 		return (r);
2021 
2022 	assert(!MUTEX_HELD(&pg->rn_lock));
2023 
2024 	if (pg->rn_id.rl_ids[ID_INSTANCE] == 0)
2025 		return (REP_PROTOCOL_SUCCESS);
2026 
2027 	sz = strlcpy(pgname, pg->rn_name, sizeof (pgname));
2028 	assert(sz < sizeof (pgname));
2029 
2030 	/*
2031 	 * If pg is a child of an instance or snapshot, we want to compose the
2032 	 * authorization property with the service's (if it exists).  The
2033 	 * snapshot case applies only to read_authorization.  In all other
2034 	 * cases, the pg's parent will be the instance.
2035 	 */
2036 	r = rc_node_find_ancestor(pg, REP_PROTOCOL_ENTITY_SERVICE, &svc);
2037 	if (r != REP_PROTOCOL_SUCCESS) {
2038 		assert(r == REP_PROTOCOL_FAIL_DELETED);
2039 		return (r);
2040 	}
2041 	assert(svc->rn_id.rl_type == REP_PROTOCOL_ENTITY_SERVICE);
2042 
2043 	r = perm_add_ent_prop_values(pcp, svc, pgname, NULL, propname);
2044 
2045 	rc_node_rele(svc);
2046 
2047 	if (r == REP_PROTOCOL_FAIL_NOT_FOUND)
2048 		r = REP_PROTOCOL_SUCCESS;
2049 
2050 	return (r);
2051 }
2052 
2053 /*
2054  * Call perm_add_enabling_values() for the "action_authorization" property of
2055  * the "general" property group of inst.  Returns
2056  *   _DELETED - inst (or an ancestor) was deleted
2057  *   _NO_RESOURCES
2058  *   _SUCCESS
2059  */
2060 static int
2061 perm_add_inst_action_auth(permcheck_t *pcp, rc_node_t *inst)
2062 {
2063 	int r;
2064 	rc_node_t *svc;
2065 
2066 	assert(inst->rn_id.rl_type == REP_PROTOCOL_ENTITY_INSTANCE);
2067 
2068 	r = perm_add_ent_prop_values(pcp, inst, AUTH_PG_GENERAL,
2069 	    AUTH_PG_GENERAL_TYPE, AUTH_PROP_ACTION);
2070 
2071 	if (r != REP_PROTOCOL_FAIL_NOT_FOUND)
2072 		return (r);
2073 
2074 	r = rc_node_parent(inst, &svc);
2075 	if (r != REP_PROTOCOL_SUCCESS) {
2076 		assert(r == REP_PROTOCOL_FAIL_DELETED);
2077 		return (r);
2078 	}
2079 
2080 	r = perm_add_ent_prop_values(pcp, svc, AUTH_PG_GENERAL,
2081 	    AUTH_PG_GENERAL_TYPE, AUTH_PROP_ACTION);
2082 
2083 	return (r == REP_PROTOCOL_FAIL_NOT_FOUND ? REP_PROTOCOL_SUCCESS : r);
2084 }
2085 #endif /* NATIVE_BUILD */
2086 
2087 void
2088 rc_node_ptr_init(rc_node_ptr_t *out)
2089 {
2090 	out->rnp_node = NULL;
2091 	out->rnp_authorized = 0;
2092 	out->rnp_deleted = 0;
2093 }
2094 
2095 static void
2096 rc_node_assign(rc_node_ptr_t *out, rc_node_t *val)
2097 {
2098 	rc_node_t *cur = out->rnp_node;
2099 	if (val != NULL)
2100 		rc_node_hold(val);
2101 	out->rnp_node = val;
2102 	if (cur != NULL)
2103 		rc_node_rele(cur);
2104 	out->rnp_authorized = 0;
2105 	out->rnp_deleted = 0;
2106 }
2107 
2108 void
2109 rc_node_clear(rc_node_ptr_t *out, int deleted)
2110 {
2111 	rc_node_assign(out, NULL);
2112 	out->rnp_deleted = deleted;
2113 }
2114 
2115 void
2116 rc_node_ptr_assign(rc_node_ptr_t *out, const rc_node_ptr_t *val)
2117 {
2118 	rc_node_assign(out, val->rnp_node);
2119 }
2120 
2121 /*
2122  * rc_node_check()/RC_NODE_CHECK()
2123  *	generic "entry" checks, run before the use of an rc_node pointer.
2124  *
2125  * Fails with
2126  *   _NOT_SET
2127  *   _DELETED
2128  */
2129 static int
2130 rc_node_check_and_lock(rc_node_t *np)
2131 {
2132 	int result = REP_PROTOCOL_SUCCESS;
2133 	if (np == NULL)
2134 		return (REP_PROTOCOL_FAIL_NOT_SET);
2135 
2136 	(void) pthread_mutex_lock(&np->rn_lock);
2137 	if (!rc_node_wait_flag(np, RC_NODE_DYING)) {
2138 		result = REP_PROTOCOL_FAIL_DELETED;
2139 		(void) pthread_mutex_unlock(&np->rn_lock);
2140 	}
2141 
2142 	return (result);
2143 }
2144 
2145 /*
2146  * Fails with
2147  *   _NOT_SET - ptr is reset
2148  *   _DELETED - node has been deleted
2149  */
2150 static rc_node_t *
2151 rc_node_ptr_check_and_lock(rc_node_ptr_t *npp, int *res)
2152 {
2153 	rc_node_t *np = npp->rnp_node;
2154 	if (np == NULL) {
2155 		if (npp->rnp_deleted)
2156 			*res = REP_PROTOCOL_FAIL_DELETED;
2157 		else
2158 			*res = REP_PROTOCOL_FAIL_NOT_SET;
2159 		return (NULL);
2160 	}
2161 
2162 	(void) pthread_mutex_lock(&np->rn_lock);
2163 	if (!rc_node_wait_flag(np, RC_NODE_DYING)) {
2164 		(void) pthread_mutex_unlock(&np->rn_lock);
2165 		rc_node_clear(npp, 1);
2166 		*res = REP_PROTOCOL_FAIL_DELETED;
2167 		return (NULL);
2168 	}
2169 	return (np);
2170 }
2171 
/*
 * Entry-check macros.  Each expands to a block that returns a
 * REP_PROTOCOL_FAIL_* code from the *enclosing function* when the check
 * fails, so they may only be used in functions returning such a code.
 *
 * The RC_NODE_* forms take an rc_node_t * and are built on
 * rc_node_check_and_lock().  The RC_NODE_PTR_GET_* forms take an
 * rc_node_ptr_t * (npp), are built on rc_node_ptr_check_and_lock(), and
 * store the node in np.
 *
 *   ..._CHECK_AND_LOCK	on success the node's rn_lock is held
 *   ..._CHECK		on success the lock has been dropped again
 *   ..._CHECK_AND_HOLD	on success a hold was taken and the lock dropped
 */
#define	RC_NODE_CHECK_AND_LOCK(n) {					\
	int rc__res = rc_node_check_and_lock(n);			\
	if (rc__res != REP_PROTOCOL_SUCCESS)				\
		return (rc__res);					\
}

#define	RC_NODE_CHECK(n) {						\
	RC_NODE_CHECK_AND_LOCK(n);					\
	(void) pthread_mutex_unlock(&(n)->rn_lock);			\
}

#define	RC_NODE_CHECK_AND_HOLD(n) {					\
	RC_NODE_CHECK_AND_LOCK(n);					\
	rc_node_hold_locked(n);						\
	(void) pthread_mutex_unlock(&(n)->rn_lock);			\
}

#define	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp) {			\
	int rc__res;							\
	(np) = rc_node_ptr_check_and_lock(npp, &rc__res);		\
	if ((np) == NULL)						\
		return (rc__res);					\
}

#define	RC_NODE_PTR_GET_CHECK(np, npp) {				\
	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);			\
	(void) pthread_mutex_unlock(&(np)->rn_lock);			\
}

#define	RC_NODE_PTR_GET_CHECK_AND_HOLD(np, npp) {			\
	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);			\
	rc_node_hold_locked(np);					\
	(void) pthread_mutex_unlock(&(np)->rn_lock);			\
}
2205 
/*
 * Flag-acquisition macros.  Both must be invoked with np->rn_lock held on
 * a node that is not RC_NODE_DEAD.  They try to take flag on np with
 * rc_node_hold_flag(); if that fails they drop np->rn_lock and return
 * _DELETED from the *enclosing function*.  On success the lock is still
 * held.
 *
 * HOLD_PTR_FLAG_OR_RETURN additionally clears npp (marking it deleted) on
 * failure; np must be the node npp refers to.
 */
#define	HOLD_FLAG_OR_RETURN(np, flag) {					\
	assert(MUTEX_HELD(&(np)->rn_lock));				\
	assert(!((np)->rn_flags & RC_NODE_DEAD));			\
	if (rc_node_hold_flag((np), flag) == 0) {			\
		(void) pthread_mutex_unlock(&(np)->rn_lock);		\
		return (REP_PROTOCOL_FAIL_DELETED);			\
	}								\
}

#define	HOLD_PTR_FLAG_OR_RETURN(np, npp, flag) {			\
	assert(MUTEX_HELD(&(np)->rn_lock));				\
	assert(!((np)->rn_flags & RC_NODE_DEAD));			\
	if (rc_node_hold_flag((np), flag) == 0) {			\
		(void) pthread_mutex_unlock(&(np)->rn_lock);		\
		assert((np) == (npp)->rnp_node);			\
		rc_node_clear(npp, 1);					\
		return (REP_PROTOCOL_FAIL_DELETED);			\
	}								\
}
2225 
2226 int
2227 rc_local_scope(uint32_t type, rc_node_ptr_t *out)
2228 {
2229 	if (type != REP_PROTOCOL_ENTITY_SCOPE) {
2230 		rc_node_clear(out, 0);
2231 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
2232 	}
2233 
2234 	/*
2235 	 * the main scope never gets destroyed
2236 	 */
2237 	rc_node_assign(out, rc_scope);
2238 
2239 	return (REP_PROTOCOL_SUCCESS);
2240 }
2241 
2242 /*
2243  * Fails with
2244  *   _NOT_SET - npp is not set
2245  *   _DELETED - the node npp pointed at has been deleted
2246  *   _TYPE_MISMATCH - type is not _SCOPE
2247  *   _NOT_FOUND - scope has no parent
2248  */
2249 static int
2250 rc_scope_parent_scope(rc_node_ptr_t *npp, uint32_t type, rc_node_ptr_t *out)
2251 {
2252 	rc_node_t *np;
2253 
2254 	rc_node_clear(out, 0);
2255 
2256 	RC_NODE_PTR_GET_CHECK(np, npp);
2257 
2258 	if (type != REP_PROTOCOL_ENTITY_SCOPE)
2259 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
2260 
2261 	return (REP_PROTOCOL_FAIL_NOT_FOUND);
2262 }
2263 
2264 static int rc_node_pg_check_read_protect(rc_node_t *);
2265 
2266 /*
2267  * Fails with
2268  *   _NOT_SET
2269  *   _DELETED
2270  *   _NOT_APPLICABLE
2271  *   _NOT_FOUND
2272  *   _BAD_REQUEST
2273  *   _TRUNCATED
2274  *   _NO_RESOURCES
2275  */
2276 int
2277 rc_node_name(rc_node_ptr_t *npp, char *buf, size_t sz, uint32_t answertype,
2278     size_t *sz_out)
2279 {
2280 	size_t actual;
2281 	rc_node_t *np;
2282 
2283 	assert(sz == *sz_out);
2284 
2285 	RC_NODE_PTR_GET_CHECK(np, npp);
2286 
2287 	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
2288 		np = np->rn_cchain[0];
2289 		RC_NODE_CHECK(np);
2290 	}
2291 
2292 	switch (answertype) {
2293 	case RP_ENTITY_NAME_NAME:
2294 		if (np->rn_name == NULL)
2295 			return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2296 		actual = strlcpy(buf, np->rn_name, sz);
2297 		break;
2298 	case RP_ENTITY_NAME_PGTYPE:
2299 		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTYGRP)
2300 			return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2301 		actual = strlcpy(buf, np->rn_type, sz);
2302 		break;
2303 	case RP_ENTITY_NAME_PGFLAGS:
2304 		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTYGRP)
2305 			return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2306 		actual = snprintf(buf, sz, "%d", np->rn_pgflags);
2307 		break;
2308 	case RP_ENTITY_NAME_SNAPLEVEL_SCOPE:
2309 		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPLEVEL)
2310 			return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2311 		actual = strlcpy(buf, np->rn_snaplevel->rsl_scope, sz);
2312 		break;
2313 	case RP_ENTITY_NAME_SNAPLEVEL_SERVICE:
2314 		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPLEVEL)
2315 			return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2316 		actual = strlcpy(buf, np->rn_snaplevel->rsl_service, sz);
2317 		break;
2318 	case RP_ENTITY_NAME_SNAPLEVEL_INSTANCE:
2319 		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPLEVEL)
2320 			return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2321 		if (np->rn_snaplevel->rsl_instance == NULL)
2322 			return (REP_PROTOCOL_FAIL_NOT_FOUND);
2323 		actual = strlcpy(buf, np->rn_snaplevel->rsl_instance, sz);
2324 		break;
2325 	case RP_ENTITY_NAME_PGREADPROT:
2326 	{
2327 		int ret;
2328 
2329 		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTYGRP)
2330 			return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2331 		ret = rc_node_pg_check_read_protect(np);
2332 		assert(ret != REP_PROTOCOL_FAIL_TYPE_MISMATCH);
2333 		switch (ret) {
2334 		case REP_PROTOCOL_FAIL_PERMISSION_DENIED:
2335 			actual = snprintf(buf, sz, "1");
2336 			break;
2337 		case REP_PROTOCOL_SUCCESS:
2338 			actual = snprintf(buf, sz, "0");
2339 			break;
2340 		default:
2341 			return (ret);
2342 		}
2343 		break;
2344 	}
2345 	default:
2346 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
2347 	}
2348 	if (actual >= sz)
2349 		return (REP_PROTOCOL_FAIL_TRUNCATED);
2350 
2351 	*sz_out = actual;
2352 	return (REP_PROTOCOL_SUCCESS);
2353 }
2354 
2355 int
2356 rc_node_get_property_type(rc_node_ptr_t *npp, rep_protocol_value_type_t *out)
2357 {
2358 	rc_node_t *np;
2359 
2360 	RC_NODE_PTR_GET_CHECK(np, npp);
2361 
2362 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTY)
2363 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
2364 
2365 	*out = np->rn_valtype;
2366 
2367 	return (REP_PROTOCOL_SUCCESS);
2368 }
2369 
2370 /*
2371  * Get np's parent.  If np is deleted, returns _DELETED.  Otherwise puts a hold
2372  * on the parent, returns a pointer to it in *out, and returns _SUCCESS.
2373  */
2374 static int
2375 rc_node_parent(rc_node_t *np, rc_node_t **out)
2376 {
2377 	rc_node_t *pnp;
2378 	rc_node_t *np_orig;
2379 
2380 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
2381 		RC_NODE_CHECK_AND_LOCK(np);
2382 	} else {
2383 		np = np->rn_cchain[0];
2384 		RC_NODE_CHECK_AND_LOCK(np);
2385 	}
2386 
2387 	np_orig = np;
2388 	rc_node_hold_locked(np);		/* simplifies the remainder */
2389 
2390 	for (;;) {
2391 		if (!rc_node_wait_flag(np,
2392 		    RC_NODE_IN_TX | RC_NODE_USING_PARENT)) {
2393 			rc_node_rele_locked(np);
2394 			return (REP_PROTOCOL_FAIL_DELETED);
2395 		}
2396 
2397 		if (!(np->rn_flags & RC_NODE_OLD))
2398 			break;
2399 
2400 		rc_node_rele_locked(np);
2401 		np = cache_lookup(&np_orig->rn_id);
2402 		assert(np != np_orig);
2403 
2404 		if (np == NULL)
2405 			goto deleted;
2406 		(void) pthread_mutex_lock(&np->rn_lock);
2407 	}
2408 
2409 	/* guaranteed to succeed without dropping the lock */
2410 	if (!rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
2411 		(void) pthread_mutex_unlock(&np->rn_lock);
2412 		*out = NULL;
2413 		rc_node_rele(np);
2414 		return (REP_PROTOCOL_FAIL_DELETED);
2415 	}
2416 
2417 	assert(np->rn_parent != NULL);
2418 	pnp = np->rn_parent;
2419 	(void) pthread_mutex_unlock(&np->rn_lock);
2420 
2421 	(void) pthread_mutex_lock(&pnp->rn_lock);
2422 	(void) pthread_mutex_lock(&np->rn_lock);
2423 	rc_node_rele_flag(np, RC_NODE_USING_PARENT);
2424 	(void) pthread_mutex_unlock(&np->rn_lock);
2425 
2426 	rc_node_hold_locked(pnp);
2427 
2428 	(void) pthread_mutex_unlock(&pnp->rn_lock);
2429 
2430 	rc_node_rele(np);
2431 	*out = pnp;
2432 	return (REP_PROTOCOL_SUCCESS);
2433 
2434 deleted:
2435 	rc_node_rele(np);
2436 	return (REP_PROTOCOL_FAIL_DELETED);
2437 }
2438 
2439 /*
2440  * Fails with
2441  *   _NOT_SET
2442  *   _DELETED
2443  */
2444 static int
2445 rc_node_ptr_parent(rc_node_ptr_t *npp, rc_node_t **out)
2446 {
2447 	rc_node_t *np;
2448 
2449 	RC_NODE_PTR_GET_CHECK(np, npp);
2450 
2451 	return (rc_node_parent(np, out));
2452 }
2453 
2454 /*
2455  * Fails with
2456  *   _NOT_SET - npp is not set
2457  *   _DELETED - the node npp pointed at has been deleted
2458  *   _TYPE_MISMATCH - npp's node's parent is not of type type
2459  *
2460  * If npp points to a scope, can also fail with
2461  *   _NOT_FOUND - scope has no parent
2462  */
2463 int
2464 rc_node_get_parent(rc_node_ptr_t *npp, uint32_t type, rc_node_ptr_t *out)
2465 {
2466 	rc_node_t *pnp;
2467 	int rc;
2468 
2469 	if (npp->rnp_node != NULL &&
2470 	    npp->rnp_node->rn_id.rl_type == REP_PROTOCOL_ENTITY_SCOPE)
2471 		return (rc_scope_parent_scope(npp, type, out));
2472 
2473 	if ((rc = rc_node_ptr_parent(npp, &pnp)) != REP_PROTOCOL_SUCCESS) {
2474 		rc_node_clear(out, 0);
2475 		return (rc);
2476 	}
2477 
2478 	if (type != pnp->rn_id.rl_type) {
2479 		rc_node_rele(pnp);
2480 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
2481 	}
2482 
2483 	rc_node_assign(out, pnp);
2484 	rc_node_rele(pnp);
2485 
2486 	return (REP_PROTOCOL_SUCCESS);
2487 }
2488 
2489 int
2490 rc_node_parent_type(rc_node_ptr_t *npp, uint32_t *type_out)
2491 {
2492 	rc_node_t *pnp;
2493 	int rc;
2494 
2495 	if (npp->rnp_node != NULL &&
2496 	    npp->rnp_node->rn_id.rl_type == REP_PROTOCOL_ENTITY_SCOPE) {
2497 		*type_out = REP_PROTOCOL_ENTITY_SCOPE;
2498 		return (REP_PROTOCOL_SUCCESS);
2499 	}
2500 
2501 	if ((rc = rc_node_ptr_parent(npp, &pnp)) != REP_PROTOCOL_SUCCESS)
2502 		return (rc);
2503 
2504 	*type_out = pnp->rn_id.rl_type;
2505 
2506 	rc_node_rele(pnp);
2507 
2508 	return (REP_PROTOCOL_SUCCESS);
2509 }
2510 
2511 /*
2512  * Fails with
2513  *   _INVALID_TYPE - type is invalid
2514  *   _TYPE_MISMATCH - np doesn't carry children of type type
2515  *   _DELETED - np has been deleted
2516  *   _NOT_FOUND - no child with that name/type combo found
2517  *   _NO_RESOURCES
2518  *   _BACKEND_ACCESS
2519  */
2520 int
2521 rc_node_get_child(rc_node_ptr_t *npp, const char *name, uint32_t type,
2522     rc_node_ptr_t *outp)
2523 {
2524 	rc_node_t *np, *cp;
2525 	rc_node_t *child = NULL;
2526 	int ret, idx;
2527 
2528 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
2529 	if ((ret = rc_check_type_name(type, name)) == REP_PROTOCOL_SUCCESS) {
2530 		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
2531 			ret = rc_node_find_named_child(np, name, type, &child);
2532 		} else {
2533 			(void) pthread_mutex_unlock(&np->rn_lock);
2534 			ret = REP_PROTOCOL_SUCCESS;
2535 			for (idx = 0; idx < COMPOSITION_DEPTH; idx++) {
2536 				cp = np->rn_cchain[idx];
2537 				if (cp == NULL)
2538 					break;
2539 				RC_NODE_CHECK_AND_LOCK(cp);
2540 				ret = rc_node_find_named_child(cp, name, type,
2541 				    &child);
2542 				(void) pthread_mutex_unlock(&cp->rn_lock);
2543 				/*
2544 				 * loop only if we succeeded, but no child of
2545 				 * the correct name was found.
2546 				 */
2547 				if (ret != REP_PROTOCOL_SUCCESS ||
2548 				    child != NULL)
2549 					break;
2550 			}
2551 			(void) pthread_mutex_lock(&np->rn_lock);
2552 		}
2553 	}
2554 	(void) pthread_mutex_unlock(&np->rn_lock);
2555 
2556 	if (ret == REP_PROTOCOL_SUCCESS) {
2557 		rc_node_assign(outp, child);
2558 		if (child != NULL)
2559 			rc_node_rele(child);
2560 		else
2561 			ret = REP_PROTOCOL_FAIL_NOT_FOUND;
2562 	} else {
2563 		rc_node_assign(outp, NULL);
2564 	}
2565 	return (ret);
2566 }
2567 
2568 int
2569 rc_node_update(rc_node_ptr_t *npp)
2570 {
2571 	cache_bucket_t *bp;
2572 	rc_node_t *np = npp->rnp_node;
2573 	rc_node_t *nnp;
2574 	rc_node_t *cpg = NULL;
2575 
2576 	if (np != NULL &&
2577 	    np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
2578 		/*
2579 		 * If we're updating a composed property group, actually
2580 		 * update the top-level property group & return the
2581 		 * appropriate value.  But leave *nnp pointing at us.
2582 		 */
2583 		cpg = np;
2584 		np = np->rn_cchain[0];
2585 	}
2586 
2587 	RC_NODE_CHECK(np);
2588 
2589 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTYGRP &&
2590 	    np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPSHOT)
2591 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
2592 
2593 	for (;;) {
2594 		bp = cache_hold(np->rn_hash);
2595 		nnp = cache_lookup_unlocked(bp, &np->rn_id);
2596 		if (nnp == NULL) {
2597 			cache_release(bp);
2598 			rc_node_clear(npp, 1);
2599 			return (REP_PROTOCOL_FAIL_DELETED);
2600 		}
2601 		/*
2602 		 * grab the lock before dropping the cache bucket, so
2603 		 * that no one else can sneak in
2604 		 */
2605 		(void) pthread_mutex_lock(&nnp->rn_lock);
2606 		cache_release(bp);
2607 
2608 		if (!(nnp->rn_flags & RC_NODE_IN_TX) ||
2609 		    !rc_node_wait_flag(nnp, RC_NODE_IN_TX))
2610 			break;
2611 
2612 		rc_node_rele_locked(nnp);
2613 	}
2614 
2615 	/*
2616 	 * If it is dead, we want to update it so that it will continue to
2617 	 * report being dead.
2618 	 */
2619 	if (nnp->rn_flags & RC_NODE_DEAD) {
2620 		(void) pthread_mutex_unlock(&nnp->rn_lock);
2621 		if (nnp != np && cpg == NULL)
2622 			rc_node_assign(npp, nnp);	/* updated */
2623 		rc_node_rele(nnp);
2624 		return (REP_PROTOCOL_FAIL_DELETED);
2625 	}
2626 
2627 	assert(!(nnp->rn_flags & RC_NODE_OLD));
2628 	(void) pthread_mutex_unlock(&nnp->rn_lock);
2629 
2630 	if (nnp != np && cpg == NULL)
2631 		rc_node_assign(npp, nnp);		/* updated */
2632 
2633 	rc_node_rele(nnp);
2634 
2635 	return ((nnp == np)? REP_PROTOCOL_SUCCESS : REP_PROTOCOL_DONE);
2636 }
2637 
2638 /*
2639  * does a generic modification check, for creation, deletion, and snapshot
2640  * management only.  Property group transactions have different checks.
2641  */
2642 int
2643 rc_node_modify_permission_check(void)
2644 {
2645 	int rc = REP_PROTOCOL_SUCCESS;
2646 	permcheck_t *pcp;
2647 	int granted;
2648 
2649 	if (!client_is_privileged()) {
2650 #ifdef NATIVE_BUILD
2651 		rc = REP_PROTOCOL_FAIL_PERMISSION_DENIED;
2652 #else
2653 		pcp = pc_create();
2654 		if (pcp != NULL) {
2655 			rc = perm_add_enabling(pcp, AUTH_MODIFY);
2656 
2657 			if (rc == REP_PROTOCOL_SUCCESS) {
2658 				granted = perm_granted(pcp);
2659 
2660 				if (granted < 0)
2661 					rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
2662 			}
2663 
2664 			pc_free(pcp);
2665 		} else {
2666 			rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
2667 		}
2668 
2669 		if (rc == REP_PROTOCOL_SUCCESS && !granted)
2670 			rc = REP_PROTOCOL_FAIL_PERMISSION_DENIED;
2671 #endif /* NATIVE_BUILD */
2672 	}
2673 	return (rc);
2674 }
2675 
2676 /*
2677  * Fails with
2678  *   _DELETED - node has been deleted
2679  *   _NOT_SET - npp is reset
2680  *   _NOT_APPLICABLE - type is _PROPERTYGRP
2681  *   _INVALID_TYPE - node is corrupt or type is invalid
2682  *   _TYPE_MISMATCH - node cannot have children of type type
2683  *   _BAD_REQUEST - name is invalid
2684  *		    cannot create children for this type of node
2685  *   _NO_RESOURCES - out of memory, or could not allocate new id
2686  *   _PERMISSION_DENIED
2687  *   _BACKEND_ACCESS
2688  *   _BACKEND_READONLY
2689  *   _EXISTS - child already exists
2690  */
2691 int
2692 rc_node_create_child(rc_node_ptr_t *npp, uint32_t type, const char *name,
2693     rc_node_ptr_t *cpp)
2694 {
2695 	rc_node_t *np;
2696 	rc_node_t *cp = NULL;
2697 	int rc, perm_rc;
2698 
2699 	rc_node_clear(cpp, 0);
2700 
2701 	perm_rc = rc_node_modify_permission_check();
2702 
2703 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
2704 
2705 	/*
2706 	 * there is a separate interface for creating property groups
2707 	 */
2708 	if (type == REP_PROTOCOL_ENTITY_PROPERTYGRP) {
2709 		(void) pthread_mutex_unlock(&np->rn_lock);
2710 		return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2711 	}
2712 
2713 	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
2714 		(void) pthread_mutex_unlock(&np->rn_lock);
2715 		np = np->rn_cchain[0];
2716 		RC_NODE_CHECK_AND_LOCK(np);
2717 	}
2718 
2719 	if ((rc = rc_check_parent_child(np->rn_id.rl_type, type)) !=
2720 	    REP_PROTOCOL_SUCCESS) {
2721 		(void) pthread_mutex_unlock(&np->rn_lock);
2722 		return (rc);
2723 	}
2724 	if ((rc = rc_check_type_name(type, name)) != REP_PROTOCOL_SUCCESS) {
2725 		(void) pthread_mutex_unlock(&np->rn_lock);
2726 		return (rc);
2727 	}
2728 
2729 	if (perm_rc != REP_PROTOCOL_SUCCESS) {
2730 		(void) pthread_mutex_unlock(&np->rn_lock);
2731 		return (perm_rc);
2732 	}
2733 
2734 	HOLD_PTR_FLAG_OR_RETURN(np, npp, RC_NODE_CREATING_CHILD);
2735 	(void) pthread_mutex_unlock(&np->rn_lock);
2736 
2737 	rc = object_create(np, type, name, &cp);
2738 	assert(rc != REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2739 
2740 	if (rc == REP_PROTOCOL_SUCCESS) {
2741 		rc_node_assign(cpp, cp);
2742 		rc_node_rele(cp);
2743 	}
2744 
2745 	(void) pthread_mutex_lock(&np->rn_lock);
2746 	rc_node_rele_flag(np, RC_NODE_CREATING_CHILD);
2747 	(void) pthread_mutex_unlock(&np->rn_lock);
2748 
2749 	return (rc);
2750 }
2751 
2752 int
2753 rc_node_create_child_pg(rc_node_ptr_t *npp, uint32_t type, const char *name,
2754     const char *pgtype, uint32_t flags, rc_node_ptr_t *cpp)
2755 {
2756 	rc_node_t *np;
2757 	rc_node_t *cp;
2758 	int rc;
2759 	permcheck_t *pcp;
2760 	int granted;
2761 
2762 	rc_node_clear(cpp, 0);
2763 
2764 	/* verify flags is valid */
2765 	if (flags & ~SCF_PG_FLAG_NONPERSISTENT)
2766 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
2767 
2768 	RC_NODE_PTR_GET_CHECK_AND_HOLD(np, npp);
2769 
2770 	if (type != REP_PROTOCOL_ENTITY_PROPERTYGRP) {
2771 		rc_node_rele(np);
2772 		return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2773 	}
2774 
2775 	if ((rc = rc_check_parent_child(np->rn_id.rl_type, type)) !=
2776 	    REP_PROTOCOL_SUCCESS) {
2777 		rc_node_rele(np);
2778 		return (rc);
2779 	}
2780 	if ((rc = rc_check_type_name(type, name)) != REP_PROTOCOL_SUCCESS ||
2781 	    (rc = rc_check_pgtype_name(pgtype)) != REP_PROTOCOL_SUCCESS) {
2782 		rc_node_rele(np);
2783 		return (rc);
2784 	}
2785 
2786 	if (!client_is_privileged()) {
2787 #ifdef NATIVE_BUILD
2788 		rc = REP_PROTOCOL_FAIL_PERMISSION_DENIED;
2789 #else
2790 		/* Must have .smf.modify or smf.modify.<type> authorization */
2791 		pcp = pc_create();
2792 		if (pcp != NULL) {
2793 			rc = perm_add_enabling(pcp, AUTH_MODIFY);
2794 
2795 			if (rc == REP_PROTOCOL_SUCCESS) {
2796 				const char * const auth =
2797 				    perm_auth_for_pgtype(pgtype);
2798 
2799 				if (auth != NULL)
2800 					rc = perm_add_enabling(pcp, auth);
2801 			}
2802 
2803 			/*
2804 			 * .manage or $action_authorization can be used to
2805 			 * create the actions pg and the general_ovr pg.
2806 			 */
2807 			if (rc == REP_PROTOCOL_SUCCESS &&
2808 			    (flags & SCF_PG_FLAG_NONPERSISTENT) != 0 &&
2809 			    np->rn_id.rl_type == REP_PROTOCOL_ENTITY_INSTANCE &&
2810 			    ((strcmp(name, AUTH_PG_ACTIONS) == 0 &&
2811 			    strcmp(pgtype, AUTH_PG_ACTIONS_TYPE) == 0) ||
2812 			    (strcmp(name, AUTH_PG_GENERAL_OVR) == 0 &&
2813 			    strcmp(pgtype, AUTH_PG_GENERAL_OVR_TYPE) == 0))) {
2814 				rc = perm_add_enabling(pcp, AUTH_MANAGE);
2815 
2816 				if (rc == REP_PROTOCOL_SUCCESS)
2817 					rc = perm_add_inst_action_auth(pcp, np);
2818 			}
2819 
2820 			if (rc == REP_PROTOCOL_SUCCESS) {
2821 				granted = perm_granted(pcp);
2822 
2823 				if (granted < 0)
2824 					rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
2825 			}
2826 
2827 			pc_free(pcp);
2828 		} else {
2829 			rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
2830 		}
2831 
2832 		if (rc == REP_PROTOCOL_SUCCESS && !granted)
2833 			rc = REP_PROTOCOL_FAIL_PERMISSION_DENIED;
2834 #endif /* NATIVE_BUILD */
2835 
2836 		if (rc != REP_PROTOCOL_SUCCESS) {
2837 			rc_node_rele(np);
2838 			return (rc);
2839 		}
2840 	}
2841 
2842 	(void) pthread_mutex_lock(&np->rn_lock);
2843 	HOLD_PTR_FLAG_OR_RETURN(np, npp, RC_NODE_CREATING_CHILD);
2844 	(void) pthread_mutex_unlock(&np->rn_lock);
2845 
2846 	rc = object_create_pg(np, type, name, pgtype, flags, &cp);
2847 
2848 	if (rc == REP_PROTOCOL_SUCCESS) {
2849 		rc_node_assign(cpp, cp);
2850 		rc_node_rele(cp);
2851 	}
2852 
2853 	(void) pthread_mutex_lock(&np->rn_lock);
2854 	rc_node_rele_flag(np, RC_NODE_CREATING_CHILD);
2855 	(void) pthread_mutex_unlock(&np->rn_lock);
2856 
2857 	return (rc);
2858 }
2859 
2860 static void
2861 rc_pg_notify_fire(rc_node_pg_notify_t *pnp)
2862 {
2863 	assert(MUTEX_HELD(&rc_pg_notify_lock));
2864 
2865 	if (pnp->rnpn_pg != NULL) {
2866 		uu_list_remove(pnp->rnpn_pg->rn_pg_notify_list, pnp);
2867 		(void) close(pnp->rnpn_fd);
2868 
2869 		pnp->rnpn_pg = NULL;
2870 		pnp->rnpn_fd = -1;
2871 	} else {
2872 		assert(pnp->rnpn_fd == -1);
2873 	}
2874 }
2875 
2876 static void
2877 rc_notify_node_delete(rc_notify_delete_t *ndp, rc_node_t *np_arg)
2878 {
2879 	rc_node_t *svc = NULL;
2880 	rc_node_t *inst = NULL;
2881 	rc_node_t *pg = NULL;
2882 	rc_node_t *np = np_arg;
2883 	rc_node_t *nnp;
2884 
2885 	while (svc == NULL) {
2886 		(void) pthread_mutex_lock(&np->rn_lock);
2887 		if (!rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
2888 			(void) pthread_mutex_unlock(&np->rn_lock);
2889 			goto cleanup;
2890 		}
2891 		nnp = np->rn_parent;
2892 		rc_node_hold_locked(np);	/* hold it in place */
2893 
2894 		switch (np->rn_id.rl_type) {
2895 		case REP_PROTOCOL_ENTITY_PROPERTYGRP:
2896 			assert(pg == NULL);
2897 			pg = np;
2898 			break;
2899 		case REP_PROTOCOL_ENTITY_INSTANCE:
2900 			assert(inst == NULL);
2901 			inst = np;
2902 			break;
2903 		case REP_PROTOCOL_ENTITY_SERVICE:
2904 			assert(svc == NULL);
2905 			svc = np;
2906 			break;
2907 		default:
2908 			rc_node_rele_flag(np, RC_NODE_USING_PARENT);
2909 			rc_node_rele_locked(np);
2910 			goto cleanup;
2911 		}
2912 
2913 		(void) pthread_mutex_unlock(&np->rn_lock);
2914 
2915 		np = nnp;
2916 		if (np == NULL)
2917 			goto cleanup;
2918 	}
2919 
2920 	rc_notify_deletion(ndp,
2921 	    svc->rn_name,
2922 	    inst != NULL ? inst->rn_name : NULL,
2923 	    pg != NULL ? pg->rn_name : NULL);
2924 
2925 	ndp = NULL;
2926 
2927 cleanup:
2928 	if (ndp != NULL)
2929 		uu_free(ndp);
2930 
2931 	for (;;) {
2932 		if (svc != NULL) {
2933 			np = svc;
2934 			svc = NULL;
2935 		} else if (inst != NULL) {
2936 			np = inst;
2937 			inst = NULL;
2938 		} else if (pg != NULL) {
2939 			np = pg;
2940 			pg = NULL;
2941 		} else
2942 			break;
2943 
2944 		(void) pthread_mutex_lock(&np->rn_lock);
2945 		rc_node_rele_flag(np, RC_NODE_USING_PARENT);
2946 		rc_node_rele_locked(np);
2947 	}
2948 }
2949 
2950 /*
2951  * N.B.:  this function drops np->rn_lock on the way out.
2952  */
2953 static void
2954 rc_node_delete_hold(rc_node_t *np, int andformer)
2955 {
2956 	rc_node_t *cp;
2957 
2958 again:
2959 	assert(MUTEX_HELD(&np->rn_lock));
2960 	assert((np->rn_flags & RC_NODE_DYING_FLAGS) == RC_NODE_DYING_FLAGS);
2961 
2962 	for (cp = uu_list_first(np->rn_children); cp != NULL;
2963 	    cp = uu_list_next(np->rn_children, cp)) {
2964 		(void) pthread_mutex_lock(&cp->rn_lock);
2965 		(void) pthread_mutex_unlock(&np->rn_lock);
2966 		if (!rc_node_hold_flag(cp, RC_NODE_DYING_FLAGS)) {
2967 			/*
2968 			 * already marked as dead -- can't happen, since that
2969 			 * would require setting RC_NODE_CHILDREN_CHANGING
2970 			 * in np, and we're holding that...
2971 			 */
2972 			abort();
2973 		}
2974 		rc_node_delete_hold(cp, andformer);	/* recurse, drop lock */
2975 
2976 		(void) pthread_mutex_lock(&np->rn_lock);
2977 	}
2978 	if (andformer && (cp = np->rn_former) != NULL) {
2979 		(void) pthread_mutex_lock(&cp->rn_lock);
2980 		(void) pthread_mutex_unlock(&np->rn_lock);
2981 		if (!rc_node_hold_flag(cp, RC_NODE_DYING_FLAGS))
2982 			abort();		/* can't happen, see above */
2983 		np = cp;
2984 		goto again;		/* tail-recurse down rn_former */
2985 	}
2986 	(void) pthread_mutex_unlock(&np->rn_lock);
2987 }
2988 
2989 /*
2990  * N.B.:  this function drops np->rn_lock on the way out.
2991  */
2992 static void
2993 rc_node_delete_rele(rc_node_t *np, int andformer)
2994 {
2995 	rc_node_t *cp;
2996 
2997 again:
2998 	assert(MUTEX_HELD(&np->rn_lock));
2999 	assert((np->rn_flags & RC_NODE_DYING_FLAGS) == RC_NODE_DYING_FLAGS);
3000 
3001 	for (cp = uu_list_first(np->rn_children); cp != NULL;
3002 	    cp = uu_list_next(np->rn_children, cp)) {
3003 		(void) pthread_mutex_lock(&cp->rn_lock);
3004 		(void) pthread_mutex_unlock(&np->rn_lock);
3005 		rc_node_delete_rele(cp, andformer);	/* recurse, drop lock */
3006 		(void) pthread_mutex_lock(&np->rn_lock);
3007 	}
3008 	if (andformer && (cp = np->rn_former) != NULL) {
3009 		(void) pthread_mutex_lock(&cp->rn_lock);
3010 		rc_node_rele_flag(np, RC_NODE_DYING_FLAGS);
3011 		(void) pthread_mutex_unlock(&np->rn_lock);
3012 
3013 		np = cp;
3014 		goto again;		/* tail-recurse down rn_former */
3015 	}
3016 	rc_node_rele_flag(np, RC_NODE_DYING_FLAGS);
3017 	(void) pthread_mutex_unlock(&np->rn_lock);
3018 }
3019 
3020 static void
3021 rc_node_finish_delete(rc_node_t *cp)
3022 {
3023 	cache_bucket_t *bp;
3024 	rc_node_pg_notify_t *pnp;
3025 
3026 	assert(MUTEX_HELD(&cp->rn_lock));
3027 
3028 	if (!(cp->rn_flags & RC_NODE_OLD)) {
3029 		assert(cp->rn_flags & RC_NODE_IN_PARENT);
3030 		if (!rc_node_wait_flag(cp, RC_NODE_USING_PARENT)) {
3031 			abort();		/* can't happen, see above */
3032 		}
3033 		cp->rn_flags &= ~RC_NODE_IN_PARENT;
3034 		cp->rn_parent = NULL;
3035 	}
3036 
3037 	cp->rn_flags |= RC_NODE_DEAD;
3038 
3039 	/*
3040 	 * If this node is not out-dated, we need to remove it from
3041 	 * the notify list and cache hash table.
3042 	 */
3043 	if (!(cp->rn_flags & RC_NODE_OLD)) {
3044 		assert(cp->rn_refs > 0);	/* can't go away yet */
3045 		(void) pthread_mutex_unlock(&cp->rn_lock);
3046 
3047 		(void) pthread_mutex_lock(&rc_pg_notify_lock);
3048 		while ((pnp = uu_list_first(cp->rn_pg_notify_list)) != NULL)
3049 			rc_pg_notify_fire(pnp);
3050 		(void) pthread_mutex_unlock(&rc_pg_notify_lock);
3051 		rc_notify_remove_node(cp);
3052 
3053 		bp = cache_hold(cp->rn_hash);
3054 		(void) pthread_mutex_lock(&cp->rn_lock);
3055 		cache_remove_unlocked(bp, cp);
3056 		cache_release(bp);
3057 	}
3058 }
3059 
3060 /*
3061  * N.B.:  this function drops np->rn_lock and a reference on the way out.
3062  */
3063 static void
3064 rc_node_delete_children(rc_node_t *np, int andformer)
3065 {
3066 	rc_node_t *cp;
3067 
3068 again:
3069 	assert(np->rn_refs > 0);
3070 	assert(MUTEX_HELD(&np->rn_lock));
3071 	assert(np->rn_flags & RC_NODE_DEAD);
3072 
3073 	while ((cp = uu_list_first(np->rn_children)) != NULL) {
3074 		uu_list_remove(np->rn_children, cp);
3075 		(void) pthread_mutex_lock(&cp->rn_lock);
3076 		(void) pthread_mutex_unlock(&np->rn_lock);
3077 		rc_node_hold_locked(cp);	/* hold while we recurse */
3078 		rc_node_finish_delete(cp);
3079 		rc_node_delete_children(cp, andformer);	/* drops lock + ref */
3080 		(void) pthread_mutex_lock(&np->rn_lock);
3081 	}
3082 
3083 	/*
3084 	 * when we drop cp's lock, all the children will be gone, so we
3085 	 * can release DYING_FLAGS.
3086 	 */
3087 	rc_node_rele_flag(np, RC_NODE_DYING_FLAGS);
3088 	if (andformer && (cp = np->rn_former) != NULL) {
3089 		np->rn_former = NULL;		/* unlink */
3090 		(void) pthread_mutex_lock(&cp->rn_lock);
3091 		(void) pthread_mutex_unlock(&np->rn_lock);
3092 		np->rn_flags &= ~RC_NODE_ON_FORMER;
3093 
3094 		rc_node_hold_locked(cp);	/* hold while we loop */
3095 
3096 		rc_node_finish_delete(cp);
3097 
3098 		rc_node_rele(np);		/* drop the old reference */
3099 
3100 		np = cp;
3101 		goto again;		/* tail-recurse down rn_former */
3102 	}
3103 	rc_node_rele_locked(np);
3104 }
3105 
3106 static void
3107 rc_node_unrefed(rc_node_t *np)
3108 {
3109 	int unrefed;
3110 	rc_node_t *pp, *cur;
3111 
3112 	assert(MUTEX_HELD(&np->rn_lock));
3113 	assert(np->rn_refs == 0);
3114 	assert(np->rn_other_refs == 0);
3115 	assert(np->rn_other_refs_held == 0);
3116 
3117 	if (np->rn_flags & RC_NODE_DEAD) {
3118 		(void) pthread_mutex_unlock(&np->rn_lock);
3119 		rc_node_destroy(np);
3120 		return;
3121 	}
3122 
3123 	assert(np->rn_flags & RC_NODE_OLD);
3124 	if (np->rn_flags & RC_NODE_UNREFED) {
3125 		(void) pthread_mutex_unlock(&np->rn_lock);
3126 		return;
3127 	}
3128 	np->rn_flags |= RC_NODE_UNREFED;
3129 
3130 	(void) pthread_mutex_unlock(&np->rn_lock);
3131 
3132 	/*
3133 	 * find the current in-hash object, and grab it's RC_NODE_IN_TX
3134 	 * flag.  That protects the entire rn_former chain.
3135 	 */
3136 	for (;;) {
3137 		pp = cache_lookup(&np->rn_id);
3138 		if (pp == NULL) {
3139 			(void) pthread_mutex_lock(&np->rn_lock);
3140 			if (np->rn_flags & RC_NODE_DEAD)
3141 				goto died;
3142 			/*
3143 			 * We are trying to unreference this node, but the
3144 			 * owner of the former list does not exist.  It must
3145 			 * be the case that another thread is deleting this
3146 			 * entire sub-branch, but has not yet reached us.
3147 			 * We will in short order be deleted.
3148 			 */
3149 			np->rn_flags &= ~RC_NODE_UNREFED;
3150 			(void) pthread_mutex_unlock(&np->rn_lock);
3151 			return;
3152 		}
3153 		if (pp == np) {
3154 			/*
3155 			 * no longer unreferenced
3156 			 */
3157 			(void) pthread_mutex_lock(&np->rn_lock);
3158 			np->rn_flags &= ~RC_NODE_UNREFED;
3159 			rc_node_rele_locked(np);
3160 			return;
3161 		}
3162 		(void) pthread_mutex_lock(&pp->rn_lock);
3163 		if ((pp->rn_flags & RC_NODE_OLD) ||
3164 		    !rc_node_hold_flag(pp, RC_NODE_IN_TX)) {
3165 			rc_node_rele_locked(pp);
3166 			continue;
3167 		}
3168 		if (!(pp->rn_flags & RC_NODE_OLD)) {
3169 			(void) pthread_mutex_unlock(&pp->rn_lock);
3170 			break;
3171 		}
3172 		rc_node_rele_flag(pp, RC_NODE_IN_TX);
3173 		rc_node_rele_locked(pp);
3174 	}
3175 
3176 	(void) pthread_mutex_lock(&np->rn_lock);
3177 	if (!(np->rn_flags & (RC_NODE_OLD | RC_NODE_DEAD)) ||
3178 	    np->rn_refs != 0 || np->rn_other_refs != 0 ||
3179 	    np->rn_other_refs_held != 0) {
3180 		np->rn_flags &= ~RC_NODE_UNREFED;
3181 		(void) pthread_mutex_lock(&pp->rn_lock);
3182 
3183 		rc_node_rele_flag(pp, RC_NODE_IN_TX);
3184 		rc_node_rele_locked(pp);
3185 		return;
3186 	}
3187 
3188 	if (!rc_node_hold_flag(np, RC_NODE_DYING_FLAGS)) {
3189 		(void) pthread_mutex_unlock(&np->rn_lock);
3190 
3191 		rc_node_rele_flag(pp, RC_NODE_IN_TX);
3192 		rc_node_rele_locked(pp);
3193 
3194 		(void) pthread_mutex_lock(&np->rn_lock);
3195 		goto died;
3196 	}
3197 
3198 	rc_node_delete_hold(np, 0);
3199 
3200 	(void) pthread_mutex_lock(&np->rn_lock);
3201 	if (!(np->rn_flags & RC_NODE_OLD) ||
3202 	    np->rn_refs != 0 || np->rn_other_refs != 0 ||
3203 	    np->rn_other_refs_held != 0) {
3204 		np->rn_flags &= ~RC_NODE_UNREFED;
3205 		rc_node_delete_rele(np, 0);
3206 
3207 		(void) pthread_mutex_lock(&pp->rn_lock);
3208 		rc_node_rele_flag(pp, RC_NODE_IN_TX);
3209 		rc_node_rele_locked(pp);
3210 		return;
3211 	}
3212 
3213 	np->rn_flags |= RC_NODE_DEAD;
3214 	rc_node_hold_locked(np);
3215 	rc_node_delete_children(np, 0);
3216 
3217 	/*
3218 	 * It's gone -- remove it from the former chain and destroy it.
3219 	 */
3220 	(void) pthread_mutex_lock(&pp->rn_lock);
3221 	for (cur = pp; cur != NULL && cur->rn_former != np;
3222 	    cur = cur->rn_former)
3223 		;
3224 	assert(cur != NULL && cur != np);
3225 
3226 	cur->rn_former = np->rn_former;
3227 	np->rn_former = NULL;
3228 
3229 	rc_node_rele_flag(pp, RC_NODE_IN_TX);
3230 	rc_node_rele_locked(pp);
3231 
3232 	(void) pthread_mutex_lock(&np->rn_lock);
3233 	assert(np->rn_flags & RC_NODE_ON_FORMER);
3234 	np->rn_flags &= ~(RC_NODE_UNREFED | RC_NODE_ON_FORMER);
3235 	(void) pthread_mutex_unlock(&np->rn_lock);
3236 	rc_node_destroy(np);
3237 	return;
3238 
3239 died:
3240 	np->rn_flags &= ~RC_NODE_UNREFED;
3241 	unrefed = (np->rn_refs == 0 && np->rn_other_refs == 0 &&
3242 	    np->rn_other_refs_held == 0);
3243 	(void) pthread_mutex_unlock(&np->rn_lock);
3244 	if (unrefed)
3245 		rc_node_destroy(np);
3246 }
3247 
3248 /*
3249  * Fails with
3250  *   _NOT_SET
3251  *   _DELETED
3252  *   _BAD_REQUEST
3253  *   _PERMISSION_DENIED
3254  *   _NO_RESOURCES
3255  * and whatever object_delete() fails with.
3256  */
3257 int
3258 rc_node_delete(rc_node_ptr_t *npp)
3259 {
3260 	rc_node_t *np, *np_orig;
3261 	rc_node_t *pp = NULL;
3262 	int rc;
3263 	rc_node_pg_notify_t *pnp;
3264 	cache_bucket_t *bp;
3265 	rc_notify_delete_t *ndp;
3266 	permcheck_t *pcp;
3267 	int granted;
3268 
3269 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
3270 
3271 	switch (np->rn_id.rl_type) {
3272 	case REP_PROTOCOL_ENTITY_SERVICE:
3273 	case REP_PROTOCOL_ENTITY_INSTANCE:
3274 	case REP_PROTOCOL_ENTITY_SNAPSHOT:
3275 		break;			/* deletable */
3276 
3277 	case REP_PROTOCOL_ENTITY_SCOPE:
3278 	case REP_PROTOCOL_ENTITY_SNAPLEVEL:
3279 		/* Scopes and snaplevels are indelible. */
3280 		(void) pthread_mutex_unlock(&np->rn_lock);
3281 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
3282 
3283 	case REP_PROTOCOL_ENTITY_CPROPERTYGRP:
3284 		(void) pthread_mutex_unlock(&np->rn_lock);
3285 		np = np->rn_cchain[0];
3286 		RC_NODE_CHECK_AND_LOCK(np);
3287 		break;
3288 
3289 	case REP_PROTOCOL_ENTITY_PROPERTYGRP:
3290 		if (np->rn_id.rl_ids[ID_SNAPSHOT] == 0)
3291 			break;
3292 
3293 		/* Snapshot property groups are indelible. */
3294 		(void) pthread_mutex_unlock(&np->rn_lock);
3295 		return (REP_PROTOCOL_FAIL_PERMISSION_DENIED);
3296 
3297 	case REP_PROTOCOL_ENTITY_PROPERTY:
3298 		(void) pthread_mutex_unlock(&np->rn_lock);
3299 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
3300 
3301 	default:
3302 		assert(0);
3303 		abort();
3304 		break;
3305 	}
3306 
3307 	np_orig = np;
3308 	rc_node_hold_locked(np);	/* simplifies rest of the code */
3309 
3310 again:
3311 	/*
3312 	 * The following loop is to deal with the fact that snapshots and
3313 	 * property groups are moving targets -- changes to them result
3314 	 * in a new "child" node.  Since we can only delete from the top node,
3315 	 * we have to loop until we have a non-RC_NODE_OLD version.
3316 	 */
3317 	for (;;) {
3318 		if (!rc_node_wait_flag(np,
3319 		    RC_NODE_IN_TX | RC_NODE_USING_PARENT)) {
3320 			rc_node_rele_locked(np);
3321 			return (REP_PROTOCOL_FAIL_DELETED);
3322 		}
3323 
3324 		if (np->rn_flags & RC_NODE_OLD) {
3325 			rc_node_rele_locked(np);
3326 			np = cache_lookup(&np_orig->rn_id);
3327 			assert(np != np_orig);
3328 
3329 			if (np == NULL) {
3330 				rc = REP_PROTOCOL_FAIL_DELETED;
3331 				goto fail;
3332 			}
3333 			(void) pthread_mutex_lock(&np->rn_lock);
3334 			continue;
3335 		}
3336 
3337 		if (!rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
3338 			rc_node_rele_locked(np);
3339 			rc_node_clear(npp, 1);
3340 			return (REP_PROTOCOL_FAIL_DELETED);
3341 		}
3342 
3343 		/*
3344 		 * Mark our parent as children changing.  this call drops our
3345 		 * lock and the RC_NODE_USING_PARENT flag, and returns with
3346 		 * pp's lock held
3347 		 */
3348 		pp = rc_node_hold_parent_flag(np, RC_NODE_CHILDREN_CHANGING);
3349 		if (pp == NULL) {
3350 			/* our parent is gone, we're going next... */
3351 			rc_node_rele(np);
3352 
3353 			rc_node_clear(npp, 1);
3354 			return (REP_PROTOCOL_FAIL_DELETED);
3355 		}
3356 
3357 		rc_node_hold_locked(pp);		/* hold for later */
3358 		(void) pthread_mutex_unlock(&pp->rn_lock);
3359 
3360 		(void) pthread_mutex_lock(&np->rn_lock);
3361 		if (!(np->rn_flags & RC_NODE_OLD))
3362 			break;			/* not old -- we're done */
3363 
3364 		(void) pthread_mutex_unlock(&np->rn_lock);
3365 		(void) pthread_mutex_lock(&pp->rn_lock);
3366 		rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
3367 		rc_node_rele_locked(pp);
3368 		(void) pthread_mutex_lock(&np->rn_lock);
3369 		continue;			/* loop around and try again */
3370 	}
3371 	/*
3372 	 * Everyone out of the pool -- we grab everything but
3373 	 * RC_NODE_USING_PARENT (including RC_NODE_DYING) to keep
3374 	 * any changes from occurring while we are attempting to
3375 	 * delete the node.
3376 	 */
3377 	if (!rc_node_hold_flag(np, RC_NODE_DYING_FLAGS)) {
3378 		(void) pthread_mutex_unlock(&np->rn_lock);
3379 		rc = REP_PROTOCOL_FAIL_DELETED;
3380 		goto fail;
3381 	}
3382 
3383 	assert(!(np->rn_flags & RC_NODE_OLD));
3384 
3385 	if (!client_is_privileged()) {
3386 		/* permission check */
3387 		(void) pthread_mutex_unlock(&np->rn_lock);
3388 
3389 #ifdef NATIVE_BUILD
3390 		rc = REP_PROTOCOL_FAIL_PERMISSION_DENIED;
3391 #else
3392 		pcp = pc_create();
3393 		if (pcp != NULL) {
3394 			rc = perm_add_enabling(pcp, AUTH_MODIFY);
3395 
3396 			/* add .smf.modify.<type> for pgs. */
3397 			if (rc == REP_PROTOCOL_SUCCESS && np->rn_id.rl_type ==
3398 			    REP_PROTOCOL_ENTITY_PROPERTYGRP) {
3399 				const char * const auth =
3400 				    perm_auth_for_pgtype(np->rn_type);
3401 
3402 				if (auth != NULL)
3403 					rc = perm_add_enabling(pcp, auth);
3404 			}
3405 
3406 			if (rc == REP_PROTOCOL_SUCCESS) {
3407 				granted = perm_granted(pcp);
3408 
3409 				if (granted < 0)
3410 					rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
3411 			}
3412 
3413 			pc_free(pcp);
3414 		} else {
3415 			rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
3416 		}
3417 
3418 		if (rc == REP_PROTOCOL_SUCCESS && !granted)
3419 			rc = REP_PROTOCOL_FAIL_PERMISSION_DENIED;
3420 #endif /* NATIVE_BUILD */
3421 
3422 		if (rc != REP_PROTOCOL_SUCCESS) {
3423 			(void) pthread_mutex_lock(&np->rn_lock);
3424 			rc_node_rele_flag(np, RC_NODE_DYING_FLAGS);
3425 			(void) pthread_mutex_unlock(&np->rn_lock);
3426 			goto fail;
3427 		}
3428 
3429 		(void) pthread_mutex_lock(&np->rn_lock);
3430 	}
3431 
3432 	ndp = uu_zalloc(sizeof (*ndp));
3433 	if (ndp == NULL) {
3434 		rc_node_rele_flag(np, RC_NODE_DYING_FLAGS);
3435 		(void) pthread_mutex_unlock(&np->rn_lock);
3436 		rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
3437 		goto fail;
3438 	}
3439 
3440 	rc_node_delete_hold(np, 1);	/* hold entire subgraph, drop lock */
3441 
3442 	rc = object_delete(np);
3443 
3444 	if (rc != REP_PROTOCOL_SUCCESS) {
3445 		(void) pthread_mutex_lock(&np->rn_lock);
3446 		rc_node_delete_rele(np, 1);		/* drops lock */
3447 		uu_free(ndp);
3448 		goto fail;
3449 	}
3450 
3451 	/*
3452 	 * Now, delicately unlink and delete the object.
3453 	 *
3454 	 * Create the delete notification, atomically remove
3455 	 * from the hash table and set the NODE_DEAD flag, and
3456 	 * remove from the parent's children list.
3457 	 */
3458 	rc_notify_node_delete(ndp, np); /* frees or uses ndp */
3459 
3460 	bp = cache_hold(np->rn_hash);
3461 
3462 	(void) pthread_mutex_lock(&np->rn_lock);
3463 	cache_remove_unlocked(bp, np);
3464 	cache_release(bp);
3465 
3466 	np->rn_flags |= RC_NODE_DEAD;
3467 	if (pp != NULL) {
3468 		(void) pthread_mutex_unlock(&np->rn_lock);
3469 
3470 		(void) pthread_mutex_lock(&pp->rn_lock);
3471 		(void) pthread_mutex_lock(&np->rn_lock);
3472 		uu_list_remove(pp->rn_children, np);
3473 		rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
3474 		(void) pthread_mutex_unlock(&pp->rn_lock);
3475 		np->rn_flags &= ~RC_NODE_IN_PARENT;
3476 	}
3477 	/*
3478 	 * finally, propagate death to our children, handle notifications,
3479 	 * and release our hold.
3480 	 */
3481 	rc_node_hold_locked(np);	/* hold for delete */
3482 	rc_node_delete_children(np, 1);	/* drops DYING_FLAGS, lock, ref */
3483 
3484 	rc_node_clear(npp, 1);
3485 
3486 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
3487 	while ((pnp = uu_list_first(np->rn_pg_notify_list)) != NULL)
3488 		rc_pg_notify_fire(pnp);
3489 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
3490 	rc_notify_remove_node(np);
3491 
3492 	rc_node_rele(np);
3493 
3494 	return (rc);
3495 
3496 fail:
3497 	rc_node_rele(np);
3498 	if (rc == REP_PROTOCOL_FAIL_DELETED)
3499 		rc_node_clear(npp, 1);
3500 	if (pp != NULL) {
3501 		(void) pthread_mutex_lock(&pp->rn_lock);
3502 		rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
3503 		rc_node_rele_locked(pp);	/* drop ref and lock */
3504 	}
3505 	return (rc);
3506 }
3507 
3508 int
3509 rc_node_next_snaplevel(rc_node_ptr_t *npp, rc_node_ptr_t *cpp)
3510 {
3511 	rc_node_t *np;
3512 	rc_node_t *cp, *pp;
3513 	int res;
3514 
3515 	rc_node_clear(cpp, 0);
3516 
3517 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
3518 
3519 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPSHOT &&
3520 	    np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPLEVEL) {
3521 		(void) pthread_mutex_unlock(&np->rn_lock);
3522 		return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
3523 	}
3524 
3525 	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_SNAPSHOT) {
3526 		if ((res = rc_node_fill_children(np,
3527 		    REP_PROTOCOL_ENTITY_SNAPLEVEL)) != REP_PROTOCOL_SUCCESS) {
3528 			(void) pthread_mutex_unlock(&np->rn_lock);
3529 			return (res);
3530 		}
3531 
3532 		for (cp = uu_list_first(np->rn_children);
3533 		    cp != NULL;
3534 		    cp = uu_list_next(np->rn_children, cp)) {
3535 			if (cp->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPLEVEL)
3536 				continue;
3537 			rc_node_hold(cp);
3538 			break;
3539 		}
3540 
3541 		(void) pthread_mutex_unlock(&np->rn_lock);
3542 	} else {
3543 		HOLD_PTR_FLAG_OR_RETURN(np, npp, RC_NODE_USING_PARENT);
3544 		/*
3545 		 * mark our parent as children changing.  This call drops our
3546 		 * lock and the RC_NODE_USING_PARENT flag, and returns with
3547 		 * pp's lock held
3548 		 */
3549 		pp = rc_node_hold_parent_flag(np, RC_NODE_CHILDREN_CHANGING);
3550 		if (pp == NULL) {
3551 			/* our parent is gone, we're going next... */
3552 
3553 			rc_node_clear(npp, 1);
3554 			return (REP_PROTOCOL_FAIL_DELETED);
3555 		}
3556 
3557 		/*
3558 		 * find the next snaplevel
3559 		 */
3560 		cp = np;
3561 		while ((cp = uu_list_next(pp->rn_children, cp)) != NULL &&
3562 		    cp->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPLEVEL)
3563 			;
3564 
3565 		/* it must match the snaplevel list */
3566 		assert((cp == NULL && np->rn_snaplevel->rsl_next == NULL) ||
3567 		    (cp != NULL && np->rn_snaplevel->rsl_next ==
3568 		    cp->rn_snaplevel));
3569 
3570 		if (cp != NULL)
3571 			rc_node_hold(cp);
3572 
3573 		rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
3574 
3575 		(void) pthread_mutex_unlock(&pp->rn_lock);
3576 	}
3577 
3578 	rc_node_assign(cpp, cp);
3579 	if (cp != NULL) {
3580 		rc_node_rele(cp);
3581 
3582 		return (REP_PROTOCOL_SUCCESS);
3583 	}
3584 	return (REP_PROTOCOL_FAIL_NOT_FOUND);
3585 }
3586 
3587 /*
3588  * This call takes a snapshot (np) and either:
3589  *	an existing snapid (to be associated with np), or
3590  *	a non-NULL parentp (from which a new snapshot is taken, and associated
3591  *	    with np)
3592  *
3593  * To do the association, np is duplicated, the duplicate is made to
3594  * represent the new snapid, and np is replaced with the new rc_node_t on
3595  * np's parent's child list. np is placed on the new node's rn_former list,
3596  * and replaces np in cache_hash (so rc_node_update() will find the new one).
3597  */
3598 static int
3599 rc_attach_snapshot(rc_node_t *np, uint32_t snapid, rc_node_t *parentp)
3600 {
3601 	rc_node_t *np_orig;
3602 	rc_node_t *nnp, *prev;
3603 	rc_node_t *pp;
3604 	int rc;
3605 
3606 	if (parentp != NULL)
3607 		assert(snapid == 0);
3608 
3609 	assert(MUTEX_HELD(&np->rn_lock));
3610 
3611 	np_orig = np;
3612 	rc_node_hold_locked(np);		/* simplifies the remainder */
3613 
3614 	(void) pthread_mutex_unlock(&np->rn_lock);
3615 	if ((rc = rc_node_modify_permission_check()) != REP_PROTOCOL_SUCCESS)
3616 		return (rc);
3617 	(void) pthread_mutex_lock(&np->rn_lock);
3618 
3619 	/*
3620 	 * get the latest node, holding RC_NODE_IN_TX to keep the rn_former
3621 	 * list from changing.
3622 	 */
3623 	for (;;) {
3624 		if (!(np->rn_flags & RC_NODE_OLD)) {
3625 			if (!rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
3626 				goto again;
3627 			}
3628 			pp = rc_node_hold_parent_flag(np,
3629 			    RC_NODE_CHILDREN_CHANGING);
3630 
3631 			(void) pthread_mutex_lock(&np->rn_lock);
3632 			if (pp == NULL) {
3633 				goto again;
3634 			}
3635 			if (np->rn_flags & RC_NODE_OLD) {
3636 				rc_node_rele_flag(pp,
3637 				    RC_NODE_CHILDREN_CHANGING);
3638 				(void) pthread_mutex_unlock(&pp->rn_lock);
3639 				goto again;
3640 			}
3641 			(void) pthread_mutex_unlock(&pp->rn_lock);
3642 
3643 			if (!rc_node_hold_flag(np, RC_NODE_IN_TX)) {
3644 				/*
3645 				 * Can't happen, since we're holding our
3646 				 * parent's CHILDREN_CHANGING flag...
3647 				 */
3648 				abort();
3649 			}
3650 			break;			/* everything's ready */
3651 		}
3652 again:
3653 		rc_node_rele_locked(np);
3654 		np = cache_lookup(&np_orig->rn_id);
3655 
3656 		if (np == NULL)
3657 			return (REP_PROTOCOL_FAIL_DELETED);
3658 
3659 		(void) pthread_mutex_lock(&np->rn_lock);
3660 	}
3661 
3662 	if (parentp != NULL) {
3663 		if (pp != parentp) {
3664 			rc = REP_PROTOCOL_FAIL_BAD_REQUEST;
3665 			goto fail;
3666 		}
3667 		nnp = NULL;
3668 	} else {
3669 		/*
3670 		 * look for a former node with the snapid we need.
3671 		 */
3672 		if (np->rn_snapshot_id == snapid) {
3673 			rc_node_rele_flag(np, RC_NODE_IN_TX);
3674 			rc_node_rele_locked(np);
3675 
3676 			(void) pthread_mutex_lock(&pp->rn_lock);
3677 			rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
3678 			(void) pthread_mutex_unlock(&pp->rn_lock);
3679 			return (REP_PROTOCOL_SUCCESS);	/* nothing to do */
3680 		}
3681 
3682 		prev = np;
3683 		while ((nnp = prev->rn_former) != NULL) {
3684 			if (nnp->rn_snapshot_id == snapid) {
3685 				rc_node_hold(nnp);
3686 				break;		/* existing node with that id */
3687 			}
3688 			prev = nnp;
3689 		}
3690 	}
3691 
3692 	if (nnp == NULL) {
3693 		prev = NULL;
3694 		nnp = rc_node_alloc();
3695 		if (nnp == NULL) {
3696 			rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
3697 			goto fail;
3698 		}
3699 
3700 		nnp->rn_id = np->rn_id;		/* structure assignment */
3701 		nnp->rn_hash = np->rn_hash;
3702 		nnp->rn_name = strdup(np->rn_name);
3703 		nnp->rn_snapshot_id = snapid;
3704 		nnp->rn_flags = RC_NODE_IN_TX | RC_NODE_USING_PARENT;
3705 
3706 		if (nnp->rn_name == NULL) {
3707 			rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
3708 			goto fail;
3709 		}
3710 	}
3711 
3712 	(void) pthread_mutex_unlock(&np->rn_lock);
3713 
3714 	rc = object_snapshot_attach(&np->rn_id, &snapid, (parentp != NULL));
3715 
3716 	if (parentp != NULL)
3717 		nnp->rn_snapshot_id = snapid;	/* fill in new snapid */
3718 	else
3719 		assert(nnp->rn_snapshot_id == snapid);
3720 
3721 	(void) pthread_mutex_lock(&np->rn_lock);
3722 	if (rc != REP_PROTOCOL_SUCCESS)
3723 		goto fail;
3724 
3725 	/*
3726 	 * fix up the former chain
3727 	 */
3728 	if (prev != NULL) {
3729 		prev->rn_former = nnp->rn_former;
3730 		(void) pthread_mutex_lock(&nnp->rn_lock);
3731 		nnp->rn_flags &= ~RC_NODE_ON_FORMER;
3732 		nnp->rn_former = NULL;
3733 		(void) pthread_mutex_unlock(&nnp->rn_lock);
3734 	}
3735 	np->rn_flags |= RC_NODE_OLD;
3736 	(void) pthread_mutex_unlock(&np->rn_lock);
3737 
3738 	/*
3739 	 * replace np with nnp
3740 	 */
3741 	rc_node_relink_child(pp, np, nnp);
3742 
3743 	rc_node_rele(np);
3744 
3745 	return (REP_PROTOCOL_SUCCESS);
3746 
3747 fail:
3748 	rc_node_rele_flag(np, RC_NODE_IN_TX);
3749 	rc_node_rele_locked(np);
3750 	(void) pthread_mutex_lock(&pp->rn_lock);
3751 	rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
3752 	(void) pthread_mutex_unlock(&pp->rn_lock);
3753 
3754 	if (nnp != NULL) {
3755 		if (prev == NULL)
3756 			rc_node_destroy(nnp);
3757 		else
3758 			rc_node_rele(nnp);
3759 	}
3760 
3761 	return (rc);
3762 }
3763 
3764 int
3765 rc_snapshot_take_new(rc_node_ptr_t *npp, const char *svcname,
3766     const char *instname, const char *name, rc_node_ptr_t *outpp)
3767 {
3768 	rc_node_t *np;
3769 	rc_node_t *outp = NULL;
3770 	int rc, perm_rc;
3771 
3772 	rc_node_clear(outpp, 0);
3773 
3774 	perm_rc = rc_node_modify_permission_check();
3775 
3776 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
3777 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_INSTANCE) {
3778 		(void) pthread_mutex_unlock(&np->rn_lock);
3779 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
3780 	}
3781 
3782 	rc = rc_check_type_name(REP_PROTOCOL_ENTITY_SNAPSHOT, name);
3783 	if (rc != REP_PROTOCOL_SUCCESS) {
3784 		(void) pthread_mutex_unlock(&np->rn_lock);
3785 		return (rc);
3786 	}
3787 
3788 	if (svcname != NULL && (rc =
3789 	    rc_check_type_name(REP_PROTOCOL_ENTITY_SERVICE, svcname)) !=
3790 	    REP_PROTOCOL_SUCCESS) {
3791 		(void) pthread_mutex_unlock(&np->rn_lock);
3792 		return (rc);
3793 	}
3794 
3795 	if (instname != NULL && (rc =
3796 	    rc_check_type_name(REP_PROTOCOL_ENTITY_INSTANCE, instname)) !=
3797 	    REP_PROTOCOL_SUCCESS) {
3798 		(void) pthread_mutex_unlock(&np->rn_lock);
3799 		return (rc);
3800 	}
3801 
3802 	if (perm_rc != REP_PROTOCOL_SUCCESS) {
3803 		(void) pthread_mutex_unlock(&np->rn_lock);
3804 		return (perm_rc);
3805 	}
3806 
3807 	HOLD_PTR_FLAG_OR_RETURN(np, npp, RC_NODE_CREATING_CHILD);
3808 	(void) pthread_mutex_unlock(&np->rn_lock);
3809 
3810 	rc = object_snapshot_take_new(np, svcname, instname, name, &outp);
3811 
3812 	if (rc == REP_PROTOCOL_SUCCESS) {
3813 		rc_node_assign(outpp, outp);
3814 		rc_node_rele(outp);
3815 	}
3816 
3817 	(void) pthread_mutex_lock(&np->rn_lock);
3818 	rc_node_rele_flag(np, RC_NODE_CREATING_CHILD);
3819 	(void) pthread_mutex_unlock(&np->rn_lock);
3820 
3821 	return (rc);
3822 }
3823 
3824 int
3825 rc_snapshot_take_attach(rc_node_ptr_t *npp, rc_node_ptr_t *outpp)
3826 {
3827 	rc_node_t *np, *outp;
3828 
3829 	RC_NODE_PTR_GET_CHECK(np, npp);
3830 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_INSTANCE) {
3831 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
3832 	}
3833 
3834 	RC_NODE_PTR_GET_CHECK_AND_LOCK(outp, outpp);
3835 	if (outp->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPSHOT) {
3836 		(void) pthread_mutex_unlock(&outp->rn_lock);
3837 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
3838 	}
3839 
3840 	return (rc_attach_snapshot(outp, 0, np));	/* drops outp's lock */
3841 }
3842 
3843 int
3844 rc_snapshot_attach(rc_node_ptr_t *npp, rc_node_ptr_t *cpp)
3845 {
3846 	rc_node_t *np;
3847 	rc_node_t *cp;
3848 	uint32_t snapid;
3849 
3850 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
3851 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPSHOT) {
3852 		(void) pthread_mutex_unlock(&np->rn_lock);
3853 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
3854 	}
3855 	snapid = np->rn_snapshot_id;
3856 	(void) pthread_mutex_unlock(&np->rn_lock);
3857 
3858 	RC_NODE_PTR_GET_CHECK_AND_LOCK(cp, cpp);
3859 	if (cp->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPSHOT) {
3860 		(void) pthread_mutex_unlock(&cp->rn_lock);
3861 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
3862 	}
3863 
3864 	return (rc_attach_snapshot(cp, snapid, NULL));	/* drops cp's lock */
3865 }
3866 
3867 /*
3868  * If the pgname property group under ent has type pgtype, and it has a
3869  * propname property with type ptype, return _SUCCESS.  If pgtype is NULL,
3870  * it is not checked.  If ent is not a service node, we will return _SUCCESS if
3871  * a property meeting the requirements exists in either the instance or its
3872  * parent.
3873  *
3874  * Returns
3875  *   _SUCCESS - see above
3876  *   _DELETED - ent or one of its ancestors was deleted
3877  *   _NO_RESOURCES - no resources
3878  *   _NOT_FOUND - no matching property was found
3879  */
3880 static int
3881 rc_svc_prop_exists(rc_node_t *ent, const char *pgname, const char *pgtype,
3882     const char *propname, rep_protocol_value_type_t ptype)
3883 {
3884 	int ret;
3885 	rc_node_t *pg = NULL, *spg = NULL, *svc, *prop;
3886 
3887 	assert(!MUTEX_HELD(&ent->rn_lock));
3888 
3889 	(void) pthread_mutex_lock(&ent->rn_lock);
3890 	ret = rc_node_find_named_child(ent, pgname,
3891 	    REP_PROTOCOL_ENTITY_PROPERTYGRP, &pg);
3892 	(void) pthread_mutex_unlock(&ent->rn_lock);
3893 
3894 	switch (ret) {
3895 	case REP_PROTOCOL_SUCCESS:
3896 		break;
3897 
3898 	case REP_PROTOCOL_FAIL_DELETED:
3899 	case REP_PROTOCOL_FAIL_NO_RESOURCES:
3900 		return (ret);
3901 
3902 	default:
3903 		bad_error("rc_node_find_named_child", ret);
3904 	}
3905 
3906 	if (ent->rn_id.rl_type != REP_PROTOCOL_ENTITY_SERVICE) {
3907 		ret = rc_node_find_ancestor(ent, REP_PROTOCOL_ENTITY_SERVICE,
3908 		    &svc);
3909 		if (ret != REP_PROTOCOL_SUCCESS) {
3910 			assert(ret == REP_PROTOCOL_FAIL_DELETED);
3911 			if (pg != NULL)
3912 				rc_node_rele(pg);
3913 			return (ret);
3914 		}
3915 		assert(svc->rn_id.rl_type == REP_PROTOCOL_ENTITY_SERVICE);
3916 
3917 		(void) pthread_mutex_lock(&svc->rn_lock);
3918 		ret = rc_node_find_named_child(svc, pgname,
3919 		    REP_PROTOCOL_ENTITY_PROPERTYGRP, &spg);
3920 		(void) pthread_mutex_unlock(&svc->rn_lock);
3921 
3922 		rc_node_rele(svc);
3923 
3924 		switch (ret) {
3925 		case REP_PROTOCOL_SUCCESS:
3926 			break;
3927 
3928 		case REP_PROTOCOL_FAIL_DELETED:
3929 		case REP_PROTOCOL_FAIL_NO_RESOURCES:
3930 			if (pg != NULL)
3931 				rc_node_rele(pg);
3932 			return (ret);
3933 
3934 		default:
3935 			bad_error("rc_node_find_named_child", ret);
3936 		}
3937 	}
3938 
3939 	if (pg != NULL &&
3940 	    pgtype != NULL && strcmp(pg->rn_type, pgtype) != 0) {
3941 		rc_node_rele(pg);
3942 		pg = NULL;
3943 	}
3944 
3945 	if (spg != NULL &&
3946 	    pgtype != NULL && strcmp(spg->rn_type, pgtype) != 0) {
3947 		rc_node_rele(spg);
3948 		spg = NULL;
3949 	}
3950 
3951 	if (pg == NULL) {
3952 		if (spg == NULL)
3953 			return (REP_PROTOCOL_FAIL_NOT_FOUND);
3954 		pg = spg;
3955 		spg = NULL;
3956 	}
3957 
3958 	/*
3959 	 * At this point, pg is non-NULL, and is a property group node of the
3960 	 * correct type.  spg, if non-NULL, is also a property group node of
3961 	 * the correct type.  Check for the property in pg first, then spg
3962 	 * (if applicable).
3963 	 */
3964 	(void) pthread_mutex_lock(&pg->rn_lock);
3965 	ret = rc_node_find_named_child(pg, propname,
3966 	    REP_PROTOCOL_ENTITY_PROPERTY, &prop);
3967 	(void) pthread_mutex_unlock(&pg->rn_lock);
3968 	rc_node_rele(pg);
3969 	switch (ret) {
3970 	case REP_PROTOCOL_SUCCESS:
3971 		if (prop != NULL) {
3972 			if (prop->rn_valtype == ptype) {
3973 				rc_node_rele(prop);
3974 				if (spg != NULL)
3975 					rc_node_rele(spg);
3976 				return (REP_PROTOCOL_SUCCESS);
3977 			}
3978 			rc_node_rele(prop);
3979 		}
3980 		break;
3981 
3982 	case REP_PROTOCOL_FAIL_NO_RESOURCES:
3983 		if (spg != NULL)
3984 			rc_node_rele(spg);
3985 		return (ret);
3986 
3987 	case REP_PROTOCOL_FAIL_DELETED:
3988 		break;
3989 
3990 	default:
3991 		bad_error("rc_node_find_named_child", ret);
3992 	}
3993 
3994 	if (spg == NULL)
3995 		return (REP_PROTOCOL_FAIL_NOT_FOUND);
3996 
3997 	pg = spg;
3998 
3999 	(void) pthread_mutex_lock(&pg->rn_lock);
4000 	ret = rc_node_find_named_child(pg, propname,
4001 	    REP_PROTOCOL_ENTITY_PROPERTY, &prop);
4002 	(void) pthread_mutex_unlock(&pg->rn_lock);
4003 	rc_node_rele(pg);
4004 	switch (ret) {
4005 	case REP_PROTOCOL_SUCCESS:
4006 		if (prop != NULL) {
4007 			if (prop->rn_valtype == ptype) {
4008 				rc_node_rele(prop);
4009 				return (REP_PROTOCOL_SUCCESS);
4010 			}
4011 			rc_node_rele(prop);
4012 		}
4013 		return (REP_PROTOCOL_FAIL_NOT_FOUND);
4014 
4015 	case REP_PROTOCOL_FAIL_NO_RESOURCES:
4016 		return (ret);
4017 
4018 	case REP_PROTOCOL_FAIL_DELETED:
4019 		return (REP_PROTOCOL_FAIL_NOT_FOUND);
4020 
4021 	default:
4022 		bad_error("rc_node_find_named_child", ret);
4023 	}
4024 
4025 	return (REP_PROTOCOL_SUCCESS);
4026 }
4027 
4028 /*
4029  * Given a property group node, returns _SUCCESS if the property group may
4030  * be read without any special authorization.
4031  *
4032  * Fails with:
4033  *   _DELETED - np or an ancestor node was deleted
4034  *   _TYPE_MISMATCH - np does not refer to a property group
4035  *   _NO_RESOURCES - no resources
4036  *   _PERMISSION_DENIED - authorization is required
4037  */
4038 static int
4039 rc_node_pg_check_read_protect(rc_node_t *np)
4040 {
4041 	int ret;
4042 	rc_node_t *ent;
4043 
4044 	assert(!MUTEX_HELD(&np->rn_lock));
4045 
4046 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTYGRP)
4047 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
4048 
4049 	if (strcmp(np->rn_type, SCF_GROUP_FRAMEWORK) == 0 ||
4050 	    strcmp(np->rn_type, SCF_GROUP_DEPENDENCY) == 0 ||
4051 	    strcmp(np->rn_type, SCF_GROUP_METHOD) == 0)
4052 		return (REP_PROTOCOL_SUCCESS);
4053 
4054 	ret = rc_node_parent(np, &ent);
4055 
4056 	if (ret != REP_PROTOCOL_SUCCESS)
4057 		return (ret);
4058 
4059 	ret = rc_svc_prop_exists(ent, np->rn_name, np->rn_type,
4060 	    AUTH_PROP_READ, REP_PROTOCOL_TYPE_STRING);
4061 
4062 	rc_node_rele(ent);
4063 
4064 	switch (ret) {
4065 	case REP_PROTOCOL_FAIL_NOT_FOUND:
4066 		return (REP_PROTOCOL_SUCCESS);
4067 	case REP_PROTOCOL_SUCCESS:
4068 		return (REP_PROTOCOL_FAIL_PERMISSION_DENIED);
4069 	case REP_PROTOCOL_FAIL_DELETED:
4070 	case REP_PROTOCOL_FAIL_NO_RESOURCES:
4071 		return (ret);
4072 	default:
4073 		bad_error("rc_svc_prop_exists", ret);
4074 	}
4075 
4076 	return (REP_PROTOCOL_SUCCESS);
4077 }
4078 
4079 /*
4080  * Fails with
4081  *   _DELETED - np's node or parent has been deleted
4082  *   _TYPE_MISMATCH - np's node is not a property
4083  *   _NO_RESOURCES - out of memory
4084  *   _PERMISSION_DENIED - no authorization to read this property's value(s)
4085  *   _BAD_REQUEST - np's parent is not a property group
4086  */
4087 static int
4088 rc_node_property_may_read(rc_node_t *np)
4089 {
4090 	int ret, granted = 0;
4091 	rc_node_t *pgp;
4092 	permcheck_t *pcp;
4093 
4094 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTY)
4095 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
4096 
4097 	if (client_is_privileged())
4098 		return (REP_PROTOCOL_SUCCESS);
4099 
4100 #ifdef NATIVE_BUILD
4101 	return (REP_PROTOCOL_FAIL_PERMISSION_DENIED);
4102 #else
4103 	ret = rc_node_parent(np, &pgp);
4104 
4105 	if (ret != REP_PROTOCOL_SUCCESS)
4106 		return (ret);
4107 
4108 	if (pgp->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTYGRP) {
4109 		rc_node_rele(pgp);
4110 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4111 	}
4112 
4113 	ret = rc_node_pg_check_read_protect(pgp);
4114 
4115 	if (ret != REP_PROTOCOL_FAIL_PERMISSION_DENIED) {
4116 		rc_node_rele(pgp);
4117 		return (ret);
4118 	}
4119 
4120 	pcp = pc_create();
4121 
4122 	if (pcp == NULL) {
4123 		rc_node_rele(pgp);
4124 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
4125 	}
4126 
4127 	ret = perm_add_enabling(pcp, AUTH_MODIFY);
4128 
4129 	if (ret == REP_PROTOCOL_SUCCESS) {
4130 		const char * const auth =
4131 		    perm_auth_for_pgtype(pgp->rn_type);
4132 
4133 		if (auth != NULL)
4134 			ret = perm_add_enabling(pcp, auth);
4135 	}
4136 
4137 	/*
4138 	 * If you are permitted to modify the value, you may also
4139 	 * read it.  This means that both the MODIFY and VALUE
4140 	 * authorizations are acceptable.  We don't allow requests
4141 	 * for AUTH_PROP_MODIFY if all you have is $AUTH_PROP_VALUE,
4142 	 * however, to avoid leaking possibly valuable information
4143 	 * since such a user can't change the property anyway.
4144 	 */
4145 	if (ret == REP_PROTOCOL_SUCCESS)
4146 		ret = perm_add_enabling_values(pcp, pgp,
4147 		    AUTH_PROP_MODIFY);
4148 
4149 	if (ret == REP_PROTOCOL_SUCCESS &&
4150 	    strcmp(np->rn_name, AUTH_PROP_MODIFY) != 0)
4151 		ret = perm_add_enabling_values(pcp, pgp,
4152 		    AUTH_PROP_VALUE);
4153 
4154 	if (ret == REP_PROTOCOL_SUCCESS)
4155 		ret = perm_add_enabling_values(pcp, pgp,
4156 		    AUTH_PROP_READ);
4157 
4158 	rc_node_rele(pgp);
4159 
4160 	if (ret == REP_PROTOCOL_SUCCESS) {
4161 		granted = perm_granted(pcp);
4162 		if (granted < 0)
4163 			ret = REP_PROTOCOL_FAIL_NO_RESOURCES;
4164 	}
4165 
4166 	pc_free(pcp);
4167 
4168 	if (ret == REP_PROTOCOL_SUCCESS && !granted)
4169 		ret = REP_PROTOCOL_FAIL_PERMISSION_DENIED;
4170 
4171 	return (ret);
4172 #endif	/* NATIVE_BUILD */
4173 }
4174 
4175 /*
4176  * Iteration
4177  */
4178 static int
4179 rc_iter_filter_name(rc_node_t *np, void *s)
4180 {
4181 	const char *name = s;
4182 
4183 	return (strcmp(np->rn_name, name) == 0);
4184 }
4185 
4186 static int
4187 rc_iter_filter_type(rc_node_t *np, void *s)
4188 {
4189 	const char *type = s;
4190 
4191 	return (np->rn_type != NULL && strcmp(np->rn_type, type) == 0);
4192 }
4193 
/*
 * Iterator filter which accepts every node: it ignores both arguments and
 * always returns 1.  rc_iter_create() installs it when the caller passes a
 * NULL filter.
 */
/*ARGSUSED*/
static int
rc_iter_null_filter(rc_node_t *np, void *s)
{
	return (1);
}
4200 
/*
 * Allocate & initialize an rc_node_iter_t structure.  Essentially, ensure
 * np->rn_children is populated and call uu_list_walk_start(np->rn_children).
 * If successful, leaves a hold on np & increments np->rn_other_refs
 *
 * If composed is true, then set up for iteration across the top level of np's
 * composition chain.  If successful, leaves a hold on np and increments
 * rn_other_refs for the top level of np's composition chain.  A composed
 * property group is always iterated as composed, whatever the caller passed.
 *
 * If the composition chain turns out to be empty (an empty snapshot, or every
 * level rejected by rc_node_check_and_lock()), the iterator is still returned
 * successfully, but with rni_iter == NULL and rni_clevel == -1.
 *
 * filter may be NULL, in which case rc_iter_null_filter is used.  arg is
 * stored in the iterator as the filter argument.  On failure the iterator
 * structure is freed, *resp is left untouched, and arg is not freed here.
 *
 * Fails with
 *   _NO_RESOURCES
 *   _INVALID_TYPE
 *   _TYPE_MISMATCH - np cannot carry type children
 *   _DELETED
 */
static int
rc_iter_create(rc_node_iter_t **resp, rc_node_t *np, uint32_t type,
    rc_iter_filter_func *filter, void *arg, boolean_t composed)
{
	rc_node_iter_t *nip;
	int res;

	assert(*resp == NULL);

	nip = uu_zalloc(sizeof (*nip));
	if (nip == NULL)
		return (REP_PROTOCOL_FAIL_NO_RESOURCES);

	/* np is held by the client's rc_node_ptr_t */
	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP)
		composed = 1;

	if (!composed) {
		/* Plain iteration: walk np's own child list. */
		(void) pthread_mutex_lock(&np->rn_lock);

		if ((res = rc_node_fill_children(np, type)) !=
		    REP_PROTOCOL_SUCCESS) {
			(void) pthread_mutex_unlock(&np->rn_lock);
			uu_free(nip);
			return (res);
		}

		/* A negative level marks the iterator as not composed. */
		nip->rni_clevel = -1;

		nip->rni_iter = uu_list_walk_start(np->rn_children,
		    UU_WALK_ROBUST);
		if (nip->rni_iter != NULL) {
			/* Remember which node's list the walker is on. */
			nip->rni_iter_node = np;
			rc_node_hold_other(np);
		} else {
			(void) pthread_mutex_unlock(&np->rn_lock);
			uu_free(nip);
			return (REP_PROTOCOL_FAIL_NO_RESOURCES);
		}
		(void) pthread_mutex_unlock(&np->rn_lock);
	} else {
		rc_node_t *ent;

		if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_SNAPSHOT) {
			/* rn_cchain isn't valid until children are loaded. */
			(void) pthread_mutex_lock(&np->rn_lock);
			res = rc_node_fill_children(np,
			    REP_PROTOCOL_ENTITY_SNAPLEVEL);
			(void) pthread_mutex_unlock(&np->rn_lock);
			if (res != REP_PROTOCOL_SUCCESS) {
				uu_free(nip);
				return (res);
			}

			/* Check for an empty snapshot. */
			if (np->rn_cchain[0] == NULL)
				goto empty;
		}

		/* Start at the top of the composition chain. */
		for (nip->rni_clevel = 0; ; ++nip->rni_clevel) {
			if (nip->rni_clevel >= COMPOSITION_DEPTH) {
				/* Empty composition chain. */
empty:
				nip->rni_clevel = -1;
				nip->rni_iter = NULL;
				/* It's ok, iter_next() will return _DONE. */
				goto out;
			}

			ent = np->rn_cchain[nip->rni_clevel];
			assert(ent != NULL);

			/* On success we leave the loop holding ent->rn_lock. */
			if (rc_node_check_and_lock(ent) == REP_PROTOCOL_SUCCESS)
				break;

			/* Someone deleted it, so try the next one. */
		}

		res = rc_node_fill_children(ent, type);

		if (res == REP_PROTOCOL_SUCCESS) {
			nip->rni_iter = uu_list_walk_start(ent->rn_children,
			    UU_WALK_ROBUST);

			if (nip->rni_iter == NULL)
				res = REP_PROTOCOL_FAIL_NO_RESOURCES;
			else {
				/* The walker is on ent's list, not np's. */
				nip->rni_iter_node = ent;
				rc_node_hold_other(ent);
			}
		}

		if (res != REP_PROTOCOL_SUCCESS) {
			(void) pthread_mutex_unlock(&ent->rn_lock);
			uu_free(nip);
			return (res);
		}

		(void) pthread_mutex_unlock(&ent->rn_lock);
	}

out:
	rc_node_hold(np);		/* released by rc_iter_end() */
	nip->rni_parent = np;
	nip->rni_type = type;
	nip->rni_filter = (filter != NULL)? filter : rc_iter_null_filter;
	nip->rni_filter_arg = arg;
	*resp = nip;
	return (REP_PROTOCOL_SUCCESS);
}
4327 
4328 static void
4329 rc_iter_end(rc_node_iter_t *iter)
4330 {
4331 	rc_node_t *np = iter->rni_parent;
4332 
4333 	if (iter->rni_clevel >= 0)
4334 		np = np->rn_cchain[iter->rni_clevel];
4335 
4336 	assert(MUTEX_HELD(&np->rn_lock));
4337 	if (iter->rni_iter != NULL)
4338 		uu_list_walk_end(iter->rni_iter);
4339 	iter->rni_iter = NULL;
4340 
4341 	(void) pthread_mutex_unlock(&np->rn_lock);
4342 	rc_node_rele(iter->rni_parent);
4343 	if (iter->rni_iter_node != NULL)
4344 		rc_node_rele_other(iter->rni_iter_node);
4345 }
4346 
4347 /*
4348  * Fails with
4349  *   _NOT_SET - npp is reset
4350  *   _DELETED - npp's node has been deleted
4351  *   _NOT_APPLICABLE - npp's node is not a property
4352  *   _NO_RESOURCES - out of memory
4353  */
4354 static int
4355 rc_node_setup_value_iter(rc_node_ptr_t *npp, rc_node_iter_t **iterp)
4356 {
4357 	rc_node_t *np;
4358 
4359 	rc_node_iter_t *nip;
4360 
4361 	assert(*iterp == NULL);
4362 
4363 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
4364 
4365 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTY) {
4366 		(void) pthread_mutex_unlock(&np->rn_lock);
4367 		return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
4368 	}
4369 
4370 	nip = uu_zalloc(sizeof (*nip));
4371 	if (nip == NULL) {
4372 		(void) pthread_mutex_unlock(&np->rn_lock);
4373 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
4374 	}
4375 
4376 	nip->rni_parent = np;
4377 	nip->rni_iter = NULL;
4378 	nip->rni_clevel = -1;
4379 	nip->rni_type = REP_PROTOCOL_ENTITY_VALUE;
4380 	nip->rni_offset = 0;
4381 	nip->rni_last_offset = 0;
4382 
4383 	rc_node_hold_locked(np);
4384 
4385 	*iterp = nip;
4386 	(void) pthread_mutex_unlock(&np->rn_lock);
4387 
4388 	return (REP_PROTOCOL_SUCCESS);
4389 }
4390 
4391 /*
4392  * Returns:
4393  *   _NO_RESOURCES - out of memory
4394  *   _NOT_SET - npp is reset
4395  *   _DELETED - npp's node has been deleted
4396  *   _TYPE_MISMATCH - npp's node is not a property
4397  *   _NOT_FOUND - property has no values
4398  *   _TRUNCATED - property has >1 values (first is written into out)
4399  *   _SUCCESS - property has 1 value (which is written into out)
4400  *   _PERMISSION_DENIED - no authorization to read property value(s)
4401  *
4402  * We shorten *sz_out to not include anything after the final '\0'.
4403  */
4404 int
4405 rc_node_get_property_value(rc_node_ptr_t *npp,
4406     struct rep_protocol_value_response *out, size_t *sz_out)
4407 {
4408 	rc_node_t *np;
4409 	size_t w;
4410 	int ret;
4411 
4412 	assert(*sz_out == sizeof (*out));
4413 
4414 	RC_NODE_PTR_GET_CHECK_AND_HOLD(np, npp);
4415 	ret = rc_node_property_may_read(np);
4416 	rc_node_rele(np);
4417 
4418 	if (ret != REP_PROTOCOL_SUCCESS)
4419 		return (ret);
4420 
4421 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
4422 
4423 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTY) {
4424 		(void) pthread_mutex_unlock(&np->rn_lock);
4425 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
4426 	}
4427 
4428 	if (np->rn_values_size == 0) {
4429 		(void) pthread_mutex_unlock(&np->rn_lock);
4430 		return (REP_PROTOCOL_FAIL_NOT_FOUND);
4431 	}
4432 	out->rpr_type = np->rn_valtype;
4433 	w = strlcpy(out->rpr_value, &np->rn_values[0],
4434 	    sizeof (out->rpr_value));
4435 
4436 	if (w >= sizeof (out->rpr_value))
4437 		backend_panic("value too large");
4438 
4439 	*sz_out = offsetof(struct rep_protocol_value_response,
4440 	    rpr_value[w + 1]);
4441 
4442 	ret = (np->rn_values_count != 1)? REP_PROTOCOL_FAIL_TRUNCATED :
4443 	    REP_PROTOCOL_SUCCESS;
4444 	(void) pthread_mutex_unlock(&np->rn_lock);
4445 	return (ret);
4446 }
4447 
4448 int
4449 rc_iter_next_value(rc_node_iter_t *iter,
4450     struct rep_protocol_value_response *out, size_t *sz_out, int repeat)
4451 {
4452 	rc_node_t *np = iter->rni_parent;
4453 	const char *vals;
4454 	size_t len;
4455 
4456 	size_t start;
4457 	size_t w;
4458 	int ret;
4459 
4460 	rep_protocol_responseid_t result;
4461 
4462 	assert(*sz_out == sizeof (*out));
4463 
4464 	(void) memset(out, '\0', *sz_out);
4465 
4466 	if (iter->rni_type != REP_PROTOCOL_ENTITY_VALUE)
4467 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4468 
4469 	RC_NODE_CHECK(np);
4470 	ret = rc_node_property_may_read(np);
4471 
4472 	if (ret != REP_PROTOCOL_SUCCESS)
4473 		return (ret);
4474 
4475 	RC_NODE_CHECK_AND_LOCK(np);
4476 
4477 	vals = np->rn_values;
4478 	len = np->rn_values_size;
4479 
4480 	out->rpr_type = np->rn_valtype;
4481 
4482 	start = (repeat)? iter->rni_last_offset : iter->rni_offset;
4483 
4484 	if (len == 0 || start >= len) {
4485 		result = REP_PROTOCOL_DONE;
4486 		*sz_out -= sizeof (out->rpr_value);
4487 	} else {
4488 		w = strlcpy(out->rpr_value, &vals[start],
4489 		    sizeof (out->rpr_value));
4490 
4491 		if (w >= sizeof (out->rpr_value))
4492 			backend_panic("value too large");
4493 
4494 		*sz_out = offsetof(struct rep_protocol_value_response,
4495 		    rpr_value[w + 1]);
4496 
4497 		/*
4498 		 * update the offsets if we're not repeating
4499 		 */
4500 		if (!repeat) {
4501 			iter->rni_last_offset = iter->rni_offset;
4502 			iter->rni_offset += (w + 1);
4503 		}
4504 
4505 		result = REP_PROTOCOL_SUCCESS;
4506 	}
4507 
4508 	(void) pthread_mutex_unlock(&np->rn_lock);
4509 	return (result);
4510 }
4511 
4512 /*
4513  * Entry point for ITER_START from client.c.  Validate the arguments & call
4514  * rc_iter_create().
4515  *
4516  * Fails with
4517  *   _NOT_SET
4518  *   _DELETED
4519  *   _TYPE_MISMATCH - np cannot carry type children
4520  *   _BAD_REQUEST - flags is invalid
4521  *		    pattern is invalid
4522  *   _NO_RESOURCES
4523  *   _INVALID_TYPE
4524  *   _TYPE_MISMATCH - *npp cannot have children of type
4525  *   _BACKEND_ACCESS
4526  */
4527 int
4528 rc_node_setup_iter(rc_node_ptr_t *npp, rc_node_iter_t **iterp,
4529     uint32_t type, uint32_t flags, const char *pattern)
4530 {
4531 	rc_node_t *np;
4532 	rc_iter_filter_func *f = NULL;
4533 	int rc;
4534 
4535 	RC_NODE_PTR_GET_CHECK(np, npp);
4536 
4537 	if (pattern != NULL && pattern[0] == '\0')
4538 		pattern = NULL;
4539 
4540 	if (type == REP_PROTOCOL_ENTITY_VALUE) {
4541 		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTY)
4542 			return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
4543 		if (flags != RP_ITER_START_ALL || pattern != NULL)
4544 			return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4545 
4546 		rc = rc_node_setup_value_iter(npp, iterp);
4547 		assert(rc != REP_PROTOCOL_FAIL_NOT_APPLICABLE);
4548 		return (rc);
4549 	}
4550 
4551 	if ((rc = rc_check_parent_child(np->rn_id.rl_type, type)) !=
4552 	    REP_PROTOCOL_SUCCESS)
4553 		return (rc);
4554 
4555 	if (((flags & RP_ITER_START_FILT_MASK) == RP_ITER_START_ALL) ^
4556 	    (pattern == NULL))
4557 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4558 
4559 	/* Composition only works for instances & snapshots. */
4560 	if ((flags & RP_ITER_START_COMPOSED) &&
4561 	    (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_INSTANCE &&
4562 	    np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPSHOT))
4563 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4564 
4565 	if (pattern != NULL) {
4566 		if ((rc = rc_check_type_name(type, pattern)) !=
4567 		    REP_PROTOCOL_SUCCESS)
4568 			return (rc);
4569 		pattern = strdup(pattern);
4570 		if (pattern == NULL)
4571 			return (REP_PROTOCOL_FAIL_NO_RESOURCES);
4572 	}
4573 
4574 	switch (flags & RP_ITER_START_FILT_MASK) {
4575 	case RP_ITER_START_ALL:
4576 		f = NULL;
4577 		break;
4578 	case RP_ITER_START_EXACT:
4579 		f = rc_iter_filter_name;
4580 		break;
4581 	case RP_ITER_START_PGTYPE:
4582 		if (type != REP_PROTOCOL_ENTITY_PROPERTYGRP) {
4583 			free((void *)pattern);
4584 			return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4585 		}
4586 		f = rc_iter_filter_type;
4587 		break;
4588 	default:
4589 		free((void *)pattern);
4590 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4591 	}
4592 
4593 	rc = rc_iter_create(iterp, np, type, f, (void *)pattern,
4594 	    flags & RP_ITER_START_COMPOSED);
4595 	if (rc != REP_PROTOCOL_SUCCESS && pattern != NULL)
4596 		free((void *)pattern);
4597 
4598 	return (rc);
4599 }
4600 
/*
 * Advance an entity iterator and assign the next matching child to *out.
 *
 * Do uu_list_walk_next(iter->rni_iter) until we find a child which matches
 * the filter.
 * For composed iterators, then check to see if there's an overlapping entity
 * (see embedded comments).  If we reach the end of the list, start over at
 * the next level.
 *
 * When the last level is exhausted the walk is torn down with rc_iter_end(),
 * *out is assigned NULL and _DONE is returned; subsequent calls find
 * rni_iter == NULL and return _DONE straight away.
 *
 * Returns
 *   _BAD_REQUEST - iter walks values
 *   _TYPE_MISMATCH - iter does not walk type entities
 *   _DELETED - parent was deleted
 *   _NO_RESOURCES
 *   _INVALID_TYPE - type is invalid
 *   _DONE
 *   _SUCCESS
 *
 * For composed property group iterators, can also return
 *   _TYPE_MISMATCH - parent cannot have type children
 */
int
rc_iter_next(rc_node_iter_t *iter, rc_node_ptr_t *out, uint32_t type)
{
	rc_node_t *np = iter->rni_parent;
	rc_node_t *res;
	int rc;

	if (iter->rni_type == REP_PROTOCOL_ENTITY_VALUE)
		return (REP_PROTOCOL_FAIL_BAD_REQUEST);

	/* No walker: the iterator was created empty or has already ended. */
	if (iter->rni_iter == NULL) {
		rc_node_clear(out, 0);
		return (REP_PROTOCOL_DONE);
	}

	if (iter->rni_type != type) {
		rc_node_clear(out, 0);
		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
	}

	(void) pthread_mutex_lock(&np->rn_lock);  /* held by _iter_create() */

	if (!rc_node_wait_flag(np, RC_NODE_CHILDREN_CHANGING)) {
		(void) pthread_mutex_unlock(&np->rn_lock);
		rc_node_clear(out, 1);
		return (REP_PROTOCOL_FAIL_DELETED);
	}

	if (iter->rni_clevel >= 0) {
		/* Composed iterator.  Iterate over appropriate level. */
		(void) pthread_mutex_unlock(&np->rn_lock);
		np = np->rn_cchain[iter->rni_clevel];
		/*
		 * If iter->rni_parent is an instance or a snapshot, np must
		 * be valid since iter holds iter->rni_parent & possible
		 * levels (service, instance, snaplevel) cannot be destroyed
		 * while rni_parent is held.  If iter->rni_parent is
		 * a composed property group then rc_node_setup_cpg() put
		 * a hold on np.
		 */

		(void) pthread_mutex_lock(&np->rn_lock);

		if (!rc_node_wait_flag(np, RC_NODE_CHILDREN_CHANGING)) {
			(void) pthread_mutex_unlock(&np->rn_lock);
			rc_node_clear(out, 1);
			return (REP_PROTOCOL_FAIL_DELETED);
		}
	}

	/* From here on np is the node being walked, and np->rn_lock is held. */
	assert(np->rn_flags & RC_NODE_HAS_CHILDREN);

	for (;;) {
		res = uu_list_walk_next(iter->rni_iter);
		if (res == NULL) {
			/* End of this level's child list. */
			rc_node_t *parent = iter->rni_parent;

#if COMPOSITION_DEPTH == 2
			/* Not composed, or already on the last level: done. */
			if (iter->rni_clevel < 0 || iter->rni_clevel == 1) {
				/* release walker and lock */
				rc_iter_end(iter);
				break;
			}

			/* Stop walking current level. */
			uu_list_walk_end(iter->rni_iter);
			iter->rni_iter = NULL;
			(void) pthread_mutex_unlock(&np->rn_lock);
			rc_node_rele_other(iter->rni_iter_node);
			iter->rni_iter_node = NULL;

			/* Start walking next level. */
			++iter->rni_clevel;
			np = parent->rn_cchain[iter->rni_clevel];
			assert(np != NULL);
#else
#error This code must be updated.
#endif

			(void) pthread_mutex_lock(&np->rn_lock);

			rc = rc_node_fill_children(np, iter->rni_type);

			if (rc == REP_PROTOCOL_SUCCESS) {
				iter->rni_iter =
				    uu_list_walk_start(np->rn_children,
				    UU_WALK_ROBUST);

				if (iter->rni_iter == NULL)
					rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
				else {
					iter->rni_iter_node = np;
					rc_node_hold_other(np);
				}
			}

			if (rc != REP_PROTOCOL_SUCCESS) {
				(void) pthread_mutex_unlock(&np->rn_lock);
				rc_node_clear(out, 0);
				return (rc);
			}

			continue;
		}

		/* Skip children of the wrong type or rejected by the filter. */
		if (res->rn_id.rl_type != type ||
		    !iter->rni_filter(res, iter->rni_filter_arg))
			continue;

		/*
		 * If we're composed and not at the top level, check to see if
		 * there's an entity at a higher level with the same name.  If
		 * so, skip this one.
		 */
		if (iter->rni_clevel > 0) {
			rc_node_t *ent = iter->rni_parent->rn_cchain[0];
			rc_node_t *pg;

#if COMPOSITION_DEPTH == 2
			assert(iter->rni_clevel == 1);

			/*
			 * NOTE(review): unlike the lower-level lookup further
			 * down, res is not held while np->rn_lock is dropped
			 * here, yet res->rn_name is read -- confirm that res
			 * cannot go away in this window.
			 */
			(void) pthread_mutex_unlock(&np->rn_lock);
			(void) pthread_mutex_lock(&ent->rn_lock);
			rc = rc_node_find_named_child(ent, res->rn_name, type,
			    &pg);
			/* Only pg's existence matters, so drop its hold now. */
			if (rc == REP_PROTOCOL_SUCCESS && pg != NULL)
				rc_node_rele(pg);
			(void) pthread_mutex_unlock(&ent->rn_lock);
			if (rc != REP_PROTOCOL_SUCCESS) {
				rc_node_clear(out, 0);
				return (rc);
			}
			(void) pthread_mutex_lock(&np->rn_lock);

			/* Make sure np isn't being deleted all of a sudden. */
			if (!rc_node_wait_flag(np, RC_NODE_DYING)) {
				(void) pthread_mutex_unlock(&np->rn_lock);
				rc_node_clear(out, 1);
				return (REP_PROTOCOL_FAIL_DELETED);
			}

			if (pg != NULL)
				/* Keep going. */
				continue;
#else
#error This code must be updated.
#endif
		}

		/*
		 * If we're composed, iterating over property groups, and not
		 * at the bottom level, check to see if there's a pg at lower
		 * level with the same name.  If so, return a cpg.
		 */
		if (iter->rni_clevel >= 0 &&
		    type == REP_PROTOCOL_ENTITY_PROPERTYGRP &&
		    iter->rni_clevel < COMPOSITION_DEPTH - 1) {
#if COMPOSITION_DEPTH == 2
			rc_node_t *pg;
			rc_node_t *ent = iter->rni_parent->rn_cchain[1];

			rc_node_hold(res);	/* While we drop np->rn_lock */

			(void) pthread_mutex_unlock(&np->rn_lock);
			(void) pthread_mutex_lock(&ent->rn_lock);
			rc = rc_node_find_named_child(ent, res->rn_name, type,
			    &pg);
			/* holds pg if not NULL */
			(void) pthread_mutex_unlock(&ent->rn_lock);
			if (rc != REP_PROTOCOL_SUCCESS) {
				rc_node_rele(res);
				rc_node_clear(out, 0);
				return (rc);
			}

			(void) pthread_mutex_lock(&np->rn_lock);
			if (!rc_node_wait_flag(np, RC_NODE_DYING)) {
				(void) pthread_mutex_unlock(&np->rn_lock);
				rc_node_rele(res);
				if (pg != NULL)
					rc_node_rele(pg);
				rc_node_clear(out, 1);
				return (REP_PROTOCOL_FAIL_DELETED);
			}

			if (pg == NULL) {
				/* No lower-level pg: return res itself. */
				rc_node_rele(res);
			} else {
				rc_node_t *cpg;

				/* Keep res held for rc_node_setup_cpg(). */

				cpg = rc_node_alloc();
				if (cpg == NULL) {
					(void) pthread_mutex_unlock(
					    &np->rn_lock);
					rc_node_rele(res);
					rc_node_rele(pg);
					rc_node_clear(out, 0);
					return (REP_PROTOCOL_FAIL_NO_RESOURCES);
				}

				switch (rc_node_setup_cpg(cpg, res, pg)) {
				case REP_PROTOCOL_SUCCESS:
					/* Return the composed pg instead. */
					res = cpg;
					break;

				case REP_PROTOCOL_FAIL_TYPE_MISMATCH:
					/* Nevermind. */
					rc_node_destroy(cpg);
					rc_node_rele(pg);
					rc_node_rele(res);
					break;

				case REP_PROTOCOL_FAIL_NO_RESOURCES:
					rc_node_destroy(cpg);
					(void) pthread_mutex_unlock(
					    &np->rn_lock);
					rc_node_rele(res);
					rc_node_rele(pg);
					rc_node_clear(out, 0);
					return (REP_PROTOCOL_FAIL_NO_RESOURCES);

				default:
					assert(0);
					abort();
				}
			}
#else
#error This code must be updated.
#endif
		}

		/* Found one: hold it across the unlock and the assignment. */
		rc_node_hold(res);
		(void) pthread_mutex_unlock(&np->rn_lock);
		break;
	}
	/* res is NULL here only when the walk ran off the last level. */
	rc_node_assign(out, res);

	if (res == NULL)
		return (REP_PROTOCOL_DONE);
	rc_node_rele(res);
	return (REP_PROTOCOL_SUCCESS);
}
4864 
4865 void
4866 rc_iter_destroy(rc_node_iter_t **nipp)
4867 {
4868 	rc_node_iter_t *nip = *nipp;
4869 	rc_node_t *np;
4870 
4871 	if (nip == NULL)
4872 		return;				/* already freed */
4873 
4874 	np = nip->rni_parent;
4875 
4876 	if (nip->rni_filter_arg != NULL)
4877 		free(nip->rni_filter_arg);
4878 	nip->rni_filter_arg = NULL;
4879 
4880 	if (nip->rni_type == REP_PROTOCOL_ENTITY_VALUE ||
4881 	    nip->rni_iter != NULL) {
4882 		if (nip->rni_clevel < 0)
4883 			(void) pthread_mutex_lock(&np->rn_lock);
4884 		else
4885 			(void) pthread_mutex_lock(
4886 			    &np->rn_cchain[nip->rni_clevel]->rn_lock);
4887 		rc_iter_end(nip);		/* release walker and lock */
4888 	}
4889 	nip->rni_parent = NULL;
4890 
4891 	uu_free(nip);
4892 	*nipp = NULL;
4893 }
4894 
/*
 * Point the transaction pointer txp at the property group npp refers to,
 * after checking that the client may modify it.
 *
 * If npp refers to a composed property group, the transaction is set up on
 * the first entry of its composition chain instead.  Privileged clients skip
 * the authorization checks; in a NATIVE_BUILD every other client is denied.
 * Otherwise a permcheck_t is filled with the authorizations that would allow
 * the change and perm_granted() decides.
 *
 * txp->rnp_authorized is set to 1 only for an instance's AUTH_PG_ACTIONS
 * property group, in which case the check is not repeated at commit time;
 * in all other cases it is set to 0.
 *
 * Fails with
 *   _TYPE_MISMATCH - the node is not a property group
 *   _PERMISSION_DENIED - the node's ID_SNAPSHOT id is nonzero, or the client
 *			  lacks the required authorizations
 *   _NO_RESOURCES
 *   _DELETED
 * plus whatever RC_NODE_PTR_GET_CHECK_AND_HOLD() and RC_NODE_CHECK_AND_HOLD()
 * return (presumably _NOT_SET and _DELETED -- confirm against the macros).
 */
int
rc_node_setup_tx(rc_node_ptr_t *npp, rc_node_ptr_t *txp)
{
	rc_node_t *np;
	permcheck_t *pcp;
	int ret;
	int authorized = 0;

	RC_NODE_PTR_GET_CHECK_AND_HOLD(np, npp);

	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
		/*
		 * NOTE(review): np->rn_cchain[0] is read after our hold on np
		 * has been dropped; presumably the client's rc_node_ptr_t
		 * keeps the node alive -- confirm.
		 */
		rc_node_rele(np);
		np = np->rn_cchain[0];
		RC_NODE_CHECK_AND_HOLD(np);
	}

	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTYGRP) {
		rc_node_rele(np);
		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
	}

	/* A nonzero snapshot id: such property groups may not be modified. */
	if (np->rn_id.rl_ids[ID_SNAPSHOT] != 0) {
		rc_node_rele(np);
		return (REP_PROTOCOL_FAIL_PERMISSION_DENIED);
	}

	if (client_is_privileged())
		goto skip_checks;

#ifdef NATIVE_BUILD
	rc_node_rele(np);
	return (REP_PROTOCOL_FAIL_PERMISSION_DENIED);
#else
	/* permission check */
	pcp = pc_create();
	if (pcp == NULL) {
		rc_node_rele(np);
		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
	}

	if (np->rn_id.rl_ids[ID_INSTANCE] != 0 &&	/* instance pg */
	    ((strcmp(np->rn_name, AUTH_PG_ACTIONS) == 0 &&
	    strcmp(np->rn_type, AUTH_PG_ACTIONS_TYPE) == 0) ||
	    (strcmp(np->rn_name, AUTH_PG_GENERAL_OVR) == 0 &&
	    strcmp(np->rn_type, AUTH_PG_GENERAL_OVR_TYPE) == 0))) {
		rc_node_t *instn;

		/* solaris.smf.manage can be used. */
		ret = perm_add_enabling(pcp, AUTH_MANAGE);

		if (ret != REP_PROTOCOL_SUCCESS) {
			pc_free(pcp);
			rc_node_rele(np);
			return (ret);
		}

		/* general/action_authorization values can be used. */
		ret = rc_node_parent(np, &instn);
		if (ret != REP_PROTOCOL_SUCCESS) {
			assert(ret == REP_PROTOCOL_FAIL_DELETED);
			rc_node_rele(np);
			pc_free(pcp);
			return (REP_PROTOCOL_FAIL_DELETED);
		}

		assert(instn->rn_id.rl_type == REP_PROTOCOL_ENTITY_INSTANCE);

		ret = perm_add_inst_action_auth(pcp, instn);
		rc_node_rele(instn);
		switch (ret) {
		case REP_PROTOCOL_SUCCESS:
			break;

		case REP_PROTOCOL_FAIL_DELETED:
		case REP_PROTOCOL_FAIL_NO_RESOURCES:
			rc_node_rele(np);
			pc_free(pcp);
			return (ret);

		default:
			bad_error("perm_add_inst_action_auth", ret);
		}

		if (strcmp(np->rn_name, AUTH_PG_ACTIONS) == 0)
			authorized = 1;		/* Don't check on commit. */
	} else {
		/* Ordinary property group: collect the generic auths. */
		ret = perm_add_enabling(pcp, AUTH_MODIFY);

		if (ret == REP_PROTOCOL_SUCCESS) {
			/* propertygroup-type-specific authorization */
			/* no locking because rn_type won't change anyway */
			const char * const auth =
			    perm_auth_for_pgtype(np->rn_type);

			if (auth != NULL)
				ret = perm_add_enabling(pcp, auth);
		}

		if (ret == REP_PROTOCOL_SUCCESS)
			/* propertygroup/transaction-type-specific auths */
			ret =
			    perm_add_enabling_values(pcp, np, AUTH_PROP_VALUE);

		if (ret == REP_PROTOCOL_SUCCESS)
			ret =
			    perm_add_enabling_values(pcp, np, AUTH_PROP_MODIFY);

		/* AUTH_MANAGE can manipulate general/AUTH_PROP_ACTION */
		if (ret == REP_PROTOCOL_SUCCESS &&
		    strcmp(np->rn_name, AUTH_PG_GENERAL) == 0 &&
		    strcmp(np->rn_type, AUTH_PG_GENERAL_TYPE) == 0)
			ret = perm_add_enabling(pcp, AUTH_MANAGE);

		if (ret != REP_PROTOCOL_SUCCESS) {
			pc_free(pcp);
			rc_node_rele(np);
			return (ret);
		}
	}

	/* 1 means granted, 0 means denied, anything else is a failure. */
	ret = perm_granted(pcp);
	if (ret != 1) {
		pc_free(pcp);
		rc_node_rele(np);
		return (ret == 0 ? REP_PROTOCOL_FAIL_PERMISSION_DENIED :
		    REP_PROTOCOL_FAIL_NO_RESOURCES);
	}

	pc_free(pcp);
#endif /* NATIVE_BUILD */

skip_checks:
	rc_node_assign(txp, np);
	txp->rnp_authorized = authorized;

	/* Drop the hold taken at the top of this function. */
	rc_node_rele(np);
	return (REP_PROTOCOL_SUCCESS);
}
5033 
5034 /*
5035  * Return 1 if the given transaction commands only modify the values of
5036  * properties other than "modify_authorization".  Return -1 if any of the
5037  * commands are invalid, and 0 otherwise.
5038  */
5039 static int
5040 tx_allow_value(const void *cmds_arg, size_t cmds_sz, rc_node_t *pg)
5041 {
5042 	const struct rep_protocol_transaction_cmd *cmds;
5043 	uintptr_t loc;
5044 	uint32_t sz;
5045 	rc_node_t *prop;
5046 	boolean_t ok;
5047 
5048 	assert(!MUTEX_HELD(&pg->rn_lock));
5049 
5050 	loc = (uintptr_t)cmds_arg;
5051 
5052 	while (cmds_sz > 0) {
5053 		cmds = (struct rep_protocol_transaction_cmd *)loc;
5054 
5055 		if (cmds_sz <= REP_PROTOCOL_TRANSACTION_CMD_MIN_SIZE)
5056 			return (-1);
5057 
5058 		sz = cmds->rptc_size;
5059 		if (sz <= REP_PROTOCOL_TRANSACTION_CMD_MIN_SIZE)
5060 			return (-1);
5061 
5062 		sz = TX_SIZE(sz);
5063 		if (sz > cmds_sz)
5064 			return (-1);
5065 
5066 		switch (cmds[0].rptc_action) {
5067 		case REP_PROTOCOL_TX_ENTRY_CLEAR:
5068 			break;
5069 
5070 		case REP_PROTOCOL_TX_ENTRY_REPLACE:
5071 			/* Check type */
5072 			(void) pthread_mutex_lock(&pg->rn_lock);
5073 			if (rc_node_find_named_child(pg,
5074 			    (const char *)cmds[0].rptc_data,
5075 			    REP_PROTOCOL_ENTITY_PROPERTY, &prop) ==
5076 			    REP_PROTOCOL_SUCCESS) {
5077 				ok = (prop != NULL &&
5078 				    prop->rn_valtype == cmds[0].rptc_type);
5079 			} else {
5080 				/* Return more particular error? */
5081 				ok = B_FALSE;
5082 			}
5083 			(void) pthread_mutex_unlock(&pg->rn_lock);
5084 			if (ok)
5085 				break;
5086 			return (0);
5087 
5088 		default:
5089 			return (0);
5090 		}
5091 
5092 		if (strcmp((const char *)cmds[0].rptc_data, AUTH_PROP_MODIFY)
5093 		    == 0)
5094 			return (0);
5095 
5096 		loc += sz;
5097 		cmds_sz -= sz;
5098 	}
5099 
5100 	return (1);
5101 }
5102 
5103 /*
5104  * Return 1 if any of the given transaction commands affect
5105  * "action_authorization".  Return -1 if any of the commands are invalid and
5106  * 0 in all other cases.
5107  */
5108 static int
5109 tx_modifies_action(const void *cmds_arg, size_t cmds_sz)
5110 {
5111 	const struct rep_protocol_transaction_cmd *cmds;
5112 	uintptr_t loc;
5113 	uint32_t sz;
5114 
5115 	loc = (uintptr_t)cmds_arg;
5116 
5117 	while (cmds_sz > 0) {
5118 		cmds = (struct rep_protocol_transaction_cmd *)loc;
5119 
5120 		if (cmds_sz <= REP_PROTOCOL_TRANSACTION_CMD_MIN_SIZE)
5121 			return (-1);
5122 
5123 		sz = cmds->rptc_size;
5124 		if (sz <= REP_PROTOCOL_TRANSACTION_CMD_MIN_SIZE)
5125 			return (-1);
5126 
5127 		sz = TX_SIZE(sz);
5128 		if (sz > cmds_sz)
5129 			return (-1);
5130 
5131 		if (strcmp((const char *)cmds[0].rptc_data, AUTH_PROP_ACTION)
5132 		    == 0)
5133 			return (1);
5134 
5135 		loc += sz;
5136 		cmds_sz -= sz;
5137 	}
5138 
5139 	return (0);
5140 }
5141 
5142 /*
5143  * Returns 1 if the transaction commands only modify properties named
5144  * 'enabled'.
5145  */
5146 static int
5147 tx_only_enabled(const void *cmds_arg, size_t cmds_sz)
5148 {
5149 	const struct rep_protocol_transaction_cmd *cmd;
5150 	uintptr_t loc;
5151 	uint32_t sz;
5152 
5153 	loc = (uintptr_t)cmds_arg;
5154 
5155 	while (cmds_sz > 0) {
5156 		cmd = (struct rep_protocol_transaction_cmd *)loc;
5157 
5158 		if (cmds_sz <= REP_PROTOCOL_TRANSACTION_CMD_MIN_SIZE)
5159 			return (-1);
5160 
5161 		sz = cmd->rptc_size;
5162 		if (sz <= REP_PROTOCOL_TRANSACTION_CMD_MIN_SIZE)
5163 			return (-1);
5164 
5165 		sz = TX_SIZE(sz);
5166 		if (sz > cmds_sz)
5167 			return (-1);
5168 
5169 		if (strcmp((const char *)cmd->rptc_data, AUTH_PROP_ENABLED)
5170 		    != 0)
5171 			return (0);
5172 
5173 		loc += sz;
5174 		cmds_sz -= sz;
5175 	}
5176 
5177 	return (1);
5178 }
5179 
5180 int
5181 rc_tx_commit(rc_node_ptr_t *txp, const void *cmds, size_t cmds_sz)
5182 {
5183 	rc_node_t *np = txp->rnp_node;
5184 	rc_node_t *pp;
5185 	rc_node_t *nnp;
5186 	rc_node_pg_notify_t *pnp;
5187 	int rc;
5188 	permcheck_t *pcp;
5189 	int granted, normal;
5190 
5191 	RC_NODE_CHECK(np);
5192 
5193 	if (!client_is_privileged() && !txp->rnp_authorized) {
5194 #ifdef NATIVE_BUILD
5195 		return (REP_PROTOCOL_FAIL_PERMISSION_DENIED);
5196 #else
5197 		/* permission check: depends on contents of transaction */
5198 		pcp = pc_create();
5199 		if (pcp == NULL)
5200 			return (REP_PROTOCOL_FAIL_NO_RESOURCES);
5201 
5202 		/* If normal is cleared, we won't do the normal checks. */
5203 		normal = 1;
5204 		rc = REP_PROTOCOL_SUCCESS;
5205 
5206 		if (strcmp(np->rn_name, AUTH_PG_GENERAL) == 0 &&
5207 		    strcmp(np->rn_type, AUTH_PG_GENERAL_TYPE) == 0) {
5208 			/* Touching general[framework]/action_authorization? */
5209 			rc = tx_modifies_action(cmds, cmds_sz);
5210 			if (rc == -1) {
5211 				pc_free(pcp);
5212 				return (REP_PROTOCOL_FAIL_BAD_REQUEST);
5213 			}
5214 
5215 			if (rc) {
5216 				/* Yes: only AUTH_MANAGE can be used. */
5217 				rc = perm_add_enabling(pcp, AUTH_MANAGE);
5218 				normal = 0;
5219 			} else {
5220 				rc = REP_PROTOCOL_SUCCESS;
5221 			}
5222 		} else if (np->rn_id.rl_ids[ID_INSTANCE] != 0 &&
5223 		    strcmp(np->rn_name, AUTH_PG_GENERAL_OVR) == 0 &&
5224 		    strcmp(np->rn_type, AUTH_PG_GENERAL_OVR_TYPE) == 0) {
5225 			rc_node_t *instn;
5226 
5227 			rc = tx_only_enabled(cmds, cmds_sz);
5228 			if (rc == -1) {
5229 				pc_free(pcp);
5230 				return (REP_PROTOCOL_FAIL_BAD_REQUEST);
5231 			}
5232 
5233 			if (rc) {
5234 				rc = rc_node_parent(np, &instn);
5235 				if (rc != REP_PROTOCOL_SUCCESS) {
5236 					assert(rc == REP_PROTOCOL_FAIL_DELETED);
5237 					pc_free(pcp);
5238 					return (rc);
5239 				}
5240 
5241 				assert(instn->rn_id.rl_type ==
5242 				    REP_PROTOCOL_ENTITY_INSTANCE);
5243 
5244 				rc = perm_add_inst_action_auth(pcp, instn);
5245 				rc_node_rele(instn);
5246 				switch (rc) {
5247 				case REP_PROTOCOL_SUCCESS:
5248 					break;
5249 
5250 				case REP_PROTOCOL_FAIL_DELETED:
5251 				case REP_PROTOCOL_FAIL_NO_RESOURCES:
5252 					pc_free(pcp);
5253 					return (rc);
5254 
5255 				default:
5256 					bad_error("perm_add_inst_action_auth",
5257 					    rc);
5258 				}
5259 			} else {
5260 				rc = REP_PROTOCOL_SUCCESS;
5261 			}
5262 		}
5263 
5264 		if (rc == REP_PROTOCOL_SUCCESS && normal) {
5265 			rc = perm_add_enabling(pcp, AUTH_MODIFY);
5266 
5267 			if (rc == REP_PROTOCOL_SUCCESS) {
5268 				/* Add pgtype-specific authorization. */
5269 				const char * const auth =
5270 				    perm_auth_for_pgtype(np->rn_type);
5271 
5272 				if (auth != NULL)
5273 					rc = perm_add_enabling(pcp, auth);
5274 			}
5275 
5276 			/* Add pg-specific modify_authorization auths. */
5277 			if (rc == REP_PROTOCOL_SUCCESS)
5278 				rc = perm_add_enabling_values(pcp, np,
5279 				    AUTH_PROP_MODIFY);
5280 
5281 			/* If value_authorization values are ok, add them. */
5282 			if (rc == REP_PROTOCOL_SUCCESS) {
5283 				rc = tx_allow_value(cmds, cmds_sz, np);
5284 				if (rc == -1)
5285 					rc = REP_PROTOCOL_FAIL_BAD_REQUEST;
5286 				else if (rc)
5287 					rc = perm_add_enabling_values(pcp, np,
5288 					    AUTH_PROP_VALUE);
5289 			}
5290 		}
5291 
5292 		if (rc == REP_PROTOCOL_SUCCESS) {
5293 			granted = perm_granted(pcp);
5294 			if (granted < 0)
5295 				rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
5296 		}
5297 
5298 		pc_free(pcp);
5299 
5300 		if (rc != REP_PROTOCOL_SUCCESS)
5301 			return (rc);
5302 
5303 		if (!granted)
5304 			return (REP_PROTOCOL_FAIL_PERMISSION_DENIED);
5305 #endif /* NATIVE_BUILD */
5306 	}
5307 
5308 	nnp = rc_node_alloc();
5309 	if (nnp == NULL)
5310 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
5311 
5312 	nnp->rn_id = np->rn_id;			/* structure assignment */
5313 	nnp->rn_hash = np->rn_hash;
5314 	nnp->rn_name = strdup(np->rn_name);
5315 	nnp->rn_type = strdup(np->rn_type);
5316 	nnp->rn_pgflags = np->rn_pgflags;
5317 
5318 	nnp->rn_flags = RC_NODE_IN_TX | RC_NODE_USING_PARENT;
5319 
5320 	if (nnp->rn_name == NULL || nnp->rn_type == NULL) {
5321 		rc_node_destroy(nnp);
5322 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
5323 	}
5324 
5325 	(void) pthread_mutex_lock(&np->rn_lock);
5326 	/*
5327 	 * We must have all of the old properties in the cache, or the
5328 	 * database deletions could cause inconsistencies.
5329 	 */
5330 	if ((rc = rc_node_fill_children(np, REP_PROTOCOL_ENTITY_PROPERTY)) !=
5331 	    REP_PROTOCOL_SUCCESS) {
5332 		(void) pthread_mutex_unlock(&np->rn_lock);
5333 		rc_node_destroy(nnp);
5334 		return (rc);
5335 	}
5336 
5337 	if (!rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
5338 		(void) pthread_mutex_unlock(&np->rn_lock);
5339 		rc_node_destroy(nnp);
5340 		return (REP_PROTOCOL_FAIL_DELETED);
5341 	}
5342 
5343 	if (np->rn_flags & RC_NODE_OLD) {
5344 		rc_node_rele_flag(np, RC_NODE_USING_PARENT);
5345 		(void) pthread_mutex_unlock(&np->rn_lock);
5346 		rc_node_destroy(nnp);
5347 		return (REP_PROTOCOL_FAIL_NOT_LATEST);
5348 	}
5349 
5350 	pp = rc_node_hold_parent_flag(np, RC_NODE_CHILDREN_CHANGING);
5351 	if (pp == NULL) {
5352 		/* our parent is gone, we're going next... */
5353 		rc_node_destroy(nnp);
5354 		(void) pthread_mutex_lock(&np->rn_lock);
5355 		if (np->rn_flags & RC_NODE_OLD) {
5356 			(void) pthread_mutex_unlock(&np->rn_lock);
5357 			return (REP_PROTOCOL_FAIL_NOT_LATEST);
5358 		}
5359 		(void) pthread_mutex_unlock(&np->rn_lock);
5360 		return (REP_PROTOCOL_FAIL_DELETED);
5361 	}
5362 	(void) pthread_mutex_unlock(&pp->rn_lock);
5363 
5364 	/*
5365 	 * prepare for the transaction
5366 	 */
5367 	(void) pthread_mutex_lock(&np->rn_lock);
5368 	if (!rc_node_hold_flag(np, RC_NODE_IN_TX)) {
5369 		(void) pthread_mutex_unlock(&np->rn_lock);
5370 		(void) pthread_mutex_lock(&pp->rn_lock);
5371 		rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
5372 		(void) pthread_mutex_unlock(&pp->rn_lock);
5373 		rc_node_destroy(nnp);
5374 		return (REP_PROTOCOL_FAIL_DELETED);
5375 	}
5376 	nnp->rn_gen_id = np->rn_gen_id;
5377 	(void) pthread_mutex_unlock(&np->rn_lock);
5378 
5379 	/* Sets nnp->rn_gen_id on success. */
5380 	rc = object_tx_commit(&np->rn_id, cmds, cmds_sz, &nnp->rn_gen_id);
5381 
5382 	(void) pthread_mutex_lock(&np->rn_lock);
5383 	if (rc != REP_PROTOCOL_SUCCESS) {
5384 		rc_node_rele_flag(np, RC_NODE_IN_TX);
5385 		(void) pthread_mutex_unlock(&np->rn_lock);
5386 		(void) pthread_mutex_lock(&pp->rn_lock);
5387 		rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
5388 		(void) pthread_mutex_unlock(&pp->rn_lock);
5389 		rc_node_destroy(nnp);
5390 		rc_node_clear(txp, 0);
5391 		if (rc == REP_PROTOCOL_DONE)
5392 			rc = REP_PROTOCOL_SUCCESS; /* successful empty tx */
5393 		return (rc);
5394 	}
5395 
5396 	/*
5397 	 * Notify waiters
5398 	 */
5399 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
5400 	while ((pnp = uu_list_first(np->rn_pg_notify_list)) != NULL)
5401 		rc_pg_notify_fire(pnp);
5402 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5403 
5404 	np->rn_flags |= RC_NODE_OLD;
5405 	(void) pthread_mutex_unlock(&np->rn_lock);
5406 
5407 	rc_notify_remove_node(np);
5408 
5409 	/*
5410 	 * replace np with nnp
5411 	 */
5412 	rc_node_relink_child(pp, np, nnp);
5413 
5414 	/*
5415 	 * all done -- clear the transaction.
5416 	 */
5417 	rc_node_clear(txp, 0);
5418 
5419 	return (REP_PROTOCOL_SUCCESS);
5420 }
5421 
5422 void
5423 rc_pg_notify_init(rc_node_pg_notify_t *pnp)
5424 {
5425 	uu_list_node_init(pnp, &pnp->rnpn_node, rc_pg_notify_pool);
5426 	pnp->rnpn_pg = NULL;
5427 	pnp->rnpn_fd = -1;
5428 }
5429 
5430 int
5431 rc_pg_notify_setup(rc_node_pg_notify_t *pnp, rc_node_ptr_t *npp, int fd)
5432 {
5433 	rc_node_t *np;
5434 
5435 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
5436 
5437 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTYGRP) {
5438 		(void) pthread_mutex_unlock(&np->rn_lock);
5439 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
5440 	}
5441 
5442 	/*
5443 	 * wait for any transaction in progress to complete
5444 	 */
5445 	if (!rc_node_wait_flag(np, RC_NODE_IN_TX)) {
5446 		(void) pthread_mutex_unlock(&np->rn_lock);
5447 		return (REP_PROTOCOL_FAIL_DELETED);
5448 	}
5449 
5450 	if (np->rn_flags & RC_NODE_OLD) {
5451 		(void) pthread_mutex_unlock(&np->rn_lock);
5452 		return (REP_PROTOCOL_FAIL_NOT_LATEST);
5453 	}
5454 
5455 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
5456 	rc_pg_notify_fire(pnp);
5457 	pnp->rnpn_pg = np;
5458 	pnp->rnpn_fd = fd;
5459 	(void) uu_list_insert_after(np->rn_pg_notify_list, NULL, pnp);
5460 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5461 
5462 	(void) pthread_mutex_unlock(&np->rn_lock);
5463 	return (REP_PROTOCOL_SUCCESS);
5464 }
5465 
5466 void
5467 rc_pg_notify_fini(rc_node_pg_notify_t *pnp)
5468 {
5469 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
5470 	rc_pg_notify_fire(pnp);
5471 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5472 
5473 	uu_list_node_fini(pnp, &pnp->rnpn_node, rc_pg_notify_pool);
5474 }
5475 
5476 void
5477 rc_notify_info_init(rc_notify_info_t *rnip)
5478 {
5479 	int i;
5480 
5481 	uu_list_node_init(rnip, &rnip->rni_list_node, rc_notify_info_pool);
5482 	uu_list_node_init(&rnip->rni_notify, &rnip->rni_notify.rcn_list_node,
5483 	    rc_notify_pool);
5484 
5485 	rnip->rni_notify.rcn_node = NULL;
5486 	rnip->rni_notify.rcn_info = rnip;
5487 
5488 	bzero(rnip->rni_namelist, sizeof (rnip->rni_namelist));
5489 	bzero(rnip->rni_typelist, sizeof (rnip->rni_typelist));
5490 
5491 	(void) pthread_cond_init(&rnip->rni_cv, NULL);
5492 
5493 	for (i = 0; i < RC_NOTIFY_MAX_NAMES; i++) {
5494 		rnip->rni_namelist[i] = NULL;
5495 		rnip->rni_typelist[i] = NULL;
5496 	}
5497 }
5498 
5499 static void
5500 rc_notify_info_insert_locked(rc_notify_info_t *rnip)
5501 {
5502 	assert(MUTEX_HELD(&rc_pg_notify_lock));
5503 
5504 	assert(!(rnip->rni_flags & RC_NOTIFY_ACTIVE));
5505 
5506 	rnip->rni_flags |= RC_NOTIFY_ACTIVE;
5507 	(void) uu_list_insert_after(rc_notify_info_list, NULL, rnip);
5508 	(void) uu_list_insert_before(rc_notify_list, NULL, &rnip->rni_notify);
5509 }
5510 
5511 static void
5512 rc_notify_info_remove_locked(rc_notify_info_t *rnip)
5513 {
5514 	rc_notify_t *me = &rnip->rni_notify;
5515 	rc_notify_t *np;
5516 
5517 	assert(MUTEX_HELD(&rc_pg_notify_lock));
5518 
5519 	assert(rnip->rni_flags & RC_NOTIFY_ACTIVE);
5520 
5521 	assert(!(rnip->rni_flags & RC_NOTIFY_DRAIN));
5522 	rnip->rni_flags |= RC_NOTIFY_DRAIN;
5523 	(void) pthread_cond_broadcast(&rnip->rni_cv);
5524 
5525 	(void) uu_list_remove(rc_notify_info_list, rnip);
5526 
5527 	/*
5528 	 * clean up any notifications at the beginning of the list
5529 	 */
5530 	if (uu_list_first(rc_notify_list) == me) {
5531 		while ((np = uu_list_next(rc_notify_list, me)) != NULL &&
5532 		    np->rcn_info == NULL)
5533 			rc_notify_remove_locked(np);
5534 	}
5535 	(void) uu_list_remove(rc_notify_list, me);
5536 
5537 	while (rnip->rni_waiters) {
5538 		(void) pthread_cond_broadcast(&rc_pg_notify_cv);
5539 		(void) pthread_cond_broadcast(&rnip->rni_cv);
5540 		(void) pthread_cond_wait(&rnip->rni_cv, &rc_pg_notify_lock);
5541 	}
5542 
5543 	rnip->rni_flags &= ~(RC_NOTIFY_DRAIN | RC_NOTIFY_ACTIVE);
5544 }
5545 
5546 static int
5547 rc_notify_info_add_watch(rc_notify_info_t *rnip, const char **arr,
5548     const char *name)
5549 {
5550 	int i;
5551 	int rc;
5552 	char *f;
5553 
5554 	rc = rc_check_type_name(REP_PROTOCOL_ENTITY_PROPERTYGRP, name);
5555 	if (rc != REP_PROTOCOL_SUCCESS)
5556 		return (rc);
5557 
5558 	f = strdup(name);
5559 	if (f == NULL)
5560 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
5561 
5562 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
5563 
5564 	while (rnip->rni_flags & RC_NOTIFY_EMPTYING)
5565 		(void) pthread_cond_wait(&rnip->rni_cv, &rc_pg_notify_lock);
5566 
5567 	for (i = 0; i < RC_NOTIFY_MAX_NAMES; i++)
5568 		if (arr[i] == NULL)
5569 			break;
5570 
5571 	if (i == RC_NOTIFY_MAX_NAMES) {
5572 		(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5573 		free(f);
5574 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
5575 	}
5576 
5577 	arr[i] = f;
5578 	if (!(rnip->rni_flags & RC_NOTIFY_ACTIVE))
5579 		rc_notify_info_insert_locked(rnip);
5580 
5581 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5582 	return (REP_PROTOCOL_SUCCESS);
5583 }
5584 
5585 int
5586 rc_notify_info_add_name(rc_notify_info_t *rnip, const char *name)
5587 {
5588 	return (rc_notify_info_add_watch(rnip, rnip->rni_namelist, name));
5589 }
5590 
5591 int
5592 rc_notify_info_add_type(rc_notify_info_t *rnip, const char *type)
5593 {
5594 	return (rc_notify_info_add_watch(rnip, rnip->rni_typelist, type));
5595 }
5596 
5597 /*
5598  * Wait for and report an event of interest to rnip, a notification client
5599  */
5600 int
5601 rc_notify_info_wait(rc_notify_info_t *rnip, rc_node_ptr_t *out,
5602     char *outp, size_t sz)
5603 {
5604 	rc_notify_t *np;
5605 	rc_notify_t *me = &rnip->rni_notify;
5606 	rc_node_t *nnp;
5607 	rc_notify_delete_t *ndp;
5608 
5609 	int am_first_info;
5610 
5611 	if (sz > 0)
5612 		outp[0] = 0;
5613 
5614 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
5615 
5616 	while ((rnip->rni_flags & (RC_NOTIFY_ACTIVE | RC_NOTIFY_DRAIN)) ==
5617 	    RC_NOTIFY_ACTIVE) {
5618 		/*
5619 		 * If I'm first on the notify list, it is my job to
5620 		 * clean up any notifications I pass by.  I can't do that
5621 		 * if someone is blocking the list from removals, so I
5622 		 * have to wait until they have all drained.
5623 		 */
5624 		am_first_info = (uu_list_first(rc_notify_list) == me);
5625 		if (am_first_info && rc_notify_in_use) {
5626 			rnip->rni_waiters++;
5627 			(void) pthread_cond_wait(&rc_pg_notify_cv,
5628 			    &rc_pg_notify_lock);
5629 			rnip->rni_waiters--;
5630 			continue;
5631 		}
5632 
5633 		/*
5634 		 * Search the list for a node of interest.
5635 		 */
5636 		np = uu_list_next(rc_notify_list, me);
5637 		while (np != NULL && !rc_notify_info_interested(rnip, np)) {
5638 			rc_notify_t *next = uu_list_next(rc_notify_list, np);
5639 
5640 			if (am_first_info) {
5641 				if (np->rcn_info) {
5642 					/*
5643 					 * Passing another client -- stop
5644 					 * cleaning up notifications
5645 					 */
5646 					am_first_info = 0;
5647 				} else {
5648 					rc_notify_remove_locked(np);
5649 				}
5650 			}
5651 			np = next;
5652 		}
5653 
5654 		/*
5655 		 * Nothing of interest -- wait for notification
5656 		 */
5657 		if (np == NULL) {
5658 			rnip->rni_waiters++;
5659 			(void) pthread_cond_wait(&rnip->rni_cv,
5660 			    &rc_pg_notify_lock);
5661 			rnip->rni_waiters--;
5662 			continue;
5663 		}
5664 
5665 		/*
5666 		 * found something to report -- move myself after the
5667 		 * notification and process it.
5668 		 */
5669 		(void) uu_list_remove(rc_notify_list, me);
5670 		(void) uu_list_insert_after(rc_notify_list, np, me);
5671 
5672 		if ((ndp = np->rcn_delete) != NULL) {
5673 			(void) strlcpy(outp, ndp->rnd_fmri, sz);
5674 			if (am_first_info)
5675 				rc_notify_remove_locked(np);
5676 			(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5677 			rc_node_clear(out, 0);
5678 			return (REP_PROTOCOL_SUCCESS);
5679 		}
5680 
5681 		nnp = np->rcn_node;
5682 		assert(nnp != NULL);
5683 
5684 		/*
5685 		 * We can't bump nnp's reference count without grabbing its
5686 		 * lock, and rc_pg_notify_lock is a leaf lock.  So we
5687 		 * temporarily block all removals to keep nnp from
5688 		 * disappearing.
5689 		 */
5690 		rc_notify_in_use++;
5691 		assert(rc_notify_in_use > 0);
5692 		(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5693 
5694 		rc_node_assign(out, nnp);
5695 
5696 		(void) pthread_mutex_lock(&rc_pg_notify_lock);
5697 		assert(rc_notify_in_use > 0);
5698 		rc_notify_in_use--;
5699 		if (am_first_info)
5700 			rc_notify_remove_locked(np);
5701 		if (rc_notify_in_use == 0)
5702 			(void) pthread_cond_broadcast(&rc_pg_notify_cv);
5703 		(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5704 
5705 		return (REP_PROTOCOL_SUCCESS);
5706 	}
5707 	/*
5708 	 * If we're the last one out, let people know it's clear.
5709 	 */
5710 	if (rnip->rni_waiters == 0)
5711 		(void) pthread_cond_broadcast(&rnip->rni_cv);
5712 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5713 	return (REP_PROTOCOL_DONE);
5714 }
5715 
5716 static void
5717 rc_notify_info_reset(rc_notify_info_t *rnip)
5718 {
5719 	int i;
5720 
5721 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
5722 	if (rnip->rni_flags & RC_NOTIFY_ACTIVE)
5723 		rc_notify_info_remove_locked(rnip);
5724 	assert(!(rnip->rni_flags & (RC_NOTIFY_DRAIN | RC_NOTIFY_EMPTYING)));
5725 	rnip->rni_flags |= RC_NOTIFY_EMPTYING;
5726 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5727 
5728 	for (i = 0; i < RC_NOTIFY_MAX_NAMES; i++) {
5729 		if (rnip->rni_namelist[i] != NULL) {
5730 			free((void *)rnip->rni_namelist[i]);
5731 			rnip->rni_namelist[i] = NULL;
5732 		}
5733 		if (rnip->rni_typelist[i] != NULL) {
5734 			free((void *)rnip->rni_typelist[i]);
5735 			rnip->rni_typelist[i] = NULL;
5736 		}
5737 	}
5738 
5739 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
5740 	rnip->rni_flags &= ~RC_NOTIFY_EMPTYING;
5741 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5742 }
5743 
5744 void
5745 rc_notify_info_fini(rc_notify_info_t *rnip)
5746 {
5747 	rc_notify_info_reset(rnip);
5748 
5749 	uu_list_node_fini(rnip, &rnip->rni_list_node, rc_notify_info_pool);
5750 	uu_list_node_fini(&rnip->rni_notify, &rnip->rni_notify.rcn_list_node,
5751 	    rc_notify_pool);
5752 }
5753