xref: /titanic_41/usr/src/cmd/svc/configd/rc_node.c (revision 03831d35f7499c87d51205817c93e9a8d42c4bae)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 /*
30  * rc_node.c - object management primitives
31  *
32  * This layer manages entities, their data structure, its locking, iterators,
33  * transactions, and change notification requests.  Entities (scopes,
34  * services, instances, snapshots, snaplevels, property groups, "composed"
35  * property groups (see composition below), and properties) are represented by
36  * rc_node_t's and are kept in the cache_hash hash table.  (Property values
37  * are kept in the rn_values member of the respective property -- not as
38  * separate objects.)  Iterators are represented by rc_node_iter_t's.
39  * Transactions are represented by rc_node_tx_t's and are only allocated as
40  * part of repcache_tx_t's in the client layer (client.c).  Change
41  * notification requests are represented by rc_notify_t structures and are
42  * described below.
43  *
44  * The entity tree is rooted at rc_scope, which rc_node_init() initializes to
45  * the "localhost" scope.  The tree is filled in from the database on-demand
46  * by rc_node_fill_children(), usually from rc_iter_create() since iterators
47  * are the only way to find the children of an entity.
48  *
49  * Each rc_node_t is protected by its rn_lock member.  Operations which can
50  * take too long, however, should serialize on an RC_NODE_WAITING_FLAGS bit in
51  * rn_flags with the rc_node_{hold,rele}_flag() functions.  And since pointers
52  * to rc_node_t's are allowed, rn_refs is a reference count maintained by
53  * rc_node_{hold,rele}().  See configd.h for locking order information.
54  *
55  * When a node (property group or snapshot) is updated, a new node takes the
56  * place of the old node in the global hash, and the old node is hung off of
57  * the rn_former list of the new node.  At the same time, all of its children
58  * have their rn_parent_ref pointer set, and any holds they have are reflected
59  * in the old node's rn_other_refs count.  This is automatically kept up
60  * to date, until the final reference to the subgraph is dropped, at which
61  * point the node is unrefed and destroyed, along with all of its children.
62  *
63  * Locking rules: To dereference an rc_node_t * (usually to lock it), you must
64  * have a hold (rc_node_hold()) on it or otherwise be sure that it hasn't been
65  * rc_node_destroy()ed (hold a lock on its parent or child, hold a flag,
66  * etc.).  Once you have locked an rc_node_t you must check its rn_flags for
67  * RC_NODE_DEAD before you can use it.  This is usually done with the
68  * rc_node_{wait,hold}_flag() functions (often via the rc_node_check_*()
69  * functions & RC_NODE_*() macros), which fail if the object has died.
70  *
71  * An ITER_START for a non-ENTITY_VALUE induces an rc_node_fill_children()
72  * call via rc_node_setup_iter() to populate the rn_children uu_list of the
73  * rc_node_t * in question and a call to uu_list_walk_start() on that list.  For
74  * ITER_READ, rc_iter_next() uses uu_list_walk_next() to find the next
75  * appropriate child.
76  *
77  * An ITER_START for an ENTITY_VALUE makes sure the node has its values
78  * filled, and sets up the iterator.  An ITER_READ_VALUE just copies out
79  * the proper values and updates the offset information.
80  *
81  * When a property group gets changed by a transaction, it sticks around as
82  * a child of its replacement property group, but is removed from the parent.
83  *
84  * To allow aliases, snapshots are implemented with a level of indirection.
85  * A snapshot rc_node_t has a snapid which refers to an rc_snapshot_t in
86  * snapshot.c which contains the authoritative snaplevel information.  The
87  * snapid is "assigned" by rc_attach_snapshot().
88  *
89  * We provide the client layer with rc_node_ptr_t's to reference objects.
90  * Objects referred to by them are automatically held & released by
91  * rc_node_assign() & rc_node_clear().  The RC_NODE_PTR_*() macros are used at
92  * client.c entry points to read the pointers.  They fetch the pointer to the
93  * object, return (from the function) if it is dead, and lock, hold, or hold
94  * a flag of the object.
95  */
96 
97 /*
98  * Permission checking is authorization-based: some operations may only
99  * proceed if the user has been assigned at least one of a set of
100  * authorization strings.  The set of enabling authorizations depends on the
101  * operation and the target object.  The set of authorizations assigned to
102  * a user is determined by reading /etc/security/policy.conf, querying the
103  * user_attr database, and possibly querying the prof_attr database, as per
104  * chkauthattr() in libsecdb.
105  *
106  * The fastest way to decide whether the two sets intersect is by entering the
107  * strings into a hash table and detecting collisions, which takes linear time
108  * in the total size of the sets.  Except for the authorization patterns which
109  * may be assigned to users, which without advanced pattern-matching
110  * algorithms will take O(n) in the number of enabling authorizations, per
111  * pattern.
112  *
113  * We can achieve some practical speed-ups by noting that if we enter all of
114  * the authorizations from one of the sets into the hash table we can merely
115  * check the elements of the second set for existence without adding them.
116  * This reduces memory requirements and hash table clutter.  The enabling set
117  * is well suited for this because it is internal to configd (for now, at
118  * least).  Combine this with short-circuiting and we can even minimize the
119  * number of queries to the security databases (user_attr & prof_attr).
120  *
121  * To force this usage onto clients we provide functions for adding
122  * authorizations to the enabling set of a permission context structure
123  * (perm_add_*()) and one to decide whether the user associated with the
124  * current door call client possesses any of them (perm_granted()).
125  *
126  * At some point, a generic version of this should move to libsecdb.
127  */
128 
129 /*
130  * Composition is the combination of sets of properties.  The sets are ordered
131  * and properties in higher sets obscure properties of the same name in lower
132  * sets.  Here we present a composed view of an instance's properties as the
133  * union of its properties and its service's properties.  Similarly the
134  * properties of snaplevels are combined to form a composed view of the
135  * properties of a snapshot (which should match the composed view of the
136  * properties of the instance when the snapshot was taken).
137  *
138  * In terms of the client interface, the client may request that a property
139  * group iterator for an instance or snapshot be composed.  Property groups
140  * traversed by such an iterator may not have the target entity as a parent.
141  * Similarly, the properties traversed by a property iterator for those
142  * property groups may not have the property groups iterated as parents.
143  *
144  * Implementation requires that iterators for instances and snapshots be
145  * composition-savvy, and that we have a "composed property group" entity
146  * which represents the composition of a number of property groups.  Iteration
147  * over "composed property groups" yields properties which may have different
148  * parents, but for all other operations a composed property group behaves
149  * like the top-most property group it represents.
150  *
151  * The implementation is based on the rn_cchain[] array of rc_node_t pointers
152  * in rc_node_t.  For instances, the pointers point to the instance and its
153  * parent service.  For snapshots they point to the child snaplevels, and for
154  * composed property groups they point to property groups.  A composed
155  * iterator carries an index into rn_cchain[].  Thus most of the magic ends up
156  * in the rc_iter_*() code.
157  */
158 
159 #include <assert.h>
160 #include <atomic.h>
161 #include <errno.h>
162 #include <libuutil.h>
163 #include <libscf.h>
164 #include <libscf_priv.h>
165 #include <prof_attr.h>
166 #include <pthread.h>
167 #include <stdio.h>
168 #include <stdlib.h>
169 #include <strings.h>
170 #include <sys/types.h>
171 #include <unistd.h>
172 #include <user_attr.h>
173 
174 #include "configd.h"
175 
/*
 * Authorization strings, plus the property group and property names that
 * the permission-checking code refers to.  The AUTH_PG_* names are aliases
 * for the corresponding libscf SCF_PG_* constants.
 */
#define	AUTH_PREFIX		"solaris.smf."
#define	AUTH_MANAGE		AUTH_PREFIX "manage"
#define	AUTH_MODIFY		AUTH_PREFIX "modify"
#define	AUTH_MODIFY_PREFIX	AUTH_MODIFY "."
#define	AUTH_PG_ACTIONS		SCF_PG_RESTARTER_ACTIONS
#define	AUTH_PG_ACTIONS_TYPE	SCF_PG_RESTARTER_ACTIONS_TYPE
#define	AUTH_PG_GENERAL		SCF_PG_GENERAL
#define	AUTH_PG_GENERAL_TYPE	SCF_PG_GENERAL_TYPE
#define	AUTH_PG_GENERAL_OVR	SCF_PG_GENERAL_OVR
#define	AUTH_PG_GENERAL_OVR_TYPE  SCF_PG_GENERAL_OVR_TYPE
#define	AUTH_PROP_ACTION	"action_authorization"
#define	AUTH_PROP_ENABLED	"enabled"
#define	AUTH_PROP_MODIFY	"modify_authorization"
#define	AUTH_PROP_VALUE		"value_authorization"
/* libsecdb should take care of this. */
#define	RBAC_AUTH_SEP		","

/* Capacity of the rt_valid_children[] array in rc_type_info_t. */
#define	MAX_VALID_CHILDREN 3
194 
/*
 * Static description of one entity type; the rc_types[] table holds one of
 * these per type.
 *
 *   rt_num_ids		number of leading rl_ids[] slots that rc_node_hash()
 *			and rc_node_match() treat as significant
 *   rt_name_flags	flags handed to uu_check_name() by
 *			rc_check_type_name()
 *   rt_valid_children	child types accepted by rc_check_parent_child();
 *			slots not listed in an initializer are zero
 */
typedef struct rc_type_info {
	uint32_t	rt_type;		/* matches array index */
	uint32_t	rt_num_ids;
	uint32_t	rt_name_flags;
	uint32_t	rt_valid_children[MAX_VALID_CHILDREN];
} rc_type_info_t;
201 
/*
 * rt_name_flags value used for the NONE and SNAPLEVEL entries below;
 * presumably marks types whose nodes have no name to validate.
 */
#define	RT_NO_NAME	-1U

/*
 * Per-type table, looked up as rc_types[type] by the hashing, matching and
 * type-checking routines.  The final {-1UL} entry is a sentinel.
 *
 * NOTE(review): NUM_TYPES below counts the sentinel entry as well, so the
 * sentinel's own index satisfies the "type < NUM_TYPES" range checks used
 * throughout this file -- confirm that this is intended.
 */
static rc_type_info_t rc_types[] = {
	{REP_PROTOCOL_ENTITY_NONE, 0, RT_NO_NAME},
	{REP_PROTOCOL_ENTITY_SCOPE, 0, 0,
	    {REP_PROTOCOL_ENTITY_SERVICE, REP_PROTOCOL_ENTITY_SCOPE}},
	{REP_PROTOCOL_ENTITY_SERVICE, 0, UU_NAME_DOMAIN | UU_NAME_PATH,
	    {REP_PROTOCOL_ENTITY_INSTANCE, REP_PROTOCOL_ENTITY_PROPERTYGRP}},
	{REP_PROTOCOL_ENTITY_INSTANCE, 1, UU_NAME_DOMAIN,
	    {REP_PROTOCOL_ENTITY_SNAPSHOT, REP_PROTOCOL_ENTITY_PROPERTYGRP}},
	{REP_PROTOCOL_ENTITY_SNAPSHOT, 2, UU_NAME_DOMAIN,
	    {REP_PROTOCOL_ENTITY_SNAPLEVEL, REP_PROTOCOL_ENTITY_PROPERTYGRP}},
	{REP_PROTOCOL_ENTITY_SNAPLEVEL, 4, RT_NO_NAME,
	    {REP_PROTOCOL_ENTITY_PROPERTYGRP}},
	{REP_PROTOCOL_ENTITY_PROPERTYGRP, 5, UU_NAME_DOMAIN,
	    {REP_PROTOCOL_ENTITY_PROPERTY}},
	{REP_PROTOCOL_ENTITY_CPROPERTYGRP, 0, UU_NAME_DOMAIN,
	    {REP_PROTOCOL_ENTITY_PROPERTY}},
	{REP_PROTOCOL_ENTITY_PROPERTY, 7, UU_NAME_DOMAIN},
	{-1UL}
};
/* Number of entries in rc_types[], sentinel included. */
#define	NUM_TYPES	((sizeof (rc_types) / sizeof (*rc_types)))
224 
/*
 * Element of a permcheck_t hash table.
 *
 * pce_auth is declared with one byte but holds a full NUL-terminated
 * authorization string: pc_add() allocates
 * offsetof(struct pc_elt, pce_auth) + strlen(auth) + 1 bytes per element.
 */
struct pc_elt {
	struct pc_elt	*pce_next;	/* next element in the same bucket */
	char		pce_auth[1];	/* authorization string (variable) */
};
230 
/*
 * An authorization set hash table.
 *
 * Buckets are selected with "pc_hash(auth) & (pc_bnum - 1)", so pc_bnum is
 * expected to stay a power of two; pc_create() starts it at 8 and pc_grow()
 * doubles it.
 */
typedef struct {
	struct pc_elt	**pc_buckets;
	uint_t		pc_bnum;		/* number of buckets */
	uint_t		pc_enum;		/* number of elements */
} permcheck_t;
237 
/*
 * libuutil list pools.  rc_notify_pool is the pool that rc_notify_t list
 * nodes are initialized against (see rc_notify_deletion()); the pools are
 * not created in this part of the file.
 */
static uu_list_pool_t *rc_children_pool;
static uu_list_pool_t *rc_pg_notify_pool;
static uu_list_pool_t *rc_notify_pool;
static uu_list_pool_t *rc_notify_info_pool;

/* Root of the entity tree (see the comment at the top of this file). */
static rc_node_t *rc_scope;

/*
 * rc_pg_notify_lock guards the notification lists and rc_notify_in_use.
 * While rc_notify_in_use is non-zero, rc_notify_remove_node() waits on
 * rc_pg_notify_cv instead of removing its entry.
 */
static pthread_mutex_t	rc_pg_notify_lock = PTHREAD_MUTEX_INITIALIZER;
static pthread_cond_t	rc_pg_notify_cv = PTHREAD_COND_INITIALIZER;
static uint_t		rc_notify_in_use;	/* blocks removals */

/* Serializes calls to _get_auth_policy() in perm_granted(). */
static pthread_mutex_t	perm_lock = PTHREAD_MUTEX_INITIALIZER;

/*
 * Forward declaration: called from the release paths below with the
 * node's rn_lock held.
 */
static void rc_node_unrefed(rc_node_t *np);
252 
253 /*
254  * We support an arbitrary number of clients interested in events for certain
255  * types of changes.  Each client is represented by an rc_notify_info_t, and
256  * all clients are chained onto the rc_notify_info_list.
257  *
258  * The rc_notify_list is the global notification list.  Each entry is of
259  * type rc_notify_t, which is embedded in one of three other structures:
260  *
261  *	rc_node_t		property group update notification
262  *	rc_notify_delete_t	object deletion notification
263  *	rc_notify_info_t	notification clients
264  *
265  * Which type of object is determined by which pointer in the rc_notify_t is
266  * non-NULL.
267  *
268  * New notifications and clients are added to the end of the list.
269  * Notifications no-one is interested in are never added to the list.
270  *
271  * Clients use their position in the list to track which notifications they
272  * have not yet reported.  As they process notifications, they move forward
273  * in the list past them.  There is always a client at the beginning of the
274  * list -- as he moves past notifications, he removes them from the list and
275  * cleans them up.
276  *
277  * The rc_pg_notify_lock protects all notification state.  The rc_pg_notify_cv
278  * is used for global signalling, and each client has a cv which he waits for
279  * events of interest on.
280  */
/* Registered notification clients (rc_notify_info_t). */
static uu_list_t	*rc_notify_info_list;
/* Global notification list (rc_notify_t entries). */
static uu_list_t	*rc_notify_list;

/*
 * Node cache hash table.  A bucket is chosen by masking an rc_node_hash()
 * value with HASH_MASK, which requires HASH_SIZE to be a power of two.
 */
#define	HASH_SIZE	512
#define	HASH_MASK	(HASH_SIZE - 1)

#pragma align 64(cache_hash)
static cache_bucket_t cache_hash[HASH_SIZE];

/* Map a hash value to the address of its bucket. */
#define	CACHE_BUCKET(h)		(&cache_hash[(h) & HASH_MASK])
291 
292 static uint32_t
293 rc_node_hash(rc_node_lookup_t *lp)
294 {
295 	uint32_t type = lp->rl_type;
296 	uint32_t backend = lp->rl_backend;
297 	uint32_t mainid = lp->rl_main_id;
298 	uint32_t *ids = lp->rl_ids;
299 
300 	rc_type_info_t *tp = &rc_types[type];
301 	uint32_t num_ids;
302 	uint32_t left;
303 	uint32_t hash;
304 
305 	assert(backend == BACKEND_TYPE_NORMAL ||
306 	    backend == BACKEND_TYPE_NONPERSIST);
307 
308 	assert(type > 0 && type < NUM_TYPES);
309 	num_ids = tp->rt_num_ids;
310 
311 	left = MAX_IDS - num_ids;
312 	assert(num_ids <= MAX_IDS);
313 
314 	hash = type * 7 + mainid * 5 + backend;
315 
316 	while (num_ids-- > 0)
317 		hash = hash * 11 + *ids++ * 7;
318 
319 	/*
320 	 * the rest should be zeroed
321 	 */
322 	while (left-- > 0)
323 		assert(*ids++ == 0);
324 
325 	return (hash);
326 }
327 
328 static int
329 rc_node_match(rc_node_t *np, rc_node_lookup_t *l)
330 {
331 	rc_node_lookup_t *r = &np->rn_id;
332 	rc_type_info_t *tp;
333 	uint32_t type;
334 	uint32_t num_ids;
335 
336 	if (r->rl_main_id != l->rl_main_id)
337 		return (0);
338 
339 	type = r->rl_type;
340 	if (type != l->rl_type)
341 		return (0);
342 
343 	assert(type > 0 && type < NUM_TYPES);
344 
345 	tp = &rc_types[r->rl_type];
346 	num_ids = tp->rt_num_ids;
347 
348 	assert(num_ids <= MAX_IDS);
349 	while (num_ids-- > 0)
350 		if (r->rl_ids[num_ids] != l->rl_ids[num_ids])
351 			return (0);
352 
353 	return (1);
354 }
355 
356 /*
357  * the "other" references on a node are maintained in an atomically
358  * updated refcount, rn_other_refs.  This can be bumped from arbitrary
359  * context, and tracks references to a possibly out-of-date node's children.
360  *
361  * To prevent the node from disappearing between the final drop of
362  * rn_other_refs and the unref handling, rn_other_refs_held is bumped on
363  * 0->1 transitions and decremented (with the node lock held) on 1->0
364  * transitions.
365  */
366 static void
367 rc_node_hold_other(rc_node_t *np)
368 {
369 	if (atomic_add_32_nv(&np->rn_other_refs, 1) == 1) {
370 		atomic_add_32(&np->rn_other_refs_held, 1);
371 		assert(np->rn_other_refs_held > 0);
372 	}
373 	assert(np->rn_other_refs > 0);
374 }
375 
376 /*
377  * No node locks may be held
378  */
379 static void
380 rc_node_rele_other(rc_node_t *np)
381 {
382 	assert(np->rn_other_refs > 0);
383 	if (atomic_add_32_nv(&np->rn_other_refs, -1) == 0) {
384 		(void) pthread_mutex_lock(&np->rn_lock);
385 		assert(np->rn_other_refs_held > 0);
386 		if (atomic_add_32_nv(&np->rn_other_refs_held, -1) == 0 &&
387 		    np->rn_refs == 0 && (np->rn_flags & RC_NODE_OLD))
388 			rc_node_unrefed(np);
389 		else
390 			(void) pthread_mutex_unlock(&np->rn_lock);
391 	}
392 }
393 
394 static void
395 rc_node_hold_locked(rc_node_t *np)
396 {
397 	assert(MUTEX_HELD(&np->rn_lock));
398 
399 	if (np->rn_refs == 0 && (np->rn_flags & RC_NODE_PARENT_REF))
400 		rc_node_hold_other(np->rn_parent_ref);
401 	np->rn_refs++;
402 	assert(np->rn_refs > 0);
403 }
404 
405 static void
406 rc_node_hold(rc_node_t *np)
407 {
408 	(void) pthread_mutex_lock(&np->rn_lock);
409 	rc_node_hold_locked(np);
410 	(void) pthread_mutex_unlock(&np->rn_lock);
411 }
412 
413 static void
414 rc_node_rele_locked(rc_node_t *np)
415 {
416 	int unref = 0;
417 	rc_node_t *par_ref = NULL;
418 
419 	assert(MUTEX_HELD(&np->rn_lock));
420 	assert(np->rn_refs > 0);
421 
422 	if (--np->rn_refs == 0) {
423 		if (np->rn_flags & RC_NODE_PARENT_REF)
424 			par_ref = np->rn_parent_ref;
425 
426 		/*
427 		 * Composed property groups are only as good as their
428 		 * references.
429 		 */
430 		if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP)
431 			np->rn_flags |= RC_NODE_DEAD;
432 
433 		if ((np->rn_flags & (RC_NODE_DEAD|RC_NODE_OLD)) &&
434 		    np->rn_other_refs == 0 && np->rn_other_refs_held == 0)
435 			unref = 1;
436 	}
437 
438 	if (unref)
439 		rc_node_unrefed(np);
440 	else
441 		(void) pthread_mutex_unlock(&np->rn_lock);
442 
443 	if (par_ref != NULL)
444 		rc_node_rele_other(par_ref);
445 }
446 
447 void
448 rc_node_rele(rc_node_t *np)
449 {
450 	(void) pthread_mutex_lock(&np->rn_lock);
451 	rc_node_rele_locked(np);
452 }
453 
454 static cache_bucket_t *
455 cache_hold(uint32_t h)
456 {
457 	cache_bucket_t *bp = CACHE_BUCKET(h);
458 	(void) pthread_mutex_lock(&bp->cb_lock);
459 	return (bp);
460 }
461 
462 static void
463 cache_release(cache_bucket_t *bp)
464 {
465 	(void) pthread_mutex_unlock(&bp->cb_lock);
466 }
467 
468 static rc_node_t *
469 cache_lookup_unlocked(cache_bucket_t *bp, rc_node_lookup_t *lp)
470 {
471 	uint32_t h = rc_node_hash(lp);
472 	rc_node_t *np;
473 
474 	assert(MUTEX_HELD(&bp->cb_lock));
475 	assert(bp == CACHE_BUCKET(h));
476 
477 	for (np = bp->cb_head; np != NULL; np = np->rn_hash_next) {
478 		if (np->rn_hash == h && rc_node_match(np, lp)) {
479 			rc_node_hold(np);
480 			return (np);
481 		}
482 	}
483 
484 	return (NULL);
485 }
486 
487 static rc_node_t *
488 cache_lookup(rc_node_lookup_t *lp)
489 {
490 	uint32_t h;
491 	cache_bucket_t *bp;
492 	rc_node_t *np;
493 
494 	h = rc_node_hash(lp);
495 	bp = cache_hold(h);
496 
497 	np = cache_lookup_unlocked(bp, lp);
498 
499 	cache_release(bp);
500 
501 	return (np);
502 }
503 
504 static void
505 cache_insert_unlocked(cache_bucket_t *bp, rc_node_t *np)
506 {
507 	assert(MUTEX_HELD(&bp->cb_lock));
508 	assert(np->rn_hash == rc_node_hash(&np->rn_id));
509 	assert(bp == CACHE_BUCKET(np->rn_hash));
510 
511 	assert(np->rn_hash_next == NULL);
512 
513 	np->rn_hash_next = bp->cb_head;
514 	bp->cb_head = np;
515 }
516 
517 static void
518 cache_remove_unlocked(cache_bucket_t *bp, rc_node_t *np)
519 {
520 	rc_node_t **npp;
521 
522 	assert(MUTEX_HELD(&bp->cb_lock));
523 	assert(np->rn_hash == rc_node_hash(&np->rn_id));
524 	assert(bp == CACHE_BUCKET(np->rn_hash));
525 
526 	for (npp = &bp->cb_head; *npp != NULL; npp = &(*npp)->rn_hash_next)
527 		if (*npp == np)
528 			break;
529 
530 	assert(*npp == np);
531 	*npp = np->rn_hash_next;
532 	np->rn_hash_next = NULL;
533 }
534 
535 /*
536  * verify that the 'parent' type can have a child typed 'child'
537  * Fails with
538  *   _INVALID_TYPE - argument is invalid
539  *   _TYPE_MISMATCH - parent type cannot have children of type child
540  */
541 static int
542 rc_check_parent_child(uint32_t parent, uint32_t child)
543 {
544 	int idx;
545 	uint32_t type;
546 
547 	if (parent == 0 || parent >= NUM_TYPES ||
548 	    child == 0 || child >= NUM_TYPES)
549 		return (REP_PROTOCOL_FAIL_INVALID_TYPE); /* invalid types */
550 
551 	for (idx = 0; idx < MAX_VALID_CHILDREN; idx++) {
552 		type = rc_types[parent].rt_valid_children[idx];
553 		if (type == child)
554 			return (REP_PROTOCOL_SUCCESS);
555 	}
556 
557 	return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
558 }
559 
560 /*
561  * Fails with
562  *   _INVALID_TYPE - type is invalid
563  *   _BAD_REQUEST - name is an invalid name for a node of type type
564  */
565 int
566 rc_check_type_name(uint32_t type, const char *name)
567 {
568 	if (type == 0 || type >= NUM_TYPES)
569 		return (REP_PROTOCOL_FAIL_INVALID_TYPE); /* invalid types */
570 
571 	if (uu_check_name(name, rc_types[type].rt_name_flags) == -1)
572 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
573 
574 	return (REP_PROTOCOL_SUCCESS);
575 }
576 
577 static int
578 rc_check_pgtype_name(const char *name)
579 {
580 	if (uu_check_name(name, UU_NAME_DOMAIN) == -1)
581 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
582 
583 	return (REP_PROTOCOL_SUCCESS);
584 }
585 
586 static int
587 rc_notify_info_interested(rc_notify_info_t *rnip, rc_notify_t *np)
588 {
589 	rc_node_t *nnp = np->rcn_node;
590 	int i;
591 
592 	assert(MUTEX_HELD(&rc_pg_notify_lock));
593 
594 	if (np->rcn_delete != NULL) {
595 		assert(np->rcn_info == NULL && np->rcn_node == NULL);
596 		return (1);		/* everyone likes deletes */
597 	}
598 	if (np->rcn_node == NULL) {
599 		assert(np->rcn_info != NULL || np->rcn_delete != NULL);
600 		return (0);
601 	}
602 	assert(np->rcn_info == NULL);
603 
604 	for (i = 0; i < RC_NOTIFY_MAX_NAMES; i++) {
605 		if (rnip->rni_namelist[i] != NULL) {
606 			if (strcmp(nnp->rn_name, rnip->rni_namelist[i]) == 0)
607 				return (1);
608 		}
609 		if (rnip->rni_typelist[i] != NULL) {
610 			if (strcmp(nnp->rn_type, rnip->rni_typelist[i]) == 0)
611 				return (1);
612 		}
613 	}
614 	return (0);
615 }
616 
617 static void
618 rc_notify_insert_node(rc_node_t *nnp)
619 {
620 	rc_notify_t *np = &nnp->rn_notify;
621 	rc_notify_info_t *nip;
622 	int found = 0;
623 
624 	assert(np->rcn_info == NULL);
625 
626 	if (nnp->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTYGRP)
627 		return;
628 
629 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
630 	np->rcn_node = nnp;
631 	for (nip = uu_list_first(rc_notify_info_list); nip != NULL;
632 	    nip = uu_list_next(rc_notify_info_list, nip)) {
633 		if (rc_notify_info_interested(nip, np)) {
634 			(void) pthread_cond_broadcast(&nip->rni_cv);
635 			found++;
636 		}
637 	}
638 	if (found)
639 		(void) uu_list_insert_before(rc_notify_list, NULL, np);
640 	else
641 		np->rcn_node = NULL;
642 
643 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
644 }
645 
646 static void
647 rc_notify_deletion(rc_notify_delete_t *ndp, const char *service,
648     const char *instance, const char *pg)
649 {
650 	rc_notify_info_t *nip;
651 
652 	uu_list_node_init(&ndp->rnd_notify, &ndp->rnd_notify.rcn_list_node,
653 	    rc_notify_pool);
654 	ndp->rnd_notify.rcn_delete = ndp;
655 
656 	(void) snprintf(ndp->rnd_fmri, sizeof (ndp->rnd_fmri),
657 	    "svc:/%s%s%s%s%s", service,
658 	    (instance != NULL)? ":" : "", (instance != NULL)? instance : "",
659 	    (pg != NULL)? "/:properties/" : "", (pg != NULL)? pg : "");
660 
661 	/*
662 	 * add to notification list, notify watchers
663 	 */
664 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
665 	for (nip = uu_list_first(rc_notify_info_list); nip != NULL;
666 	    nip = uu_list_next(rc_notify_info_list, nip))
667 		(void) pthread_cond_broadcast(&nip->rni_cv);
668 	(void) uu_list_insert_before(rc_notify_list, NULL, ndp);
669 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
670 }
671 
672 static void
673 rc_notify_remove_node(rc_node_t *nnp)
674 {
675 	rc_notify_t *np = &nnp->rn_notify;
676 
677 	assert(np->rcn_info == NULL);
678 	assert(!MUTEX_HELD(&nnp->rn_lock));
679 
680 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
681 	while (np->rcn_node != NULL) {
682 		if (rc_notify_in_use) {
683 			(void) pthread_cond_wait(&rc_pg_notify_cv,
684 			    &rc_pg_notify_lock);
685 			continue;
686 		}
687 		(void) uu_list_remove(rc_notify_list, np);
688 		np->rcn_node = NULL;
689 		break;
690 	}
691 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
692 }
693 
694 static void
695 rc_notify_remove_locked(rc_notify_t *np)
696 {
697 	assert(MUTEX_HELD(&rc_pg_notify_lock));
698 	assert(rc_notify_in_use == 0);
699 
700 	(void) uu_list_remove(rc_notify_list, np);
701 	if (np->rcn_node) {
702 		np->rcn_node = NULL;
703 	} else if (np->rcn_delete) {
704 		uu_free(np->rcn_delete);
705 	} else {
706 		assert(0);	/* CAN'T HAPPEN */
707 	}
708 }
709 
710 /*
711  * Permission checking functions.  See comment atop this file.
712  */
713 #ifndef NATIVE_BUILD
714 static permcheck_t *
715 pc_create()
716 {
717 	permcheck_t *p;
718 
719 	p = uu_zalloc(sizeof (*p));
720 	if (p == NULL)
721 		return (NULL);
722 	p->pc_bnum = 8;			/* Normal case will only have 2 elts. */
723 	p->pc_buckets = uu_zalloc(sizeof (*p->pc_buckets) * p->pc_bnum);
724 	if (p->pc_buckets == NULL) {
725 		uu_free(p);
726 		return (NULL);
727 	}
728 
729 	p->pc_enum = 0;
730 	return (p);
731 }
732 
733 static void
734 pc_free(permcheck_t *pcp)
735 {
736 	uint_t i;
737 	struct pc_elt *ep, *next;
738 
739 	for (i = 0; i < pcp->pc_bnum; ++i) {
740 		for (ep = pcp->pc_buckets[i]; ep != NULL; ep = next) {
741 			next = ep->pce_next;
742 			free(ep);
743 		}
744 	}
745 
746 	free(pcp->pc_buckets);
747 	free(pcp);
748 }
749 
/*
 * Hash an authorization string.  This is the generic string hash from
 * uts/common/os/modhash.c: shift in four bits per character and fold the
 * top nibble back into the low bits whenever it becomes non-zero.
 */
static uint32_t
pc_hash(const char *auth)
{
	uint32_t hash = 0;
	uint32_t top;
	const char *cp = auth;

	while (*cp != '\0') {
		hash = (hash << 4) + *cp;
		top = hash & 0xf0000000;
		if (top != 0)
			hash ^= (top >> 24) ^ top;
		cp++;
	}

	return (hash);
}
770 
771 static int
772 pc_exists(const permcheck_t *pcp, const char *auth)
773 {
774 	uint32_t h;
775 	struct pc_elt *ep;
776 
777 	h = pc_hash(auth);
778 	for (ep = pcp->pc_buckets[h & (pcp->pc_bnum - 1)];
779 	    ep != NULL;
780 	    ep = ep->pce_next) {
781 		if (strcmp(auth, ep->pce_auth) == 0)
782 			return (1);
783 	}
784 
785 	return (0);
786 }
787 
788 static int
789 pc_match(const permcheck_t *pcp, const char *pattern)
790 {
791 	uint_t i;
792 	struct pc_elt *ep;
793 
794 	for (i = 0; i < pcp->pc_bnum; ++i) {
795 		for (ep = pcp->pc_buckets[i]; ep != NULL; ep = ep->pce_next) {
796 			if (_auth_match(pattern, ep->pce_auth))
797 				return (1);
798 		}
799 	}
800 
801 	return (0);
802 }
803 
804 static int
805 pc_grow(permcheck_t *pcp)
806 {
807 	uint_t new_bnum, i, j;
808 	struct pc_elt **new_buckets;
809 	struct pc_elt *ep, *next;
810 
811 	new_bnum = pcp->pc_bnum * 2;
812 	if (new_bnum < pcp->pc_bnum)
813 		/* Homey don't play that. */
814 		return (-1);
815 
816 	new_buckets = uu_zalloc(sizeof (*new_buckets) * new_bnum);
817 	if (new_buckets == NULL)
818 		return (-1);
819 
820 	for (i = 0; i < pcp->pc_bnum; ++i) {
821 		for (ep = pcp->pc_buckets[i]; ep != NULL; ep = next) {
822 			next = ep->pce_next;
823 			j = pc_hash(ep->pce_auth) & (new_bnum - 1);
824 			ep->pce_next = new_buckets[j];
825 			new_buckets[j] = ep;
826 		}
827 	}
828 
829 	uu_free(pcp->pc_buckets);
830 	pcp->pc_buckets = new_buckets;
831 	pcp->pc_bnum = new_bnum;
832 
833 	return (0);
834 }
835 
836 static int
837 pc_add(permcheck_t *pcp, const char *auth)
838 {
839 	struct pc_elt *ep;
840 	uint_t i;
841 
842 	ep = uu_zalloc(offsetof(struct pc_elt, pce_auth) + strlen(auth) + 1);
843 	if (ep == NULL)
844 		return (-1);
845 
846 	/* Grow if pc_enum / pc_bnum > 3/4. */
847 	if (pcp->pc_enum * 4 > 3 * pcp->pc_bnum)
848 		/* Failure is not a stopper; we'll try again next time. */
849 		(void) pc_grow(pcp);
850 
851 	(void) strcpy(ep->pce_auth, auth);
852 
853 	i = pc_hash(auth) & (pcp->pc_bnum - 1);
854 	ep->pce_next = pcp->pc_buckets[i];
855 	pcp->pc_buckets[i] = ep;
856 
857 	++pcp->pc_enum;
858 
859 	return (0);
860 }
861 
862 /*
863  * For the type of a property group, return the authorization which may be
864  * used to modify it.
865  */
866 static const char *
867 perm_auth_for_pgtype(const char *pgtype)
868 {
869 	if (strcmp(pgtype, SCF_GROUP_METHOD) == 0)
870 		return (AUTH_MODIFY_PREFIX "method");
871 	else if (strcmp(pgtype, SCF_GROUP_DEPENDENCY) == 0)
872 		return (AUTH_MODIFY_PREFIX "dependency");
873 	else if (strcmp(pgtype, SCF_GROUP_APPLICATION) == 0)
874 		return (AUTH_MODIFY_PREFIX "application");
875 	else if (strcmp(pgtype, SCF_GROUP_FRAMEWORK) == 0)
876 		return (AUTH_MODIFY_PREFIX "framework");
877 	else
878 		return (NULL);
879 }
880 
881 /*
882  * Fails with
883  *   _NO_RESOURCES - out of memory
884  */
885 static int
886 perm_add_enabling(permcheck_t *pcp, const char *auth)
887 {
888 	return (pc_add(pcp, auth) == 0 ? REP_PROTOCOL_SUCCESS :
889 	    REP_PROTOCOL_FAIL_NO_RESOURCES);
890 }
891 
892 /* Note that perm_add_enabling_values() is defined below. */
893 
894 /*
895  * perm_granted() returns 1 if the current door caller has one of the enabling
896  * authorizations in pcp, 0 if it doesn't, and -1 if an error (usually lack of
897  * memory) occurs.  check_auth_list() checks an RBAC_AUTH_SEP-separated list
898  * of authorizations for existence in pcp, and check_prof_list() checks the
899  * authorizations granted to an RBAC_AUTH_SEP-separated list of profiles.
900  */
901 static int
902 check_auth_list(const permcheck_t *pcp, char *authlist)
903 {
904 	char *auth, *lasts;
905 	int ret;
906 
907 	for (auth = (char *)strtok_r(authlist, RBAC_AUTH_SEP, &lasts);
908 	    auth != NULL;
909 	    auth = (char *)strtok_r(NULL, RBAC_AUTH_SEP, &lasts)) {
910 		if (strchr(auth, KV_WILDCHAR) == NULL)
911 			ret = pc_exists(pcp, auth);
912 		else
913 			ret = pc_match(pcp, auth);
914 
915 		if (ret)
916 			return (ret);
917 	}
918 
919 	return (0);
920 }
921 
922 static int
923 check_prof_list(const permcheck_t *pcp, char *proflist)
924 {
925 	char *prof, *lasts, *authlist, *subproflist;
926 	profattr_t *pap;
927 	int ret = 0;
928 
929 	for (prof = strtok_r(proflist, RBAC_AUTH_SEP, &lasts);
930 	    prof != NULL;
931 	    prof = strtok_r(NULL, RBAC_AUTH_SEP, &lasts)) {
932 		pap = getprofnam(prof);
933 		if (pap == NULL)
934 			continue;
935 
936 		authlist = kva_match(pap->attr, PROFATTR_AUTHS_KW);
937 		if (authlist != NULL)
938 			ret = check_auth_list(pcp, authlist);
939 
940 		if (!ret) {
941 			subproflist = kva_match(pap->attr, PROFATTR_PROFS_KW);
942 			if (subproflist != NULL)
943 				/* depth check to avoid invinite recursion? */
944 				ret = check_prof_list(pcp, subproflist);
945 		}
946 
947 		free_profattr(pap);
948 		if (ret)
949 			return (ret);
950 	}
951 
952 	return (ret);
953 }
954 
955 static int
956 perm_granted(const permcheck_t *pcp)
957 {
958 	ucred_t *uc;
959 
960 	int ret = 0;
961 	uid_t uid;
962 	userattr_t *uap;
963 	char *authlist, *proflist, *def_prof = NULL;
964 
965 	/*
966 	 * Get generic authorizations from policy.conf
967 	 *
968 	 * Note that _get_auth_policy is not threadsafe, so we single-thread
969 	 * access to it.
970 	 */
971 	(void) pthread_mutex_lock(&perm_lock);
972 	ret = _get_auth_policy(&authlist, &def_prof);
973 	(void) pthread_mutex_unlock(&perm_lock);
974 
975 	if (ret != 0)
976 		return (-1);
977 
978 	if (authlist != NULL) {
979 		ret = check_auth_list(pcp, authlist);
980 		free(authlist);
981 
982 		if (ret) {
983 			free(def_prof);
984 			return (ret);
985 		}
986 	}
987 
988 	/*
989 	 * Put off checking def_prof for later in an attempt to consolidate
990 	 * prof_attr accesses.
991 	 */
992 
993 	/* Get the uid */
994 	if ((uc = get_ucred()) == NULL) {
995 		free(def_prof);
996 
997 		if (errno == EINVAL) {
998 			/*
999 			 * Client is no longer waiting for our response (e.g.,
1000 			 * it received a signal & resumed with EINTR).
1001 			 * Punting with door_return() would be nice but we
1002 			 * need to release all of the locks & references we
1003 			 * hold.  And we must report failure to the client
1004 			 * layer to keep it from ignoring retries as
1005 			 * already-done (idempotency & all that).  None of the
1006 			 * error codes fit very well, so we might as well
1007 			 * force the return of _PERMISSION_DENIED since we
1008 			 * couldn't determine the user.
1009 			 */
1010 			return (0);
1011 		}
1012 		assert(0);
1013 		abort();
1014 	}
1015 
1016 	uid = ucred_geteuid(uc);
1017 	assert(uid != -1);
1018 
1019 	uap = getuseruid(uid);
1020 	if (uap != NULL) {
1021 		/* Get the authorizations from user_attr. */
1022 		authlist = kva_match(uap->attr, USERATTR_AUTHS_KW);
1023 		if (authlist != NULL)
1024 			ret = check_auth_list(pcp, authlist);
1025 	}
1026 
1027 	if (!ret && def_prof != NULL) {
1028 		/* Check generic profiles. */
1029 		ret = check_prof_list(pcp, def_prof);
1030 	}
1031 
1032 	if (!ret && uap != NULL) {
1033 		proflist = kva_match(uap->attr, USERATTR_PROFILES_KW);
1034 		if (proflist != NULL)
1035 			ret = check_prof_list(pcp, proflist);
1036 	}
1037 
1038 	if (def_prof != NULL)
1039 		free(def_prof);
1040 	if (uap != NULL)
1041 		free_userattr(uap);
1042 
1043 	return (ret);
1044 }
1045 #endif /* NATIVE_BUILD */
1046 
1047 /*
1048  * flags in RC_NODE_WAITING_FLAGS are broadcast when unset, and are used to
1049  * serialize certain actions, and to wait for certain operations to complete
1050  *
1051  * The waiting flags are:
1052  *	RC_NODE_CHILDREN_CHANGING
1053  *		The child list is being built or changed (due to creation
1054  *		or deletion).  All iterators pause.
1055  *
1056  *	RC_NODE_USING_PARENT
1057  *		Someone is actively using the parent pointer, so we can't
1058  *		be removed from the parent list.
1059  *
1060  *	RC_NODE_CREATING_CHILD
1061  *		A child is being created -- locks out other creations, to
1062  *		prevent insert-insert races.
1063  *
1064  *	RC_NODE_IN_TX
1065  *		This object is running a transaction.
1066  *
1067  *	RC_NODE_DYING
1068  *		This node might be dying.  Always set as a set, using
1069  *		RC_NODE_DYING_FLAGS (which is everything but
1070  *		RC_NODE_USING_PARENT)
1071  */
1072 static int
1073 rc_node_hold_flag(rc_node_t *np, uint32_t flag)
1074 {
1075 	assert(MUTEX_HELD(&np->rn_lock));
1076 	assert((flag & ~RC_NODE_WAITING_FLAGS) == 0);
1077 
1078 	while (!(np->rn_flags & RC_NODE_DEAD) && (np->rn_flags & flag)) {
1079 		(void) pthread_cond_wait(&np->rn_cv, &np->rn_lock);
1080 	}
1081 	if (np->rn_flags & RC_NODE_DEAD)
1082 		return (0);
1083 
1084 	np->rn_flags |= flag;
1085 	return (1);
1086 }
1087 
1088 static void
1089 rc_node_rele_flag(rc_node_t *np, uint32_t flag)
1090 {
1091 	assert((flag & ~RC_NODE_WAITING_FLAGS) == 0);
1092 	assert(MUTEX_HELD(&np->rn_lock));
1093 	assert((np->rn_flags & flag) == flag);
1094 	np->rn_flags &= ~flag;
1095 	(void) pthread_cond_broadcast(&np->rn_cv);
1096 }
1097 
1098 /*
1099  * wait until a particular flag has cleared.  Fails if the object dies.
1100  */
1101 static int
1102 rc_node_wait_flag(rc_node_t *np, uint32_t flag)
1103 {
1104 	assert(MUTEX_HELD(&np->rn_lock));
1105 	while (!(np->rn_flags & RC_NODE_DEAD) && (np->rn_flags & flag))
1106 		(void) pthread_cond_wait(&np->rn_cv, &np->rn_lock);
1107 
1108 	return (!(np->rn_flags & RC_NODE_DEAD));
1109 }
1110 
1111 /*
1112  * On entry, np's lock must be held, and this thread must be holding
1113  * RC_NODE_USING_PARENT.  On return, both of them are released.
1114  *
1115  * If the return value is NULL, np either does not have a parent, or
1116  * the parent has been marked DEAD.
1117  *
1118  * If the return value is non-NULL, it is the parent of np, and both
1119  * its lock and the requested flags are held.
1120  */
1121 static rc_node_t *
1122 rc_node_hold_parent_flag(rc_node_t *np, uint32_t flag)
1123 {
1124 	rc_node_t *pp;
1125 
1126 	assert(MUTEX_HELD(&np->rn_lock));
1127 	assert(np->rn_flags & RC_NODE_USING_PARENT);
1128 
1129 	if ((pp = np->rn_parent) == NULL) {
1130 		rc_node_rele_flag(np, RC_NODE_USING_PARENT);
1131 		(void) pthread_mutex_unlock(&np->rn_lock);
1132 		return (NULL);
1133 	}
1134 	(void) pthread_mutex_unlock(&np->rn_lock);
1135 
1136 	(void) pthread_mutex_lock(&pp->rn_lock);
1137 	(void) pthread_mutex_lock(&np->rn_lock);
1138 	rc_node_rele_flag(np, RC_NODE_USING_PARENT);
1139 	(void) pthread_mutex_unlock(&np->rn_lock);
1140 
1141 	if (!rc_node_hold_flag(pp, flag)) {
1142 		(void) pthread_mutex_unlock(&pp->rn_lock);
1143 		return (NULL);
1144 	}
1145 	return (pp);
1146 }
1147 
1148 rc_node_t *
1149 rc_node_alloc(void)
1150 {
1151 	rc_node_t *np = uu_zalloc(sizeof (*np));
1152 
1153 	if (np == NULL)
1154 		return (NULL);
1155 
1156 	(void) pthread_mutex_init(&np->rn_lock, NULL);
1157 	(void) pthread_cond_init(&np->rn_cv, NULL);
1158 
1159 	np->rn_children = uu_list_create(rc_children_pool, np, 0);
1160 	np->rn_pg_notify_list = uu_list_create(rc_pg_notify_pool, np, 0);
1161 
1162 	uu_list_node_init(np, &np->rn_sibling_node, rc_children_pool);
1163 
1164 	uu_list_node_init(&np->rn_notify, &np->rn_notify.rcn_list_node,
1165 	    rc_notify_pool);
1166 
1167 	return (np);
1168 }
1169 
1170 void
1171 rc_node_destroy(rc_node_t *np)
1172 {
1173 	int i;
1174 
1175 	if (np->rn_flags & RC_NODE_UNREFED)
1176 		return;				/* being handled elsewhere */
1177 
1178 	assert(np->rn_refs == 0 && np->rn_other_refs == 0);
1179 	assert(np->rn_former == NULL);
1180 
1181 	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
1182 		/* Release the holds from rc_iter_next(). */
1183 		for (i = 0; i < COMPOSITION_DEPTH; ++i) {
1184 			/* rn_cchain[i] may be NULL for empty snapshots. */
1185 			if (np->rn_cchain[i] != NULL)
1186 				rc_node_rele(np->rn_cchain[i]);
1187 		}
1188 	}
1189 
1190 	if (np->rn_name != NULL)
1191 		free((void *)np->rn_name);
1192 	np->rn_name = NULL;
1193 	if (np->rn_type != NULL)
1194 		free((void *)np->rn_type);
1195 	np->rn_type = NULL;
1196 	if (np->rn_values != NULL)
1197 		object_free_values(np->rn_values, np->rn_valtype,
1198 		    np->rn_values_count, np->rn_values_size);
1199 	np->rn_values = NULL;
1200 
1201 	if (np->rn_snaplevel != NULL)
1202 		rc_snaplevel_rele(np->rn_snaplevel);
1203 	np->rn_snaplevel = NULL;
1204 
1205 	uu_list_node_fini(np, &np->rn_sibling_node, rc_children_pool);
1206 
1207 	uu_list_node_fini(&np->rn_notify, &np->rn_notify.rcn_list_node,
1208 	    rc_notify_pool);
1209 
1210 	assert(uu_list_first(np->rn_children) == NULL);
1211 	uu_list_destroy(np->rn_children);
1212 	uu_list_destroy(np->rn_pg_notify_list);
1213 
1214 	(void) pthread_mutex_destroy(&np->rn_lock);
1215 	(void) pthread_cond_destroy(&np->rn_cv);
1216 
1217 	uu_free(np);
1218 }
1219 
1220 /*
1221  * Link in a child node.
1222  *
1223  * Because of the lock ordering, cp has to already be in the hash table with
1224  * its lock dropped before we get it.  To prevent anyone from noticing that
1225  * it is parentless, the creation code sets the RC_NODE_USING_PARENT.  Once
1226  * we've linked it in, we release the flag.
1227  */
1228 static void
1229 rc_node_link_child(rc_node_t *np, rc_node_t *cp)
1230 {
1231 	assert(!MUTEX_HELD(&np->rn_lock));
1232 	assert(!MUTEX_HELD(&cp->rn_lock));
1233 
1234 	(void) pthread_mutex_lock(&np->rn_lock);
1235 	(void) pthread_mutex_lock(&cp->rn_lock);
1236 	assert(!(cp->rn_flags & RC_NODE_IN_PARENT) &&
1237 	    (cp->rn_flags & RC_NODE_USING_PARENT));
1238 
1239 	assert(rc_check_parent_child(np->rn_id.rl_type, cp->rn_id.rl_type) ==
1240 	    REP_PROTOCOL_SUCCESS);
1241 
1242 	cp->rn_parent = np;
1243 	cp->rn_flags |= RC_NODE_IN_PARENT;
1244 	(void) uu_list_insert_before(np->rn_children, NULL, cp);
1245 
1246 	(void) pthread_mutex_unlock(&np->rn_lock);
1247 
1248 	rc_node_rele_flag(cp, RC_NODE_USING_PARENT);
1249 	(void) pthread_mutex_unlock(&cp->rn_lock);
1250 }
1251 
1252 /*
1253  * Sets the rn_parent_ref field of all the children of np to pp -- always
1254  * initially invoked as rc_node_setup_parent_ref(np, np), we then recurse.
1255  *
1256  * This is used when we mark a node RC_NODE_OLD, so that when the object and
1257  * its children are no longer referenced, they will all be deleted as a unit.
1258  */
1259 static void
1260 rc_node_setup_parent_ref(rc_node_t *np, rc_node_t *pp)
1261 {
1262 	rc_node_t *cp;
1263 
1264 	assert(MUTEX_HELD(&np->rn_lock));
1265 
1266 	for (cp = uu_list_first(np->rn_children); cp != NULL;
1267 	    cp = uu_list_next(np->rn_children, cp)) {
1268 		(void) pthread_mutex_lock(&cp->rn_lock);
1269 		if (cp->rn_flags & RC_NODE_PARENT_REF) {
1270 			assert(cp->rn_parent_ref == pp);
1271 		} else {
1272 			assert(cp->rn_parent_ref == NULL);
1273 
1274 			cp->rn_flags |= RC_NODE_PARENT_REF;
1275 			cp->rn_parent_ref = pp;
1276 			if (cp->rn_refs != 0)
1277 				rc_node_hold_other(pp);
1278 		}
1279 		rc_node_setup_parent_ref(cp, pp);		/* recurse */
1280 		(void) pthread_mutex_unlock(&cp->rn_lock);
1281 	}
1282 }
1283 
1284 /*
1285  * Atomically replace 'np' with 'newp', with a parent of 'pp'.
1286  *
1287  * Requirements:
1288  *	*no* node locks may be held.
1289  *	pp must be held with RC_NODE_CHILDREN_CHANGING
1290  *	newp and np must be held with RC_NODE_IN_TX
1291  *	np must be marked RC_NODE_IN_PARENT, newp must not be
1292  *	np must be marked RC_NODE_OLD
1293  *
1294  * Afterwards:
1295  *	pp's RC_NODE_CHILDREN_CHANGING is dropped
1296  *	newp and np's RC_NODE_IN_TX is dropped
1297  *	newp->rn_former = np;
1298  *	newp is RC_NODE_IN_PARENT, np is not.
1299  *	interested notify subscribers have been notified of newp's new status.
1300  */
static void
rc_node_relink_child(rc_node_t *pp, rc_node_t *np, rc_node_t *newp)
{
	cache_bucket_t *bp;
	/*
	 * First, swap np and newp in the cache.  newp's RC_NODE_IN_TX flag
	 * keeps rc_node_update() from seeing it until we are done.
	 * The bucket is chosen by newp->rn_hash, and np is removed from
	 * that same bucket.
	 */
	bp = cache_hold(newp->rn_hash);
	cache_remove_unlocked(bp, np);
	cache_insert_unlocked(bp, newp);
	cache_release(bp);

	/*
	 * replace np with newp in pp's list, and attach it to newp's rn_former
	 * link.
	 *
	 * Locks are taken in the order pp, newp, np.
	 */
	(void) pthread_mutex_lock(&pp->rn_lock);
	assert(pp->rn_flags & RC_NODE_CHILDREN_CHANGING);

	(void) pthread_mutex_lock(&newp->rn_lock);
	assert(!(newp->rn_flags & RC_NODE_IN_PARENT));
	assert(newp->rn_flags & RC_NODE_IN_TX);

	(void) pthread_mutex_lock(&np->rn_lock);
	assert(np->rn_flags & RC_NODE_IN_PARENT);
	assert(np->rn_flags & RC_NODE_OLD);
	assert(np->rn_flags & RC_NODE_IN_TX);

	newp->rn_parent = pp;
	newp->rn_flags |= RC_NODE_IN_PARENT;

	/*
	 * Note that we carefully add newp before removing np -- this
	 * keeps iterators on the list from missing us.
	 */
	(void) uu_list_insert_after(pp->rn_children, np, newp);
	(void) uu_list_remove(pp->rn_children, np);

	/*
	 * re-set np: it becomes newp's predecessor (rn_former), loses its
	 * parent, and is marked RC_NODE_ON_FORMER instead of
	 * RC_NODE_IN_PARENT.
	 */
	newp->rn_former = np;
	np->rn_parent = NULL;
	np->rn_flags &= ~RC_NODE_IN_PARENT;
	np->rn_flags |= RC_NODE_ON_FORMER;

	/* Called with pp, newp and np all still locked. */
	rc_notify_insert_node(newp);

	rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
	(void) pthread_mutex_unlock(&pp->rn_lock);
	/*
	 * rc_node_rele_flag() asserts that every flag passed to it is set,
	 * so newp must arrive holding RC_NODE_USING_PARENT as well as
	 * RC_NODE_IN_TX.
	 */
	rc_node_rele_flag(newp, RC_NODE_USING_PARENT | RC_NODE_IN_TX);
	(void) pthread_mutex_unlock(&newp->rn_lock);
	/*
	 * With np still locked, point the rn_parent_ref of all of np's
	 * descendants at np itself.
	 */
	rc_node_setup_parent_ref(np, np);
	rc_node_rele_flag(np, RC_NODE_IN_TX);
	(void) pthread_mutex_unlock(&np->rn_lock);
}
1358 
1359 /*
1360  * makes sure a node with lookup 'nip', name 'name', and parent 'pp' exists.
1361  * 'cp' is used (and returned) if the node does not yet exist.  If it does
1362  * exist, 'cp' is freed, and the existent node is returned instead.
1363  */
1364 rc_node_t *
1365 rc_node_setup(rc_node_t *cp, rc_node_lookup_t *nip, const char *name,
1366     rc_node_t *pp)
1367 {
1368 	rc_node_t *np;
1369 	cache_bucket_t *bp;
1370 	uint32_t h = rc_node_hash(nip);
1371 
1372 	assert(cp->rn_refs == 0);
1373 
1374 	bp = cache_hold(h);
1375 	if ((np = cache_lookup_unlocked(bp, nip)) != NULL) {
1376 		cache_release(bp);
1377 
1378 		/*
1379 		 * make sure it matches our expectations
1380 		 */
1381 		(void) pthread_mutex_lock(&np->rn_lock);
1382 		if (rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
1383 			assert(np->rn_parent == pp);
1384 			assert(memcmp(&np->rn_id, nip, sizeof (*nip)) == 0);
1385 			assert(strcmp(np->rn_name, name) == 0);
1386 			assert(np->rn_type == NULL);
1387 			assert(np->rn_flags & RC_NODE_IN_PARENT);
1388 			rc_node_rele_flag(np, RC_NODE_USING_PARENT);
1389 		}
1390 		(void) pthread_mutex_unlock(&np->rn_lock);
1391 
1392 		rc_node_destroy(cp);
1393 		return (np);
1394 	}
1395 
1396 	/*
1397 	 * No one is there -- create a new node.
1398 	 */
1399 	np = cp;
1400 	rc_node_hold(np);
1401 	np->rn_id = *nip;
1402 	np->rn_hash = h;
1403 	np->rn_name = strdup(name);
1404 
1405 	np->rn_flags |= RC_NODE_USING_PARENT;
1406 
1407 	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_INSTANCE) {
1408 #if COMPOSITION_DEPTH == 2
1409 		np->rn_cchain[0] = np;
1410 		np->rn_cchain[1] = pp;
1411 #else
1412 #error This code must be updated.
1413 #endif
1414 	}
1415 
1416 	cache_insert_unlocked(bp, np);
1417 	cache_release(bp);		/* we are now visible */
1418 
1419 	rc_node_link_child(pp, np);
1420 
1421 	return (np);
1422 }
1423 
1424 /*
1425  * makes sure a snapshot with lookup 'nip', name 'name', and parent 'pp' exists.
1426  * 'cp' is used (and returned) if the node does not yet exist.  If it does
1427  * exist, 'cp' is freed, and the existent node is returned instead.
1428  */
1429 rc_node_t *
1430 rc_node_setup_snapshot(rc_node_t *cp, rc_node_lookup_t *nip, const char *name,
1431     uint32_t snap_id, rc_node_t *pp)
1432 {
1433 	rc_node_t *np;
1434 	cache_bucket_t *bp;
1435 	uint32_t h = rc_node_hash(nip);
1436 
1437 	assert(cp->rn_refs == 0);
1438 
1439 	bp = cache_hold(h);
1440 	if ((np = cache_lookup_unlocked(bp, nip)) != NULL) {
1441 		cache_release(bp);
1442 
1443 		/*
1444 		 * make sure it matches our expectations
1445 		 */
1446 		(void) pthread_mutex_lock(&np->rn_lock);
1447 		if (rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
1448 			assert(np->rn_parent == pp);
1449 			assert(memcmp(&np->rn_id, nip, sizeof (*nip)) == 0);
1450 			assert(strcmp(np->rn_name, name) == 0);
1451 			assert(np->rn_type == NULL);
1452 			assert(np->rn_flags & RC_NODE_IN_PARENT);
1453 			rc_node_rele_flag(np, RC_NODE_USING_PARENT);
1454 		}
1455 		(void) pthread_mutex_unlock(&np->rn_lock);
1456 
1457 		rc_node_destroy(cp);
1458 		return (np);
1459 	}
1460 
1461 	/*
1462 	 * No one is there -- create a new node.
1463 	 */
1464 	np = cp;
1465 	rc_node_hold(np);
1466 	np->rn_id = *nip;
1467 	np->rn_hash = h;
1468 	np->rn_name = strdup(name);
1469 	np->rn_snapshot_id = snap_id;
1470 
1471 	np->rn_flags |= RC_NODE_USING_PARENT;
1472 
1473 	cache_insert_unlocked(bp, np);
1474 	cache_release(bp);		/* we are now visible */
1475 
1476 	rc_node_link_child(pp, np);
1477 
1478 	return (np);
1479 }
1480 
1481 /*
1482  * makes sure a snaplevel with lookup 'nip' and parent 'pp' exists.  'cp' is
1483  * used (and returned) if the node does not yet exist.  If it does exist, 'cp'
1484  * is freed, and the existent node is returned instead.
1485  */
1486 rc_node_t *
1487 rc_node_setup_snaplevel(rc_node_t *cp, rc_node_lookup_t *nip,
1488     rc_snaplevel_t *lvl, rc_node_t *pp)
1489 {
1490 	rc_node_t *np;
1491 	cache_bucket_t *bp;
1492 	uint32_t h = rc_node_hash(nip);
1493 
1494 	assert(cp->rn_refs == 0);
1495 
1496 	bp = cache_hold(h);
1497 	if ((np = cache_lookup_unlocked(bp, nip)) != NULL) {
1498 		cache_release(bp);
1499 
1500 		/*
1501 		 * make sure it matches our expectations
1502 		 */
1503 		(void) pthread_mutex_lock(&np->rn_lock);
1504 		if (rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
1505 			assert(np->rn_parent == pp);
1506 			assert(memcmp(&np->rn_id, nip, sizeof (*nip)) == 0);
1507 			assert(np->rn_name == NULL);
1508 			assert(np->rn_type == NULL);
1509 			assert(np->rn_flags & RC_NODE_IN_PARENT);
1510 			rc_node_rele_flag(np, RC_NODE_USING_PARENT);
1511 		}
1512 		(void) pthread_mutex_unlock(&np->rn_lock);
1513 
1514 		rc_node_destroy(cp);
1515 		return (np);
1516 	}
1517 
1518 	/*
1519 	 * No one is there -- create a new node.
1520 	 */
1521 	np = cp;
1522 	rc_node_hold(np);	/* released in snapshot_fill_children() */
1523 	np->rn_id = *nip;
1524 	np->rn_hash = h;
1525 
1526 	rc_snaplevel_hold(lvl);
1527 	np->rn_snaplevel = lvl;
1528 
1529 	np->rn_flags |= RC_NODE_USING_PARENT;
1530 
1531 	cache_insert_unlocked(bp, np);
1532 	cache_release(bp);		/* we are now visible */
1533 
1534 	/* Add this snaplevel to the snapshot's composition chain. */
1535 	assert(pp->rn_cchain[lvl->rsl_level_num - 1] == NULL);
1536 	pp->rn_cchain[lvl->rsl_level_num - 1] = np;
1537 
1538 	rc_node_link_child(pp, np);
1539 
1540 	return (np);
1541 }
1542 
1543 /*
1544  * Returns NULL if strdup() fails.
1545  */
1546 rc_node_t *
1547 rc_node_setup_pg(rc_node_t *cp, rc_node_lookup_t *nip, const char *name,
1548     const char *type, uint32_t flags, uint32_t gen_id, rc_node_t *pp)
1549 {
1550 	rc_node_t *np;
1551 	cache_bucket_t *bp;
1552 
1553 	uint32_t h = rc_node_hash(nip);
1554 	bp = cache_hold(h);
1555 	if ((np = cache_lookup_unlocked(bp, nip)) != NULL) {
1556 		cache_release(bp);
1557 
1558 		/*
1559 		 * make sure it matches our expectations (don't check
1560 		 * the generation number or parent, since someone could
1561 		 * have gotten a transaction through while we weren't
1562 		 * looking)
1563 		 */
1564 		(void) pthread_mutex_lock(&np->rn_lock);
1565 		if (rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
1566 			assert(memcmp(&np->rn_id, nip, sizeof (*nip)) == 0);
1567 			assert(strcmp(np->rn_name, name) == 0);
1568 			assert(strcmp(np->rn_type, type) == 0);
1569 			assert(np->rn_pgflags == flags);
1570 			assert(np->rn_flags & RC_NODE_IN_PARENT);
1571 			rc_node_rele_flag(np, RC_NODE_USING_PARENT);
1572 		}
1573 		(void) pthread_mutex_unlock(&np->rn_lock);
1574 
1575 		rc_node_destroy(cp);
1576 		return (np);
1577 	}
1578 
1579 	np = cp;
1580 	rc_node_hold(np);		/* released in fill_pg_callback() */
1581 	np->rn_id = *nip;
1582 	np->rn_hash = h;
1583 	np->rn_name = strdup(name);
1584 	if (np->rn_name == NULL) {
1585 		rc_node_rele(np);
1586 		return (NULL);
1587 	}
1588 	np->rn_type = strdup(type);
1589 	if (np->rn_type == NULL) {
1590 		free((void *)np->rn_name);
1591 		rc_node_rele(np);
1592 		return (NULL);
1593 	}
1594 	np->rn_pgflags = flags;
1595 	np->rn_gen_id = gen_id;
1596 
1597 	np->rn_flags |= RC_NODE_USING_PARENT;
1598 
1599 	cache_insert_unlocked(bp, np);
1600 	cache_release(bp);		/* we are now visible */
1601 
1602 	rc_node_link_child(pp, np);
1603 
1604 	return (np);
1605 }
1606 
1607 #if COMPOSITION_DEPTH == 2
1608 /*
1609  * Initialize a "composed property group" which represents the composition of
1610  * property groups pg1 & pg2.  It is ephemeral: once created & returned for an
1611  * ITER_READ request, keeping it out of cache_hash and any child lists
1612  * prevents it from being looked up.  Operations besides iteration are passed
1613  * through to pg1.
1614  *
1615  * pg1 & pg2 should be held before entering this function.  They will be
1616  * released in rc_node_destroy().
1617  */
1618 static int
1619 rc_node_setup_cpg(rc_node_t *cpg, rc_node_t *pg1, rc_node_t *pg2)
1620 {
1621 	if (strcmp(pg1->rn_type, pg2->rn_type) != 0)
1622 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
1623 
1624 	cpg->rn_id.rl_type = REP_PROTOCOL_ENTITY_CPROPERTYGRP;
1625 	cpg->rn_name = strdup(pg1->rn_name);
1626 	if (cpg->rn_name == NULL)
1627 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
1628 
1629 	cpg->rn_cchain[0] = pg1;
1630 	cpg->rn_cchain[1] = pg2;
1631 
1632 	return (REP_PROTOCOL_SUCCESS);
1633 }
1634 #else
1635 #error This code must be updated.
1636 #endif
1637 
1638 /*
1639  * Fails with _NO_RESOURCES.
1640  */
1641 int
1642 rc_node_create_property(rc_node_t *pp, rc_node_lookup_t *nip,
1643     const char *name, rep_protocol_value_type_t type,
1644     const char *vals, size_t count, size_t size)
1645 {
1646 	rc_node_t *np;
1647 	cache_bucket_t *bp;
1648 
1649 	uint32_t h = rc_node_hash(nip);
1650 	bp = cache_hold(h);
1651 	if ((np = cache_lookup_unlocked(bp, nip)) != NULL) {
1652 		cache_release(bp);
1653 		/*
1654 		 * make sure it matches our expectations
1655 		 */
1656 		(void) pthread_mutex_lock(&np->rn_lock);
1657 		if (rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
1658 			assert(np->rn_parent == pp);
1659 			assert(memcmp(&np->rn_id, nip, sizeof (*nip)) == 0);
1660 			assert(strcmp(np->rn_name, name) == 0);
1661 			assert(np->rn_valtype == type);
1662 			assert(np->rn_values_count == count);
1663 			assert(np->rn_values_size == size);
1664 			assert(vals == NULL ||
1665 			    memcmp(np->rn_values, vals, size) == 0);
1666 			assert(np->rn_flags & RC_NODE_IN_PARENT);
1667 			rc_node_rele_flag(np, RC_NODE_USING_PARENT);
1668 		}
1669 		rc_node_rele_locked(np);
1670 		object_free_values(vals, type, count, size);
1671 		return (REP_PROTOCOL_SUCCESS);
1672 	}
1673 
1674 	/*
1675 	 * No one is there -- create a new node.
1676 	 */
1677 	np = rc_node_alloc();
1678 	if (np == NULL) {
1679 		cache_release(bp);
1680 		object_free_values(vals, type, count, size);
1681 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
1682 	}
1683 	np->rn_id = *nip;
1684 	np->rn_hash = h;
1685 	np->rn_name = strdup(name);
1686 	if (np->rn_name == NULL) {
1687 		cache_release(bp);
1688 		object_free_values(vals, type, count, size);
1689 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
1690 	}
1691 
1692 	np->rn_valtype = type;
1693 	np->rn_values = vals;
1694 	np->rn_values_count = count;
1695 	np->rn_values_size = size;
1696 
1697 	np->rn_flags |= RC_NODE_USING_PARENT;
1698 
1699 	cache_insert_unlocked(bp, np);
1700 	cache_release(bp);		/* we are now visible */
1701 
1702 	rc_node_link_child(pp, np);
1703 
1704 	return (REP_PROTOCOL_SUCCESS);
1705 }
1706 
1707 int
1708 rc_node_init(void)
1709 {
1710 	rc_node_t *np;
1711 	cache_bucket_t *bp;
1712 
1713 	rc_children_pool = uu_list_pool_create("rc_children_pool",
1714 	    sizeof (rc_node_t), offsetof(rc_node_t, rn_sibling_node),
1715 	    NULL, UU_LIST_POOL_DEBUG);
1716 
1717 	rc_pg_notify_pool = uu_list_pool_create("rc_pg_notify_pool",
1718 	    sizeof (rc_node_pg_notify_t),
1719 	    offsetof(rc_node_pg_notify_t, rnpn_node),
1720 	    NULL, UU_LIST_POOL_DEBUG);
1721 
1722 	rc_notify_pool = uu_list_pool_create("rc_notify_pool",
1723 	    sizeof (rc_notify_t), offsetof(rc_notify_t, rcn_list_node),
1724 	    NULL, UU_LIST_POOL_DEBUG);
1725 
1726 	rc_notify_info_pool = uu_list_pool_create("rc_notify_info_pool",
1727 	    sizeof (rc_notify_info_t),
1728 	    offsetof(rc_notify_info_t, rni_list_node),
1729 	    NULL, UU_LIST_POOL_DEBUG);
1730 
1731 	if (rc_children_pool == NULL || rc_pg_notify_pool == NULL ||
1732 	    rc_notify_pool == NULL || rc_notify_info_pool == NULL)
1733 		uu_die("out of memory");
1734 
1735 	rc_notify_list = uu_list_create(rc_notify_pool,
1736 	    &rc_notify_list, 0);
1737 
1738 	rc_notify_info_list = uu_list_create(rc_notify_info_pool,
1739 	    &rc_notify_info_list, 0);
1740 
1741 	if (rc_notify_list == NULL || rc_notify_info_list == NULL)
1742 		uu_die("out of memory");
1743 
1744 	if ((np = rc_node_alloc()) == NULL)
1745 		uu_die("out of memory");
1746 
1747 	rc_node_hold(np);
1748 	np->rn_id.rl_type = REP_PROTOCOL_ENTITY_SCOPE;
1749 	np->rn_id.rl_backend = BACKEND_TYPE_NORMAL;
1750 	np->rn_hash = rc_node_hash(&np->rn_id);
1751 	np->rn_name = "localhost";
1752 
1753 	bp = cache_hold(np->rn_hash);
1754 	cache_insert_unlocked(bp, np);
1755 	cache_release(bp);
1756 
1757 	rc_scope = np;
1758 	return (1);
1759 }
1760 
1761 /*
1762  * Fails with
1763  *   _INVALID_TYPE - type is invalid
1764  *   _TYPE_MISMATCH - np doesn't carry children of type type
1765  *   _DELETED - np has been deleted
1766  *   _NO_RESOURCES
1767  */
1768 static int
1769 rc_node_fill_children(rc_node_t *np, uint32_t type)
1770 {
1771 	int rc;
1772 
1773 	assert(MUTEX_HELD(&np->rn_lock));
1774 
1775 	if ((rc = rc_check_parent_child(np->rn_id.rl_type, type)) !=
1776 	    REP_PROTOCOL_SUCCESS)
1777 		return (rc);
1778 
1779 	if (!rc_node_hold_flag(np, RC_NODE_CHILDREN_CHANGING))
1780 		return (REP_PROTOCOL_FAIL_DELETED);
1781 
1782 	if (np->rn_flags & RC_NODE_HAS_CHILDREN) {
1783 		rc_node_rele_flag(np, RC_NODE_CHILDREN_CHANGING);
1784 		return (REP_PROTOCOL_SUCCESS);
1785 	}
1786 
1787 	(void) pthread_mutex_unlock(&np->rn_lock);
1788 	rc = object_fill_children(np);
1789 	(void) pthread_mutex_lock(&np->rn_lock);
1790 
1791 	if (rc == REP_PROTOCOL_SUCCESS) {
1792 		np->rn_flags |= RC_NODE_HAS_CHILDREN;
1793 	}
1794 	rc_node_rele_flag(np, RC_NODE_CHILDREN_CHANGING);
1795 
1796 	return (rc);
1797 }
1798 
1799 /*
1800  * Returns
1801  *   _INVALID_TYPE - type is invalid
1802  *   _TYPE_MISMATCH - np doesn't carry children of type type
1803  *   _DELETED - np has been deleted
1804  *   _NO_RESOURCES
1805  *   _SUCCESS - if *cpp is not NULL, it is held
1806  */
1807 static int
1808 rc_node_find_named_child(rc_node_t *np, const char *name, uint32_t type,
1809     rc_node_t **cpp)
1810 {
1811 	int ret;
1812 	rc_node_t *cp;
1813 
1814 	assert(MUTEX_HELD(&np->rn_lock));
1815 	assert(np->rn_id.rl_type != REP_PROTOCOL_ENTITY_CPROPERTYGRP);
1816 
1817 	ret = rc_node_fill_children(np, type);
1818 	if (ret != REP_PROTOCOL_SUCCESS)
1819 		return (ret);
1820 
1821 	for (cp = uu_list_first(np->rn_children);
1822 	    cp != NULL;
1823 	    cp = uu_list_next(np->rn_children, cp)) {
1824 		if (cp->rn_id.rl_type == type && strcmp(cp->rn_name, name) == 0)
1825 			break;
1826 	}
1827 
1828 	if (cp != NULL)
1829 		rc_node_hold(cp);
1830 	*cpp = cp;
1831 
1832 	return (REP_PROTOCOL_SUCCESS);
1833 }
1834 
1835 #ifndef NATIVE_BUILD
1836 static int rc_node_parent(rc_node_t *, rc_node_t **);
1837 
1838 /*
1839  * If the propname property exists in pg, and it is of type string, add its
1840  * values as authorizations to pcp.  pg must not be locked on entry, and it is
1841  * returned unlocked.  Returns
1842  *   _DELETED - pg was deleted
1843  *   _NO_RESOURCES
1844  *   _NOT_FOUND - pg has no property named propname
1845  *   _SUCCESS
1846  */
1847 static int
1848 perm_add_pg_prop_values(permcheck_t *pcp, rc_node_t *pg, const char *propname)
1849 {
1850 	rc_node_t *prop;
1851 	int result;
1852 
1853 	uint_t count;
1854 	const char *cp;
1855 
1856 	assert(!MUTEX_HELD(&pg->rn_lock));
1857 	assert(pg->rn_id.rl_type == REP_PROTOCOL_ENTITY_PROPERTYGRP);
1858 	assert(pg->rn_id.rl_ids[ID_SNAPSHOT] == 0);
1859 
1860 	(void) pthread_mutex_lock(&pg->rn_lock);
1861 	result = rc_node_find_named_child(pg, propname,
1862 	    REP_PROTOCOL_ENTITY_PROPERTY, &prop);
1863 	(void) pthread_mutex_unlock(&pg->rn_lock);
1864 	if (result != REP_PROTOCOL_SUCCESS) {
1865 		switch (result) {
1866 		case REP_PROTOCOL_FAIL_DELETED:
1867 		case REP_PROTOCOL_FAIL_NO_RESOURCES:
1868 			return (result);
1869 
1870 		case REP_PROTOCOL_FAIL_INVALID_TYPE:
1871 		case REP_PROTOCOL_FAIL_TYPE_MISMATCH:
1872 		default:
1873 			bad_error("rc_node_find_named_child", result);
1874 		}
1875 	}
1876 
1877 	if (prop == NULL)
1878 		return (REP_PROTOCOL_FAIL_NOT_FOUND);
1879 
1880 	/* rn_valtype is immutable, so no locking. */
1881 	if (prop->rn_valtype != REP_PROTOCOL_TYPE_STRING) {
1882 		rc_node_rele(prop);
1883 		return (REP_PROTOCOL_SUCCESS);
1884 	}
1885 
1886 	(void) pthread_mutex_lock(&prop->rn_lock);
1887 	for (count = prop->rn_values_count, cp = prop->rn_values;
1888 	    count > 0;
1889 	    --count) {
1890 		result = perm_add_enabling(pcp, cp);
1891 		if (result != REP_PROTOCOL_SUCCESS)
1892 			break;
1893 
1894 		cp = strchr(cp, '\0') + 1;
1895 	}
1896 
1897 	rc_node_rele_locked(prop);
1898 
1899 	return (result);
1900 }
1901 
1902 /*
1903  * Assuming that ent is a service or instance node, if the pgname property
1904  * group has type pgtype, and it has a propname property with string type, add
1905  * its values as authorizations to pcp.  If pgtype is NULL, it is not checked.
1906  * Returns
1907  *   _SUCCESS
1908  *   _DELETED - ent was deleted
1909  *   _NO_RESOURCES - no resources
1910  *   _NOT_FOUND - ent does not have pgname pg or propname property
1911  */
1912 static int
1913 perm_add_ent_prop_values(permcheck_t *pcp, rc_node_t *ent, const char *pgname,
1914     const char *pgtype, const char *propname)
1915 {
1916 	int r;
1917 	rc_node_t *pg;
1918 
1919 	assert(!MUTEX_HELD(&ent->rn_lock));
1920 
1921 	(void) pthread_mutex_lock(&ent->rn_lock);
1922 	r = rc_node_find_named_child(ent, pgname,
1923 	    REP_PROTOCOL_ENTITY_PROPERTYGRP, &pg);
1924 	(void) pthread_mutex_unlock(&ent->rn_lock);
1925 
1926 	switch (r) {
1927 	case REP_PROTOCOL_SUCCESS:
1928 		break;
1929 
1930 	case REP_PROTOCOL_FAIL_DELETED:
1931 	case REP_PROTOCOL_FAIL_NO_RESOURCES:
1932 		return (r);
1933 
1934 	default:
1935 		bad_error("rc_node_find_named_child", r);
1936 	}
1937 
1938 	if (pg == NULL)
1939 		return (REP_PROTOCOL_FAIL_NOT_FOUND);
1940 
1941 	if (pgtype == NULL || strcmp(pg->rn_type, pgtype) == 0) {
1942 		r = perm_add_pg_prop_values(pcp, pg, propname);
1943 		switch (r) {
1944 		case REP_PROTOCOL_FAIL_DELETED:
1945 			r = REP_PROTOCOL_FAIL_NOT_FOUND;
1946 			break;
1947 
1948 		case REP_PROTOCOL_FAIL_NO_RESOURCES:
1949 		case REP_PROTOCOL_SUCCESS:
1950 		case REP_PROTOCOL_FAIL_NOT_FOUND:
1951 			break;
1952 
1953 		default:
1954 			bad_error("perm_add_pg_prop_values", r);
1955 		}
1956 	}
1957 
1958 	rc_node_rele(pg);
1959 
1960 	return (r);
1961 }
1962 
1963 /*
 * If pg has a property named propname, and it is string typed, add its values as
1965  * authorizations to pcp.  If pg has no such property, and its parent is an
1966  * instance, walk up to the service and try doing the same with the property
1967  * of the same name from the property group of the same name.  Returns
1968  *   _SUCCESS
1969  *   _NO_RESOURCES
1970  *   _DELETED - pg (or an ancestor) was deleted
1971  */
1972 static int
1973 perm_add_enabling_values(permcheck_t *pcp, rc_node_t *pg, const char *propname)
1974 {
1975 	int r;
1976 
1977 	r = perm_add_pg_prop_values(pcp, pg, propname);
1978 
1979 	if (r == REP_PROTOCOL_FAIL_NOT_FOUND) {
1980 		char pgname[REP_PROTOCOL_NAME_LEN + 1];
1981 		rc_node_t *inst, *svc;
1982 		size_t sz;
1983 
1984 		assert(!MUTEX_HELD(&pg->rn_lock));
1985 
1986 		if (pg->rn_id.rl_ids[ID_INSTANCE] == 0) {
1987 			/* not an instance pg */
1988 			return (REP_PROTOCOL_SUCCESS);
1989 		}
1990 
1991 		sz = strlcpy(pgname, pg->rn_name, sizeof (pgname));
1992 		assert(sz < sizeof (pgname));
1993 
1994 		/* get pg's parent */
1995 		r = rc_node_parent(pg, &inst);
1996 		if (r != REP_PROTOCOL_SUCCESS) {
1997 			assert(r == REP_PROTOCOL_FAIL_DELETED);
1998 			return (r);
1999 		}
2000 
2001 		assert(inst->rn_id.rl_type == REP_PROTOCOL_ENTITY_INSTANCE);
2002 
2003 		/* get instance's parent */
2004 		r = rc_node_parent(inst, &svc);
2005 		rc_node_rele(inst);
2006 		if (r != REP_PROTOCOL_SUCCESS) {
2007 			assert(r == REP_PROTOCOL_FAIL_DELETED);
2008 			return (r);
2009 		}
2010 
2011 		assert(svc->rn_id.rl_type == REP_PROTOCOL_ENTITY_SERVICE);
2012 
2013 		r = perm_add_ent_prop_values(pcp, svc, pgname, NULL, propname);
2014 
2015 		rc_node_rele(svc);
2016 
2017 		if (r == REP_PROTOCOL_FAIL_NOT_FOUND)
2018 			r = REP_PROTOCOL_SUCCESS;
2019 	}
2020 
2021 	return (r);
2022 }
2023 
2024 /*
2025  * Call perm_add_enabling_values() for the "action_authorization" property of
2026  * the "general" property group of inst.  Returns
2027  *   _DELETED - inst (or an ancestor) was deleted
2028  *   _NO_RESOURCES
2029  *   _SUCCESS
2030  */
2031 static int
2032 perm_add_inst_action_auth(permcheck_t *pcp, rc_node_t *inst)
2033 {
2034 	int r;
2035 	rc_node_t *svc;
2036 
2037 	assert(inst->rn_id.rl_type == REP_PROTOCOL_ENTITY_INSTANCE);
2038 
2039 	r = perm_add_ent_prop_values(pcp, inst, AUTH_PG_GENERAL,
2040 	    AUTH_PG_GENERAL_TYPE, AUTH_PROP_ACTION);
2041 
2042 	if (r != REP_PROTOCOL_FAIL_NOT_FOUND)
2043 		return (r);
2044 
2045 	r = rc_node_parent(inst, &svc);
2046 	if (r != REP_PROTOCOL_SUCCESS) {
2047 		assert(r == REP_PROTOCOL_FAIL_DELETED);
2048 		return (r);
2049 	}
2050 
2051 	r = perm_add_ent_prop_values(pcp, svc, AUTH_PG_GENERAL,
2052 	    AUTH_PG_GENERAL_TYPE, AUTH_PROP_ACTION);
2053 
2054 	return (r == REP_PROTOCOL_FAIL_NOT_FOUND ? REP_PROTOCOL_SUCCESS : r);
2055 }
2056 #endif /* NATIVE_BUILD */
2057 
2058 void
2059 rc_node_ptr_init(rc_node_ptr_t *out)
2060 {
2061 	out->rnp_node = NULL;
2062 	out->rnp_authorized = 0;
2063 	out->rnp_deleted = 0;
2064 }
2065 
2066 static void
2067 rc_node_assign(rc_node_ptr_t *out, rc_node_t *val)
2068 {
2069 	rc_node_t *cur = out->rnp_node;
2070 	if (val != NULL)
2071 		rc_node_hold(val);
2072 	out->rnp_node = val;
2073 	if (cur != NULL)
2074 		rc_node_rele(cur);
2075 	out->rnp_authorized = 0;
2076 	out->rnp_deleted = 0;
2077 }
2078 
2079 void
2080 rc_node_clear(rc_node_ptr_t *out, int deleted)
2081 {
2082 	rc_node_assign(out, NULL);
2083 	out->rnp_deleted = deleted;
2084 }
2085 
2086 void
2087 rc_node_ptr_assign(rc_node_ptr_t *out, const rc_node_ptr_t *val)
2088 {
2089 	rc_node_assign(out, val->rnp_node);
2090 }
2091 
2092 /*
2093  * rc_node_check()/RC_NODE_CHECK()
2094  *	generic "entry" checks, run before the use of an rc_node pointer.
2095  *
2096  * Fails with
2097  *   _NOT_SET
2098  *   _DELETED
2099  */
2100 static int
2101 rc_node_check_and_lock(rc_node_t *np)
2102 {
2103 	int result = REP_PROTOCOL_SUCCESS;
2104 	if (np == NULL)
2105 		return (REP_PROTOCOL_FAIL_NOT_SET);
2106 
2107 	(void) pthread_mutex_lock(&np->rn_lock);
2108 	if (!rc_node_wait_flag(np, RC_NODE_DYING)) {
2109 		result = REP_PROTOCOL_FAIL_DELETED;
2110 		(void) pthread_mutex_unlock(&np->rn_lock);
2111 	}
2112 
2113 	return (result);
2114 }
2115 
2116 /*
2117  * Fails with
2118  *   _NOT_SET - ptr is reset
2119  *   _DELETED - node has been deleted
2120  */
2121 static rc_node_t *
2122 rc_node_ptr_check_and_lock(rc_node_ptr_t *npp, int *res)
2123 {
2124 	rc_node_t *np = npp->rnp_node;
2125 	if (np == NULL) {
2126 		if (npp->rnp_deleted)
2127 			*res = REP_PROTOCOL_FAIL_DELETED;
2128 		else
2129 			*res = REP_PROTOCOL_FAIL_NOT_SET;
2130 		return (NULL);
2131 	}
2132 
2133 	(void) pthread_mutex_lock(&np->rn_lock);
2134 	if (!rc_node_wait_flag(np, RC_NODE_DYING)) {
2135 		(void) pthread_mutex_unlock(&np->rn_lock);
2136 		rc_node_clear(npp, 1);
2137 		*res = REP_PROTOCOL_FAIL_DELETED;
2138 		return (NULL);
2139 	}
2140 	return (np);
2141 }
2142 
/*
 * Entry-check helper macros.  Every macro below expands to a brace block
 * that may execute "return (<error code>)" in the ENCLOSING function, so
 * they may only be used inside functions that return a REP_PROTOCOL_* int.
 */

/*
 * Run rc_node_check_and_lock(n).  On failure, return its result from the
 * enclosing function; on success, fall through with (n)->rn_lock held.
 */
2143 #define	RC_NODE_CHECK_AND_LOCK(n) {					\
2144 	int rc__res;							\
2145 	if ((rc__res = rc_node_check_and_lock(n)) != REP_PROTOCOL_SUCCESS) \
2146 		return (rc__res);					\
2147 }
2148 
/* As RC_NODE_CHECK_AND_LOCK(), but drop the lock again before continuing. */
2149 #define	RC_NODE_CHECK(n) {						\
2150 	RC_NODE_CHECK_AND_LOCK(n);					\
2151 	(void) pthread_mutex_unlock(&(n)->rn_lock);			\
2152 }
2153 
/*
 * As RC_NODE_CHECK(), but take a hold on n (rc_node_hold_locked()) before
 * the lock is dropped.
 */
2154 #define	RC_NODE_CHECK_AND_HOLD(n) {					\
2155 	RC_NODE_CHECK_AND_LOCK(n);					\
2156 	rc_node_hold_locked(n);						\
2157 	(void) pthread_mutex_unlock(&(n)->rn_lock);			\
2158 }
2159 
/*
 * Set np from rc_node_ptr_check_and_lock(npp).  If that yields NULL, return
 * the error it reported from the enclosing function; otherwise fall through
 * with (np)->rn_lock held.
 */
2160 #define	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp) {			\
2161 	int rc__res;							\
2162 	if (((np) = rc_node_ptr_check_and_lock(npp, &rc__res)) == NULL)	\
2163 		return (rc__res);					\
2164 }
2165 
/* As RC_NODE_PTR_GET_CHECK_AND_LOCK(), but drop the lock before continuing. */
2166 #define	RC_NODE_PTR_GET_CHECK(np, npp) {				\
2167 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);			\
2168 	(void) pthread_mutex_unlock(&(np)->rn_lock);			\
2169 }
2170 
/*
 * As RC_NODE_PTR_GET_CHECK(), but take a hold on np before the lock is
 * dropped.
 */
2171 #define	RC_NODE_PTR_GET_CHECK_AND_HOLD(np, npp) {			\
2172 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);			\
2173 	rc_node_hold_locked(np);					\
2174 	(void) pthread_mutex_unlock(&(np)->rn_lock);			\
2175 }
2176 
/*
 * With (np)->rn_lock held, try to acquire flag with rc_node_hold_flag().
 * If that fails, unlock np and return _DELETED from the enclosing function.
 * On success, the lock is still held.
 */
2177 #define	HOLD_FLAG_OR_RETURN(np, flag) {					\
2178 	assert(MUTEX_HELD(&(np)->rn_lock));				\
2179 	assert(!((np)->rn_flags & RC_NODE_DEAD));			\
2180 	if (!rc_node_hold_flag((np), flag)) {				\
2181 		(void) pthread_mutex_unlock(&(np)->rn_lock);		\
2182 		return (REP_PROTOCOL_FAIL_DELETED);			\
2183 	}								\
2184 }
2185 
/*
 * As HOLD_FLAG_OR_RETURN(), but on failure also reset npp (which must
 * reference np) and mark it deleted before returning.
 */
2186 #define	HOLD_PTR_FLAG_OR_RETURN(np, npp, flag) {			\
2187 	assert(MUTEX_HELD(&(np)->rn_lock));				\
2188 	assert(!((np)->rn_flags & RC_NODE_DEAD));			\
2189 	if (!rc_node_hold_flag((np), flag)) {				\
2190 		(void) pthread_mutex_unlock(&(np)->rn_lock);		\
2191 		assert((np) == (npp)->rnp_node);			\
2192 		rc_node_clear(npp, 1);					\
2193 		return (REP_PROTOCOL_FAIL_DELETED);			\
2194 	}								\
2195 }
2196 
2197 int
2198 rc_local_scope(uint32_t type, rc_node_ptr_t *out)
2199 {
2200 	if (type != REP_PROTOCOL_ENTITY_SCOPE) {
2201 		rc_node_clear(out, 0);
2202 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
2203 	}
2204 
2205 	/*
2206 	 * the main scope never gets destroyed
2207 	 */
2208 	rc_node_assign(out, rc_scope);
2209 
2210 	return (REP_PROTOCOL_SUCCESS);
2211 }
2212 
2213 /*
2214  * Fails with
2215  *   _NOT_SET - npp is not set
2216  *   _DELETED - the node npp pointed at has been deleted
2217  *   _TYPE_MISMATCH - type is not _SCOPE
2218  *   _NOT_FOUND - scope has no parent
2219  */
2220 static int
2221 rc_scope_parent_scope(rc_node_ptr_t *npp, uint32_t type, rc_node_ptr_t *out)
2222 {
2223 	rc_node_t *np;
2224 
2225 	rc_node_clear(out, 0);
2226 
2227 	RC_NODE_PTR_GET_CHECK(np, npp);
2228 
2229 	if (type != REP_PROTOCOL_ENTITY_SCOPE)
2230 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
2231 
2232 	return (REP_PROTOCOL_FAIL_NOT_FOUND);
2233 }
2234 
2235 /*
2236  * Fails with
2237  *   _NOT_SET
2238  *   _DELETED
2239  *   _NOT_APPLICABLE
2240  *   _NOT_FOUND
2241  *   _BAD_REQUEST
2242  *   _TRUNCATED
2243  */
2244 int
2245 rc_node_name(rc_node_ptr_t *npp, char *buf, size_t sz, uint32_t answertype,
2246     size_t *sz_out)
2247 {
2248 	size_t actual;
2249 	rc_node_t *np;
2250 
2251 	assert(sz == *sz_out);
2252 
2253 	RC_NODE_PTR_GET_CHECK(np, npp);
2254 
2255 	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
2256 		np = np->rn_cchain[0];
2257 		RC_NODE_CHECK(np);
2258 	}
2259 
2260 	switch (answertype) {
2261 	case RP_ENTITY_NAME_NAME:
2262 		if (np->rn_name == NULL)
2263 			return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2264 		actual = strlcpy(buf, np->rn_name, sz);
2265 		break;
2266 	case RP_ENTITY_NAME_PGTYPE:
2267 		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTYGRP)
2268 			return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2269 		actual = strlcpy(buf, np->rn_type, sz);
2270 		break;
2271 	case RP_ENTITY_NAME_PGFLAGS:
2272 		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTYGRP)
2273 			return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2274 		actual = snprintf(buf, sz, "%d", np->rn_pgflags);
2275 		break;
2276 	case RP_ENTITY_NAME_SNAPLEVEL_SCOPE:
2277 		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPLEVEL)
2278 			return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2279 		actual = strlcpy(buf, np->rn_snaplevel->rsl_scope, sz);
2280 		break;
2281 	case RP_ENTITY_NAME_SNAPLEVEL_SERVICE:
2282 		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPLEVEL)
2283 			return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2284 		actual = strlcpy(buf, np->rn_snaplevel->rsl_service, sz);
2285 		break;
2286 	case RP_ENTITY_NAME_SNAPLEVEL_INSTANCE:
2287 		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPLEVEL)
2288 			return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2289 		if (np->rn_snaplevel->rsl_instance == NULL)
2290 			return (REP_PROTOCOL_FAIL_NOT_FOUND);
2291 		actual = strlcpy(buf, np->rn_snaplevel->rsl_instance, sz);
2292 		break;
2293 	default:
2294 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
2295 	}
2296 	if (actual >= sz)
2297 		return (REP_PROTOCOL_FAIL_TRUNCATED);
2298 
2299 	*sz_out = actual;
2300 	return (REP_PROTOCOL_SUCCESS);
2301 }
2302 
2303 int
2304 rc_node_get_property_type(rc_node_ptr_t *npp, rep_protocol_value_type_t *out)
2305 {
2306 	rc_node_t *np;
2307 
2308 	RC_NODE_PTR_GET_CHECK(np, npp);
2309 
2310 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTY)
2311 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
2312 
2313 	*out = np->rn_valtype;
2314 
2315 	return (REP_PROTOCOL_SUCCESS);
2316 }
2317 
2318 /*
2319  * Get np's parent.  If np is deleted, returns _DELETED.  Otherwise puts a hold
2320  * on the parent, returns a pointer to it in *out, and returns _SUCCESS.
2321  */
2322 static int
2323 rc_node_parent(rc_node_t *np, rc_node_t **out)
2324 {
2325 	rc_node_t *pnp;
2326 	rc_node_t *np_orig;
2327 
2328 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
2329 		RC_NODE_CHECK_AND_LOCK(np);
2330 	} else {
2331 		np = np->rn_cchain[0];
2332 		RC_NODE_CHECK_AND_LOCK(np);
2333 	}
2334 
2335 	np_orig = np;
2336 	rc_node_hold_locked(np);		/* simplifies the remainder */
2337 
2338 	for (;;) {
2339 		if (!rc_node_wait_flag(np,
2340 		    RC_NODE_IN_TX | RC_NODE_USING_PARENT)) {
2341 			rc_node_rele_locked(np);
2342 			return (REP_PROTOCOL_FAIL_DELETED);
2343 		}
2344 
2345 		if (!(np->rn_flags & RC_NODE_OLD))
2346 			break;
2347 
2348 		rc_node_rele_locked(np);
2349 		np = cache_lookup(&np_orig->rn_id);
2350 		assert(np != np_orig);
2351 
2352 		if (np == NULL)
2353 			goto deleted;
2354 		(void) pthread_mutex_lock(&np->rn_lock);
2355 	}
2356 
2357 	/* guaranteed to succeed without dropping the lock */
2358 	if (!rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
2359 		(void) pthread_mutex_unlock(&np->rn_lock);
2360 		*out = NULL;
2361 		rc_node_rele(np);
2362 		return (REP_PROTOCOL_FAIL_DELETED);
2363 	}
2364 
2365 	assert(np->rn_parent != NULL);
2366 	pnp = np->rn_parent;
2367 	(void) pthread_mutex_unlock(&np->rn_lock);
2368 
2369 	(void) pthread_mutex_lock(&pnp->rn_lock);
2370 	(void) pthread_mutex_lock(&np->rn_lock);
2371 	rc_node_rele_flag(np, RC_NODE_USING_PARENT);
2372 	(void) pthread_mutex_unlock(&np->rn_lock);
2373 
2374 	rc_node_hold_locked(pnp);
2375 
2376 	(void) pthread_mutex_unlock(&pnp->rn_lock);
2377 
2378 	rc_node_rele(np);
2379 	*out = pnp;
2380 	return (REP_PROTOCOL_SUCCESS);
2381 
2382 deleted:
2383 	rc_node_rele(np);
2384 	return (REP_PROTOCOL_FAIL_DELETED);
2385 }
2386 
2387 /*
2388  * Fails with
2389  *   _NOT_SET
2390  *   _DELETED
2391  */
2392 static int
2393 rc_node_ptr_parent(rc_node_ptr_t *npp, rc_node_t **out)
2394 {
2395 	rc_node_t *np;
2396 
2397 	RC_NODE_PTR_GET_CHECK(np, npp);
2398 
2399 	return (rc_node_parent(np, out));
2400 }
2401 
2402 /*
2403  * Fails with
2404  *   _NOT_SET - npp is not set
2405  *   _DELETED - the node npp pointed at has been deleted
2406  *   _TYPE_MISMATCH - npp's node's parent is not of type type
2407  *
2408  * If npp points to a scope, can also fail with
2409  *   _NOT_FOUND - scope has no parent
2410  */
2411 int
2412 rc_node_get_parent(rc_node_ptr_t *npp, uint32_t type, rc_node_ptr_t *out)
2413 {
2414 	rc_node_t *pnp;
2415 	int rc;
2416 
2417 	if (npp->rnp_node != NULL &&
2418 	    npp->rnp_node->rn_id.rl_type == REP_PROTOCOL_ENTITY_SCOPE)
2419 		return (rc_scope_parent_scope(npp, type, out));
2420 
2421 	if ((rc = rc_node_ptr_parent(npp, &pnp)) != REP_PROTOCOL_SUCCESS) {
2422 		rc_node_clear(out, 0);
2423 		return (rc);
2424 	}
2425 
2426 	if (type != pnp->rn_id.rl_type) {
2427 		rc_node_rele(pnp);
2428 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
2429 	}
2430 
2431 	rc_node_assign(out, pnp);
2432 	rc_node_rele(pnp);
2433 
2434 	return (REP_PROTOCOL_SUCCESS);
2435 }
2436 
2437 int
2438 rc_node_parent_type(rc_node_ptr_t *npp, uint32_t *type_out)
2439 {
2440 	rc_node_t *pnp;
2441 	int rc;
2442 
2443 	if (npp->rnp_node != NULL &&
2444 	    npp->rnp_node->rn_id.rl_type == REP_PROTOCOL_ENTITY_SCOPE) {
2445 		*type_out = REP_PROTOCOL_ENTITY_SCOPE;
2446 		return (REP_PROTOCOL_SUCCESS);
2447 	}
2448 
2449 	if ((rc = rc_node_ptr_parent(npp, &pnp)) != REP_PROTOCOL_SUCCESS)
2450 		return (rc);
2451 
2452 	*type_out = pnp->rn_id.rl_type;
2453 
2454 	rc_node_rele(pnp);
2455 
2456 	return (REP_PROTOCOL_SUCCESS);
2457 }
2458 
2459 /*
2460  * Fails with
2461  *   _INVALID_TYPE - type is invalid
2462  *   _TYPE_MISMATCH - np doesn't carry children of type type
2463  *   _DELETED - np has been deleted
2464  *   _NOT_FOUND - no child with that name/type combo found
2465  *   _NO_RESOURCES
2466  *   _BACKEND_ACCESS
2467  */
2468 int
2469 rc_node_get_child(rc_node_ptr_t *npp, const char *name, uint32_t type,
2470     rc_node_ptr_t *outp)
2471 {
2472 	rc_node_t *np, *cp;
2473 	rc_node_t *child = NULL;
2474 	int ret, idx;
2475 
2476 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
2477 	if ((ret = rc_check_type_name(type, name)) == REP_PROTOCOL_SUCCESS) {
2478 		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
2479 			ret = rc_node_find_named_child(np, name, type, &child);
2480 		} else {
2481 			(void) pthread_mutex_unlock(&np->rn_lock);
2482 			ret = REP_PROTOCOL_SUCCESS;
2483 			for (idx = 0; idx < COMPOSITION_DEPTH; idx++) {
2484 				cp = np->rn_cchain[idx];
2485 				if (cp == NULL)
2486 					break;
2487 				RC_NODE_CHECK_AND_LOCK(cp);
2488 				ret = rc_node_find_named_child(cp, name, type,
2489 				    &child);
2490 				(void) pthread_mutex_unlock(&cp->rn_lock);
2491 				/*
2492 				 * loop only if we succeeded, but no child of
2493 				 * the correct name was found.
2494 				 */
2495 				if (ret != REP_PROTOCOL_SUCCESS ||
2496 				    child != NULL)
2497 					break;
2498 			}
2499 			(void) pthread_mutex_lock(&np->rn_lock);
2500 		}
2501 	}
2502 	(void) pthread_mutex_unlock(&np->rn_lock);
2503 
2504 	if (ret == REP_PROTOCOL_SUCCESS) {
2505 		rc_node_assign(outp, child);
2506 		if (child != NULL)
2507 			rc_node_rele(child);
2508 		else
2509 			ret = REP_PROTOCOL_FAIL_NOT_FOUND;
2510 	} else {
2511 		rc_node_assign(outp, NULL);
2512 	}
2513 	return (ret);
2514 }
2515 
2516 int
2517 rc_node_update(rc_node_ptr_t *npp)
2518 {
2519 	cache_bucket_t *bp;
2520 	rc_node_t *np = npp->rnp_node;
2521 	rc_node_t *nnp;
2522 	rc_node_t *cpg = NULL;
2523 
2524 	if (np != NULL &&
2525 	    np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
2526 		/*
2527 		 * If we're updating a composed property group, actually
2528 		 * update the top-level property group & return the
2529 		 * appropriate value.  But leave *nnp pointing at us.
2530 		 */
2531 		cpg = np;
2532 		np = np->rn_cchain[0];
2533 	}
2534 
2535 	RC_NODE_CHECK(np);
2536 
2537 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTYGRP &&
2538 	    np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPSHOT)
2539 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
2540 
2541 	for (;;) {
2542 		bp = cache_hold(np->rn_hash);
2543 		nnp = cache_lookup_unlocked(bp, &np->rn_id);
2544 		if (nnp == NULL) {
2545 			cache_release(bp);
2546 			rc_node_clear(npp, 1);
2547 			return (REP_PROTOCOL_FAIL_DELETED);
2548 		}
2549 		/*
2550 		 * grab the lock before dropping the cache bucket, so
2551 		 * that no one else can sneak in
2552 		 */
2553 		(void) pthread_mutex_lock(&nnp->rn_lock);
2554 		cache_release(bp);
2555 
2556 		if (!(nnp->rn_flags & RC_NODE_IN_TX) ||
2557 		    !rc_node_wait_flag(nnp, RC_NODE_IN_TX))
2558 			break;
2559 
2560 		rc_node_rele_locked(nnp);
2561 	}
2562 
2563 	/*
2564 	 * If it is dead, we want to update it so that it will continue to
2565 	 * report being dead.
2566 	 */
2567 	if (nnp->rn_flags & RC_NODE_DEAD) {
2568 		(void) pthread_mutex_unlock(&nnp->rn_lock);
2569 		if (nnp != np && cpg == NULL)
2570 			rc_node_assign(npp, nnp);	/* updated */
2571 		rc_node_rele(nnp);
2572 		return (REP_PROTOCOL_FAIL_DELETED);
2573 	}
2574 
2575 	assert(!(nnp->rn_flags & RC_NODE_OLD));
2576 	(void) pthread_mutex_unlock(&nnp->rn_lock);
2577 
2578 	if (nnp != np && cpg == NULL)
2579 		rc_node_assign(npp, nnp);		/* updated */
2580 
2581 	rc_node_rele(nnp);
2582 
2583 	return ((nnp == np)? REP_PROTOCOL_SUCCESS : REP_PROTOCOL_DONE);
2584 }
2585 
2586 /*
2587  * does a generic modification check, for creation, deletion, and snapshot
2588  * management only.  Property group transactions have different checks.
2589  */
2590 int
2591 rc_node_modify_permission_check(void)
2592 {
2593 	int rc = REP_PROTOCOL_SUCCESS;
2594 	permcheck_t *pcp;
2595 	int granted;
2596 
2597 	if (!client_is_privileged()) {
2598 #ifdef NATIVE_BUILD
2599 		rc = REP_PROTOCOL_FAIL_PERMISSION_DENIED;
2600 #else
2601 		pcp = pc_create();
2602 		if (pcp != NULL) {
2603 			rc = perm_add_enabling(pcp, AUTH_MODIFY);
2604 
2605 			if (rc == REP_PROTOCOL_SUCCESS) {
2606 				granted = perm_granted(pcp);
2607 
2608 				if (granted < 0)
2609 					rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
2610 			}
2611 
2612 			pc_free(pcp);
2613 		} else {
2614 			rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
2615 		}
2616 
2617 		if (rc == REP_PROTOCOL_SUCCESS && !granted)
2618 			rc = REP_PROTOCOL_FAIL_PERMISSION_DENIED;
2619 #endif /* NATIVE_BUILD */
2620 	}
2621 	return (rc);
2622 }
2623 
2624 /*
2625  * Fails with
2626  *   _DELETED - node has been deleted
2627  *   _NOT_SET - npp is reset
2628  *   _NOT_APPLICABLE - type is _PROPERTYGRP
2629  *   _INVALID_TYPE - node is corrupt or type is invalid
2630  *   _TYPE_MISMATCH - node cannot have children of type type
2631  *   _BAD_REQUEST - name is invalid
2632  *		    cannot create children for this type of node
2633  *   _NO_RESOURCES - out of memory, or could not allocate new id
2634  *   _PERMISSION_DENIED
2635  *   _BACKEND_ACCESS
2636  *   _BACKEND_READONLY
2637  *   _EXISTS - child already exists
2638  */
2639 int
2640 rc_node_create_child(rc_node_ptr_t *npp, uint32_t type, const char *name,
2641     rc_node_ptr_t *cpp)
2642 {
2643 	rc_node_t *np;
2644 	rc_node_t *cp = NULL;
2645 	int rc;
2646 
2647 	rc_node_clear(cpp, 0);
2648 
2649 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
2650 
2651 	/*
2652 	 * there is a separate interface for creating property groups
2653 	 */
2654 	if (type == REP_PROTOCOL_ENTITY_PROPERTYGRP) {
2655 		(void) pthread_mutex_unlock(&np->rn_lock);
2656 		return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2657 	}
2658 
2659 	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
2660 		(void) pthread_mutex_unlock(&np->rn_lock);
2661 		np = np->rn_cchain[0];
2662 		RC_NODE_CHECK_AND_LOCK(np);
2663 	}
2664 
2665 	if ((rc = rc_check_parent_child(np->rn_id.rl_type, type)) !=
2666 	    REP_PROTOCOL_SUCCESS) {
2667 		(void) pthread_mutex_unlock(&np->rn_lock);
2668 		return (rc);
2669 	}
2670 	if ((rc = rc_check_type_name(type, name)) != REP_PROTOCOL_SUCCESS) {
2671 		(void) pthread_mutex_unlock(&np->rn_lock);
2672 		return (rc);
2673 	}
2674 
2675 	if ((rc = rc_node_modify_permission_check()) != REP_PROTOCOL_SUCCESS) {
2676 		(void) pthread_mutex_unlock(&np->rn_lock);
2677 		return (rc);
2678 	}
2679 
2680 	HOLD_PTR_FLAG_OR_RETURN(np, npp, RC_NODE_CREATING_CHILD);
2681 	(void) pthread_mutex_unlock(&np->rn_lock);
2682 
2683 	rc = object_create(np, type, name, &cp);
2684 	assert(rc != REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2685 
2686 	if (rc == REP_PROTOCOL_SUCCESS) {
2687 		rc_node_assign(cpp, cp);
2688 		rc_node_rele(cp);
2689 	}
2690 
2691 	(void) pthread_mutex_lock(&np->rn_lock);
2692 	rc_node_rele_flag(np, RC_NODE_CREATING_CHILD);
2693 	(void) pthread_mutex_unlock(&np->rn_lock);
2694 
2695 	return (rc);
2696 }
2697 
2698 int
2699 rc_node_create_child_pg(rc_node_ptr_t *npp, uint32_t type, const char *name,
2700     const char *pgtype, uint32_t flags, rc_node_ptr_t *cpp)
2701 {
2702 	rc_node_t *np;
2703 	rc_node_t *cp;
2704 	int rc;
2705 	permcheck_t *pcp;
2706 	int granted;
2707 
2708 	rc_node_clear(cpp, 0);
2709 
2710 	/* verify flags is valid */
2711 	if (flags & ~SCF_PG_FLAG_NONPERSISTENT)
2712 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
2713 
2714 	RC_NODE_PTR_GET_CHECK_AND_HOLD(np, npp);
2715 
2716 	if (type != REP_PROTOCOL_ENTITY_PROPERTYGRP) {
2717 		rc_node_rele(np);
2718 		return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
2719 	}
2720 
2721 	if ((rc = rc_check_parent_child(np->rn_id.rl_type, type)) !=
2722 	    REP_PROTOCOL_SUCCESS) {
2723 		rc_node_rele(np);
2724 		return (rc);
2725 	}
2726 	if ((rc = rc_check_type_name(type, name)) != REP_PROTOCOL_SUCCESS ||
2727 	    (rc = rc_check_pgtype_name(pgtype)) != REP_PROTOCOL_SUCCESS) {
2728 		rc_node_rele(np);
2729 		return (rc);
2730 	}
2731 
2732 	if (!client_is_privileged()) {
2733 #ifdef NATIVE_BUILD
2734 		rc = REP_PROTOCOL_FAIL_PERMISSION_DENIED;
2735 #else
2736 		/* Must have .smf.modify or smf.modify.<type> authorization */
2737 		pcp = pc_create();
2738 		if (pcp != NULL) {
2739 			rc = perm_add_enabling(pcp, AUTH_MODIFY);
2740 
2741 			if (rc == REP_PROTOCOL_SUCCESS) {
2742 				const char * const auth =
2743 				    perm_auth_for_pgtype(pgtype);
2744 
2745 				if (auth != NULL)
2746 					rc = perm_add_enabling(pcp, auth);
2747 			}
2748 
2749 			/*
2750 			 * .manage or $action_authorization can be used to
2751 			 * create the actions pg and the general_ovr pg.
2752 			 */
2753 			if (rc == REP_PROTOCOL_SUCCESS &&
2754 			    (flags & SCF_PG_FLAG_NONPERSISTENT) != 0 &&
2755 			    np->rn_id.rl_type == REP_PROTOCOL_ENTITY_INSTANCE &&
2756 			    ((strcmp(name, AUTH_PG_ACTIONS) == 0 &&
2757 			    strcmp(pgtype, AUTH_PG_ACTIONS_TYPE) == 0) ||
2758 			    (strcmp(name, AUTH_PG_GENERAL_OVR) == 0 &&
2759 			    strcmp(pgtype, AUTH_PG_GENERAL_OVR_TYPE) == 0))) {
2760 				rc = perm_add_enabling(pcp, AUTH_MANAGE);
2761 
2762 				if (rc == REP_PROTOCOL_SUCCESS)
2763 					rc = perm_add_inst_action_auth(pcp, np);
2764 			}
2765 
2766 			if (rc == REP_PROTOCOL_SUCCESS) {
2767 				granted = perm_granted(pcp);
2768 
2769 				if (granted < 0)
2770 					rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
2771 			}
2772 
2773 			pc_free(pcp);
2774 		} else {
2775 			rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
2776 		}
2777 
2778 		if (rc == REP_PROTOCOL_SUCCESS && !granted)
2779 			rc = REP_PROTOCOL_FAIL_PERMISSION_DENIED;
2780 #endif /* NATIVE_BUILD */
2781 
2782 		if (rc != REP_PROTOCOL_SUCCESS) {
2783 			rc_node_rele(np);
2784 			return (rc);
2785 		}
2786 	}
2787 
2788 	(void) pthread_mutex_lock(&np->rn_lock);
2789 	HOLD_PTR_FLAG_OR_RETURN(np, npp, RC_NODE_CREATING_CHILD);
2790 	(void) pthread_mutex_unlock(&np->rn_lock);
2791 
2792 	rc = object_create_pg(np, type, name, pgtype, flags, &cp);
2793 
2794 	if (rc == REP_PROTOCOL_SUCCESS) {
2795 		rc_node_assign(cpp, cp);
2796 		rc_node_rele(cp);
2797 	}
2798 
2799 	(void) pthread_mutex_lock(&np->rn_lock);
2800 	rc_node_rele_flag(np, RC_NODE_CREATING_CHILD);
2801 	(void) pthread_mutex_unlock(&np->rn_lock);
2802 
2803 	return (rc);
2804 }
2805 
2806 static void
2807 rc_pg_notify_fire(rc_node_pg_notify_t *pnp)
2808 {
2809 	assert(MUTEX_HELD(&rc_pg_notify_lock));
2810 
2811 	if (pnp->rnpn_pg != NULL) {
2812 		uu_list_remove(pnp->rnpn_pg->rn_pg_notify_list, pnp);
2813 		(void) close(pnp->rnpn_fd);
2814 
2815 		pnp->rnpn_pg = NULL;
2816 		pnp->rnpn_fd = -1;
2817 	} else {
2818 		assert(pnp->rnpn_fd == -1);
2819 	}
2820 }
2821 
2822 static void
2823 rc_notify_node_delete(rc_notify_delete_t *ndp, rc_node_t *np_arg)
2824 {
2825 	rc_node_t *svc = NULL;
2826 	rc_node_t *inst = NULL;
2827 	rc_node_t *pg = NULL;
2828 	rc_node_t *np = np_arg;
2829 	rc_node_t *nnp;
2830 
2831 	while (svc == NULL) {
2832 		(void) pthread_mutex_lock(&np->rn_lock);
2833 		if (!rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
2834 			(void) pthread_mutex_unlock(&np->rn_lock);
2835 			goto cleanup;
2836 		}
2837 		nnp = np->rn_parent;
2838 		rc_node_hold_locked(np);	/* hold it in place */
2839 
2840 		switch (np->rn_id.rl_type) {
2841 		case REP_PROTOCOL_ENTITY_PROPERTYGRP:
2842 			assert(pg == NULL);
2843 			pg = np;
2844 			break;
2845 		case REP_PROTOCOL_ENTITY_INSTANCE:
2846 			assert(inst == NULL);
2847 			inst = np;
2848 			break;
2849 		case REP_PROTOCOL_ENTITY_SERVICE:
2850 			assert(svc == NULL);
2851 			svc = np;
2852 			break;
2853 		default:
2854 			rc_node_rele_flag(np, RC_NODE_USING_PARENT);
2855 			rc_node_rele_locked(np);
2856 			goto cleanup;
2857 		}
2858 
2859 		(void) pthread_mutex_unlock(&np->rn_lock);
2860 
2861 		np = nnp;
2862 		if (np == NULL)
2863 			goto cleanup;
2864 	}
2865 
2866 	rc_notify_deletion(ndp,
2867 	    svc->rn_name,
2868 	    inst != NULL ? inst->rn_name : NULL,
2869 	    pg != NULL ? pg->rn_name : NULL);
2870 
2871 	ndp = NULL;
2872 
2873 cleanup:
2874 	if (ndp != NULL)
2875 		uu_free(ndp);
2876 
2877 	for (;;) {
2878 		if (svc != NULL) {
2879 			np = svc;
2880 			svc = NULL;
2881 		} else if (inst != NULL) {
2882 			np = inst;
2883 			inst = NULL;
2884 		} else if (pg != NULL) {
2885 			np = pg;
2886 			pg = NULL;
2887 		} else
2888 			break;
2889 
2890 		(void) pthread_mutex_lock(&np->rn_lock);
2891 		rc_node_rele_flag(np, RC_NODE_USING_PARENT);
2892 		rc_node_rele_locked(np);
2893 	}
2894 }
2895 
2896 /*
2897  * N.B.:  this function drops np->rn_lock on the way out.
2898  */
2899 static void
2900 rc_node_delete_hold(rc_node_t *np, int andformer)
2901 {
2902 	rc_node_t *cp;
2903 
2904 again:
2905 	assert(MUTEX_HELD(&np->rn_lock));
2906 	assert((np->rn_flags & RC_NODE_DYING_FLAGS) == RC_NODE_DYING_FLAGS);
2907 
2908 	for (cp = uu_list_first(np->rn_children); cp != NULL;
2909 	    cp = uu_list_next(np->rn_children, cp)) {
2910 		(void) pthread_mutex_lock(&cp->rn_lock);
2911 		(void) pthread_mutex_unlock(&np->rn_lock);
2912 		if (!rc_node_hold_flag(cp, RC_NODE_DYING_FLAGS)) {
2913 			/*
2914 			 * already marked as dead -- can't happen, since that
2915 			 * would require setting RC_NODE_CHILDREN_CHANGING
2916 			 * in np, and we're holding that...
2917 			 */
2918 			abort();
2919 		}
2920 		rc_node_delete_hold(cp, andformer);	/* recurse, drop lock */
2921 
2922 		(void) pthread_mutex_lock(&np->rn_lock);
2923 	}
2924 	if (andformer && (cp = np->rn_former) != NULL) {
2925 		(void) pthread_mutex_lock(&cp->rn_lock);
2926 		(void) pthread_mutex_unlock(&np->rn_lock);
2927 		if (!rc_node_hold_flag(cp, RC_NODE_DYING_FLAGS))
2928 			abort();		/* can't happen, see above */
2929 		np = cp;
2930 		goto again;		/* tail-recurse down rn_former */
2931 	}
2932 	(void) pthread_mutex_unlock(&np->rn_lock);
2933 }
2934 
2935 /*
2936  * N.B.:  this function drops np->rn_lock on the way out.
2937  */
2938 static void
2939 rc_node_delete_rele(rc_node_t *np, int andformer)
2940 {
2941 	rc_node_t *cp;
2942 
2943 again:
2944 	assert(MUTEX_HELD(&np->rn_lock));
2945 	assert((np->rn_flags & RC_NODE_DYING_FLAGS) == RC_NODE_DYING_FLAGS);
2946 
2947 	for (cp = uu_list_first(np->rn_children); cp != NULL;
2948 	    cp = uu_list_next(np->rn_children, cp)) {
2949 		(void) pthread_mutex_lock(&cp->rn_lock);
2950 		(void) pthread_mutex_unlock(&np->rn_lock);
2951 		rc_node_delete_rele(cp, andformer);	/* recurse, drop lock */
2952 		(void) pthread_mutex_lock(&np->rn_lock);
2953 	}
2954 	if (andformer && (cp = np->rn_former) != NULL) {
2955 		(void) pthread_mutex_lock(&cp->rn_lock);
2956 		rc_node_rele_flag(np, RC_NODE_DYING_FLAGS);
2957 		(void) pthread_mutex_unlock(&np->rn_lock);
2958 
2959 		np = cp;
2960 		goto again;		/* tail-recurse down rn_former */
2961 	}
2962 	rc_node_rele_flag(np, RC_NODE_DYING_FLAGS);
2963 	(void) pthread_mutex_unlock(&np->rn_lock);
2964 }
2965 
2966 static void
2967 rc_node_finish_delete(rc_node_t *cp)
2968 {
2969 	cache_bucket_t *bp;
2970 	rc_node_pg_notify_t *pnp;
2971 
2972 	assert(MUTEX_HELD(&cp->rn_lock));
2973 
2974 	if (!(cp->rn_flags & RC_NODE_OLD)) {
2975 		assert(cp->rn_flags & RC_NODE_IN_PARENT);
2976 		if (!rc_node_wait_flag(cp, RC_NODE_USING_PARENT)) {
2977 			abort();		/* can't happen, see above */
2978 		}
2979 		cp->rn_flags &= ~RC_NODE_IN_PARENT;
2980 		cp->rn_parent = NULL;
2981 	}
2982 
2983 	cp->rn_flags |= RC_NODE_DEAD;
2984 
2985 	/*
2986 	 * If this node is not out-dated, we need to remove it from
2987 	 * the notify list and cache hash table.
2988 	 */
2989 	if (!(cp->rn_flags & RC_NODE_OLD)) {
2990 		assert(cp->rn_refs > 0);	/* can't go away yet */
2991 		(void) pthread_mutex_unlock(&cp->rn_lock);
2992 
2993 		(void) pthread_mutex_lock(&rc_pg_notify_lock);
2994 		while ((pnp = uu_list_first(cp->rn_pg_notify_list)) != NULL)
2995 			rc_pg_notify_fire(pnp);
2996 		(void) pthread_mutex_unlock(&rc_pg_notify_lock);
2997 		rc_notify_remove_node(cp);
2998 
2999 		bp = cache_hold(cp->rn_hash);
3000 		(void) pthread_mutex_lock(&cp->rn_lock);
3001 		cache_remove_unlocked(bp, cp);
3002 		cache_release(bp);
3003 	}
3004 }
3005 
3006 /*
3007  * N.B.:  this function drops np->rn_lock and a reference on the way out.
3008  */
3009 static void
3010 rc_node_delete_children(rc_node_t *np, int andformer)
3011 {
3012 	rc_node_t *cp;
3013 
3014 again:
3015 	assert(np->rn_refs > 0);
3016 	assert(MUTEX_HELD(&np->rn_lock));
3017 	assert(np->rn_flags & RC_NODE_DEAD);
3018 
3019 	while ((cp = uu_list_first(np->rn_children)) != NULL) {
3020 		uu_list_remove(np->rn_children, cp);
3021 		(void) pthread_mutex_lock(&cp->rn_lock);
3022 		(void) pthread_mutex_unlock(&np->rn_lock);
3023 		rc_node_hold_locked(cp);	/* hold while we recurse */
3024 		rc_node_finish_delete(cp);
3025 		rc_node_delete_children(cp, andformer);	/* drops lock + ref */
3026 		(void) pthread_mutex_lock(&np->rn_lock);
3027 	}
3028 
3029 	/*
3030 	 * when we drop cp's lock, all the children will be gone, so we
3031 	 * can release DYING_FLAGS.
3032 	 */
3033 	rc_node_rele_flag(np, RC_NODE_DYING_FLAGS);
3034 	if (andformer && (cp = np->rn_former) != NULL) {
3035 		np->rn_former = NULL;		/* unlink */
3036 		(void) pthread_mutex_lock(&cp->rn_lock);
3037 		(void) pthread_mutex_unlock(&np->rn_lock);
3038 		np->rn_flags &= ~RC_NODE_ON_FORMER;
3039 
3040 		rc_node_hold_locked(cp);	/* hold while we loop */
3041 
3042 		rc_node_finish_delete(cp);
3043 
3044 		rc_node_rele(np);		/* drop the old reference */
3045 
3046 		np = cp;
3047 		goto again;		/* tail-recurse down rn_former */
3048 	}
3049 	rc_node_rele_locked(np);
3050 }
3051 
3052 static void
3053 rc_node_unrefed(rc_node_t *np)
3054 {
3055 	int unrefed;
3056 	rc_node_t *pp, *cur;
3057 
3058 	assert(MUTEX_HELD(&np->rn_lock));
3059 	assert(np->rn_refs == 0);
3060 	assert(np->rn_other_refs == 0);
3061 	assert(np->rn_other_refs_held == 0);
3062 
3063 	if (np->rn_flags & RC_NODE_DEAD) {
3064 		(void) pthread_mutex_unlock(&np->rn_lock);
3065 		rc_node_destroy(np);
3066 		return;
3067 	}
3068 
3069 	assert(np->rn_flags & RC_NODE_OLD);
3070 	if (np->rn_flags & RC_NODE_UNREFED) {
3071 		(void) pthread_mutex_unlock(&np->rn_lock);
3072 		return;
3073 	}
3074 	np->rn_flags |= RC_NODE_UNREFED;
3075 
3076 	(void) pthread_mutex_unlock(&np->rn_lock);
3077 
3078 	/*
3079 	 * find the current in-hash object, and grab it's RC_NODE_IN_TX
3080 	 * flag.  That protects the entire rn_former chain.
3081 	 */
3082 	for (;;) {
3083 		pp = cache_lookup(&np->rn_id);
3084 		if (pp == NULL) {
3085 			(void) pthread_mutex_lock(&np->rn_lock);
3086 			if (np->rn_flags & RC_NODE_DEAD)
3087 				goto died;
3088 			/*
3089 			 * We are trying to unreference this node, but the
3090 			 * owner of the former list does not exist.  It must
3091 			 * be the case that another thread is deleting this
3092 			 * entire sub-branch, but has not yet reached us.
3093 			 * We will in short order be deleted.
3094 			 */
3095 			np->rn_flags &= ~RC_NODE_UNREFED;
3096 			(void) pthread_mutex_unlock(&np->rn_lock);
3097 			return;
3098 		}
3099 		if (pp == np) {
3100 			/*
3101 			 * no longer unreferenced
3102 			 */
3103 			(void) pthread_mutex_lock(&np->rn_lock);
3104 			np->rn_flags &= ~RC_NODE_UNREFED;
3105 			rc_node_rele_locked(np);
3106 			return;
3107 		}
3108 		(void) pthread_mutex_lock(&pp->rn_lock);
3109 		if ((pp->rn_flags & RC_NODE_OLD) ||
3110 		    !rc_node_hold_flag(pp, RC_NODE_IN_TX)) {
3111 			rc_node_rele_locked(pp);
3112 			continue;
3113 		}
3114 		if (!(pp->rn_flags & RC_NODE_OLD)) {
3115 			(void) pthread_mutex_unlock(&pp->rn_lock);
3116 			break;
3117 		}
3118 		rc_node_rele_flag(pp, RC_NODE_IN_TX);
3119 		rc_node_rele_locked(pp);
3120 	}
3121 
3122 	(void) pthread_mutex_lock(&np->rn_lock);
3123 	if (!(np->rn_flags & (RC_NODE_OLD | RC_NODE_DEAD)) ||
3124 	    np->rn_refs != 0 || np->rn_other_refs != 0 ||
3125 	    np->rn_other_refs_held != 0) {
3126 		np->rn_flags &= ~RC_NODE_UNREFED;
3127 		(void) pthread_mutex_lock(&pp->rn_lock);
3128 
3129 		rc_node_rele_flag(pp, RC_NODE_IN_TX);
3130 		rc_node_rele_locked(pp);
3131 		return;
3132 	}
3133 
3134 	if (!rc_node_hold_flag(np, RC_NODE_DYING_FLAGS)) {
3135 		(void) pthread_mutex_unlock(&np->rn_lock);
3136 
3137 		rc_node_rele_flag(pp, RC_NODE_IN_TX);
3138 		rc_node_rele_locked(pp);
3139 
3140 		(void) pthread_mutex_lock(&np->rn_lock);
3141 		goto died;
3142 	}
3143 
3144 	rc_node_delete_hold(np, 0);
3145 
3146 	(void) pthread_mutex_lock(&np->rn_lock);
3147 	if (!(np->rn_flags & RC_NODE_OLD) ||
3148 	    np->rn_refs != 0 || np->rn_other_refs != 0 ||
3149 	    np->rn_other_refs_held != 0) {
3150 		np->rn_flags &= ~RC_NODE_UNREFED;
3151 		rc_node_delete_rele(np, 0);
3152 
3153 		(void) pthread_mutex_lock(&pp->rn_lock);
3154 		rc_node_rele_flag(pp, RC_NODE_IN_TX);
3155 		rc_node_rele_locked(pp);
3156 		return;
3157 	}
3158 
3159 	np->rn_flags |= RC_NODE_DEAD;
3160 	rc_node_hold_locked(np);
3161 	rc_node_delete_children(np, 0);
3162 
3163 	/*
3164 	 * It's gone -- remove it from the former chain and destroy it.
3165 	 */
3166 	(void) pthread_mutex_lock(&pp->rn_lock);
3167 	for (cur = pp; cur != NULL && cur->rn_former != np;
3168 	    cur = cur->rn_former)
3169 		;
3170 	assert(cur != NULL && cur != np);
3171 
3172 	cur->rn_former = np->rn_former;
3173 	np->rn_former = NULL;
3174 
3175 	rc_node_rele_flag(pp, RC_NODE_IN_TX);
3176 	rc_node_rele_locked(pp);
3177 
3178 	(void) pthread_mutex_lock(&np->rn_lock);
3179 	assert(np->rn_flags & RC_NODE_ON_FORMER);
3180 	np->rn_flags &= ~(RC_NODE_UNREFED | RC_NODE_ON_FORMER);
3181 	(void) pthread_mutex_unlock(&np->rn_lock);
3182 	rc_node_destroy(np);
3183 	return;
3184 
3185 died:
3186 	np->rn_flags &= ~RC_NODE_UNREFED;
3187 	unrefed = (np->rn_refs == 0 && np->rn_other_refs == 0 &&
3188 	    np->rn_other_refs_held == 0);
3189 	(void) pthread_mutex_unlock(&np->rn_lock);
3190 	if (unrefed)
3191 		rc_node_destroy(np);
3192 }
3193 
3194 /*
3195  * Fails with
3196  *   _NOT_SET
3197  *   _DELETED
3198  *   _BAD_REQUEST
3199  *   _PERMISSION_DENIED
3200  *   _NO_RESOURCES
3201  * and whatever object_delete() fails with.
3202  */
3203 int
3204 rc_node_delete(rc_node_ptr_t *npp)
3205 {
3206 	rc_node_t *np, *np_orig;
3207 	rc_node_t *pp = NULL;
3208 	int rc;
3209 	rc_node_pg_notify_t *pnp;
3210 	cache_bucket_t *bp;
3211 	rc_notify_delete_t *ndp;
3212 	permcheck_t *pcp;
3213 	int granted;
3214 
3215 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
3216 
3217 	switch (np->rn_id.rl_type) {
3218 	case REP_PROTOCOL_ENTITY_SERVICE:
3219 	case REP_PROTOCOL_ENTITY_INSTANCE:
3220 	case REP_PROTOCOL_ENTITY_SNAPSHOT:
3221 		break;			/* deletable */
3222 
3223 	case REP_PROTOCOL_ENTITY_SCOPE:
3224 	case REP_PROTOCOL_ENTITY_SNAPLEVEL:
3225 		/* Scopes and snaplevels are indelible. */
3226 		(void) pthread_mutex_unlock(&np->rn_lock);
3227 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
3228 
3229 	case REP_PROTOCOL_ENTITY_CPROPERTYGRP:
3230 		(void) pthread_mutex_unlock(&np->rn_lock);
3231 		np = np->rn_cchain[0];
3232 		RC_NODE_CHECK_AND_LOCK(np);
3233 		break;
3234 
3235 	case REP_PROTOCOL_ENTITY_PROPERTYGRP:
3236 		if (np->rn_id.rl_ids[ID_SNAPSHOT] == 0)
3237 			break;
3238 
3239 		/* Snapshot property groups are indelible. */
3240 		(void) pthread_mutex_unlock(&np->rn_lock);
3241 		return (REP_PROTOCOL_FAIL_PERMISSION_DENIED);
3242 
3243 	case REP_PROTOCOL_ENTITY_PROPERTY:
3244 		(void) pthread_mutex_unlock(&np->rn_lock);
3245 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
3246 
3247 	default:
3248 		assert(0);
3249 		abort();
3250 		break;
3251 	}
3252 
3253 	np_orig = np;
3254 	rc_node_hold_locked(np);	/* simplifies rest of the code */
3255 
3256 again:
3257 	/*
3258 	 * The following loop is to deal with the fact that snapshots and
3259 	 * property groups are moving targets -- changes to them result
3260 	 * in a new "child" node.  Since we can only delete from the top node,
3261 	 * we have to loop until we have a non-RC_NODE_OLD version.
3262 	 */
3263 	for (;;) {
3264 		if (!rc_node_wait_flag(np,
3265 		    RC_NODE_IN_TX | RC_NODE_USING_PARENT)) {
3266 			rc_node_rele_locked(np);
3267 			return (REP_PROTOCOL_FAIL_DELETED);
3268 		}
3269 
3270 		if (np->rn_flags & RC_NODE_OLD) {
3271 			rc_node_rele_locked(np);
3272 			np = cache_lookup(&np_orig->rn_id);
3273 			assert(np != np_orig);
3274 
3275 			if (np == NULL) {
3276 				rc = REP_PROTOCOL_FAIL_DELETED;
3277 				goto fail;
3278 			}
3279 			(void) pthread_mutex_lock(&np->rn_lock);
3280 			continue;
3281 		}
3282 
3283 		if (!rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
3284 			rc_node_rele_locked(np);
3285 			rc_node_clear(npp, 1);
3286 			return (REP_PROTOCOL_FAIL_DELETED);
3287 		}
3288 
3289 		/*
3290 		 * Mark our parent as children changing.  this call drops our
3291 		 * lock and the RC_NODE_USING_PARENT flag, and returns with
3292 		 * pp's lock held
3293 		 */
3294 		pp = rc_node_hold_parent_flag(np, RC_NODE_CHILDREN_CHANGING);
3295 		if (pp == NULL) {
3296 			/* our parent is gone, we're going next... */
3297 			rc_node_rele(np);
3298 
3299 			rc_node_clear(npp, 1);
3300 			return (REP_PROTOCOL_FAIL_DELETED);
3301 		}
3302 
3303 		rc_node_hold_locked(pp);		/* hold for later */
3304 		(void) pthread_mutex_unlock(&pp->rn_lock);
3305 
3306 		(void) pthread_mutex_lock(&np->rn_lock);
3307 		if (!(np->rn_flags & RC_NODE_OLD))
3308 			break;			/* not old -- we're done */
3309 
3310 		(void) pthread_mutex_unlock(&np->rn_lock);
3311 		(void) pthread_mutex_lock(&pp->rn_lock);
3312 		rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
3313 		rc_node_rele_locked(pp);
3314 		(void) pthread_mutex_lock(&np->rn_lock);
3315 		continue;			/* loop around and try again */
3316 	}
3317 	/*
3318 	 * Everyone out of the pool -- we grab everything but
3319 	 * RC_NODE_USING_PARENT (including RC_NODE_DYING) to keep
3320 	 * any changes from occurring while we are attempting to
3321 	 * delete the node.
3322 	 */
3323 	if (!rc_node_hold_flag(np, RC_NODE_DYING_FLAGS)) {
3324 		(void) pthread_mutex_unlock(&np->rn_lock);
3325 		rc = REP_PROTOCOL_FAIL_DELETED;
3326 		goto fail;
3327 	}
3328 
3329 	assert(!(np->rn_flags & RC_NODE_OLD));
3330 
3331 	if (!client_is_privileged()) {
3332 		/* permission check */
3333 		(void) pthread_mutex_unlock(&np->rn_lock);
3334 
3335 #ifdef NATIVE_BUILD
3336 		rc = REP_PROTOCOL_FAIL_PERMISSION_DENIED;
3337 #else
3338 		pcp = pc_create();
3339 		if (pcp != NULL) {
3340 			rc = perm_add_enabling(pcp, AUTH_MODIFY);
3341 
3342 			/* add .smf.modify.<type> for pgs. */
3343 			if (rc == REP_PROTOCOL_SUCCESS && np->rn_id.rl_type ==
3344 			    REP_PROTOCOL_ENTITY_PROPERTYGRP) {
3345 				const char * const auth =
3346 				    perm_auth_for_pgtype(np->rn_type);
3347 
3348 				if (auth != NULL)
3349 					rc = perm_add_enabling(pcp, auth);
3350 			}
3351 
3352 			if (rc == REP_PROTOCOL_SUCCESS) {
3353 				granted = perm_granted(pcp);
3354 
3355 				if (granted < 0)
3356 					rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
3357 			}
3358 
3359 			pc_free(pcp);
3360 		} else {
3361 			rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
3362 		}
3363 
3364 		if (rc == REP_PROTOCOL_SUCCESS && !granted)
3365 			rc = REP_PROTOCOL_FAIL_PERMISSION_DENIED;
3366 #endif /* NATIVE_BUILD */
3367 
3368 		if (rc != REP_PROTOCOL_SUCCESS) {
3369 			(void) pthread_mutex_lock(&np->rn_lock);
3370 			rc_node_rele_flag(np, RC_NODE_DYING_FLAGS);
3371 			(void) pthread_mutex_unlock(&np->rn_lock);
3372 			goto fail;
3373 		}
3374 
3375 		(void) pthread_mutex_lock(&np->rn_lock);
3376 	}
3377 
3378 	ndp = uu_zalloc(sizeof (*ndp));
3379 	if (ndp == NULL) {
3380 		rc_node_rele_flag(np, RC_NODE_DYING_FLAGS);
3381 		(void) pthread_mutex_unlock(&np->rn_lock);
3382 		rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
3383 		goto fail;
3384 	}
3385 
3386 	rc_node_delete_hold(np, 1);	/* hold entire subgraph, drop lock */
3387 
3388 	rc = object_delete(np);
3389 
3390 	if (rc != REP_PROTOCOL_SUCCESS) {
3391 		(void) pthread_mutex_lock(&np->rn_lock);
3392 		rc_node_delete_rele(np, 1);		/* drops lock */
3393 		uu_free(ndp);
3394 		goto fail;
3395 	}
3396 
3397 	/*
3398 	 * Now, delicately unlink and delete the object.
3399 	 *
3400 	 * Create the delete notification, atomically remove
3401 	 * from the hash table and set the NODE_DEAD flag, and
3402 	 * remove from the parent's children list.
3403 	 */
3404 	rc_notify_node_delete(ndp, np); /* frees or uses ndp */
3405 
3406 	bp = cache_hold(np->rn_hash);
3407 
3408 	(void) pthread_mutex_lock(&np->rn_lock);
3409 	cache_remove_unlocked(bp, np);
3410 	cache_release(bp);
3411 
3412 	np->rn_flags |= RC_NODE_DEAD;
3413 	if (pp != NULL) {
3414 		(void) pthread_mutex_unlock(&np->rn_lock);
3415 
3416 		(void) pthread_mutex_lock(&pp->rn_lock);
3417 		(void) pthread_mutex_lock(&np->rn_lock);
3418 		uu_list_remove(pp->rn_children, np);
3419 		rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
3420 		(void) pthread_mutex_unlock(&pp->rn_lock);
3421 		np->rn_flags &= ~RC_NODE_IN_PARENT;
3422 	}
3423 	/*
3424 	 * finally, propagate death to our children, handle notifications,
3425 	 * and release our hold.
3426 	 */
3427 	rc_node_hold_locked(np);	/* hold for delete */
3428 	rc_node_delete_children(np, 1);	/* drops DYING_FLAGS, lock, ref */
3429 
3430 	rc_node_clear(npp, 1);
3431 
3432 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
3433 	while ((pnp = uu_list_first(np->rn_pg_notify_list)) != NULL)
3434 		rc_pg_notify_fire(pnp);
3435 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
3436 	rc_notify_remove_node(np);
3437 
3438 	rc_node_rele(np);
3439 
3440 	return (rc);
3441 
3442 fail:
3443 	rc_node_rele(np);
3444 	if (rc == REP_PROTOCOL_FAIL_DELETED)
3445 		rc_node_clear(npp, 1);
3446 	if (pp != NULL) {
3447 		(void) pthread_mutex_lock(&pp->rn_lock);
3448 		rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
3449 		rc_node_rele_locked(pp);	/* drop ref and lock */
3450 	}
3451 	return (rc);
3452 }
3453 
3454 int
3455 rc_node_next_snaplevel(rc_node_ptr_t *npp, rc_node_ptr_t *cpp)
3456 {
3457 	rc_node_t *np;
3458 	rc_node_t *cp, *pp;
3459 	int res;
3460 
3461 	rc_node_clear(cpp, 0);
3462 
3463 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
3464 
3465 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPSHOT &&
3466 	    np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPLEVEL) {
3467 		(void) pthread_mutex_unlock(&np->rn_lock);
3468 		return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
3469 	}
3470 
3471 	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_SNAPSHOT) {
3472 		if ((res = rc_node_fill_children(np,
3473 		    REP_PROTOCOL_ENTITY_SNAPLEVEL)) != REP_PROTOCOL_SUCCESS) {
3474 			(void) pthread_mutex_unlock(&np->rn_lock);
3475 			return (res);
3476 		}
3477 
3478 		for (cp = uu_list_first(np->rn_children);
3479 		    cp != NULL;
3480 		    cp = uu_list_next(np->rn_children, cp)) {
3481 			if (cp->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPLEVEL)
3482 				continue;
3483 			rc_node_hold(cp);
3484 			break;
3485 		}
3486 
3487 		(void) pthread_mutex_unlock(&np->rn_lock);
3488 	} else {
3489 		HOLD_PTR_FLAG_OR_RETURN(np, npp, RC_NODE_USING_PARENT);
3490 		/*
3491 		 * mark our parent as children changing.  This call drops our
3492 		 * lock and the RC_NODE_USING_PARENT flag, and returns with
3493 		 * pp's lock held
3494 		 */
3495 		pp = rc_node_hold_parent_flag(np, RC_NODE_CHILDREN_CHANGING);
3496 		if (pp == NULL) {
3497 			/* our parent is gone, we're going next... */
3498 
3499 			rc_node_clear(npp, 1);
3500 			return (REP_PROTOCOL_FAIL_DELETED);
3501 		}
3502 
3503 		/*
3504 		 * find the next snaplevel
3505 		 */
3506 		cp = np;
3507 		while ((cp = uu_list_next(pp->rn_children, cp)) != NULL &&
3508 		    cp->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPLEVEL)
3509 			;
3510 
3511 		/* it must match the snaplevel list */
3512 		assert((cp == NULL && np->rn_snaplevel->rsl_next == NULL) ||
3513 		    (cp != NULL && np->rn_snaplevel->rsl_next ==
3514 		    cp->rn_snaplevel));
3515 
3516 		if (cp != NULL)
3517 			rc_node_hold(cp);
3518 
3519 		rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
3520 
3521 		(void) pthread_mutex_unlock(&pp->rn_lock);
3522 	}
3523 
3524 	rc_node_assign(cpp, cp);
3525 	if (cp != NULL) {
3526 		rc_node_rele(cp);
3527 
3528 		return (REP_PROTOCOL_SUCCESS);
3529 	}
3530 	return (REP_PROTOCOL_FAIL_NOT_FOUND);
3531 }
3532 
3533 /*
3534  * This call takes a snapshot (np) and either:
3535  *	an existing snapid (to be associated with np), or
3536  *	a non-NULL parentp (from which a new snapshot is taken, and associated
3537  *	    with np)
3538  *
3539  * To do the association, np is duplicated, the duplicate is made to
3540  * represent the new snapid, and np is replaced with the new rc_node_t on
3541  * np's parent's child list. np is placed on the new node's rn_former list,
3542  * and replaces np in cache_hash (so rc_node_update() will find the new one).
3543  */
3544 static int
3545 rc_attach_snapshot(rc_node_t *np, uint32_t snapid, rc_node_t *parentp)
3546 {
3547 	rc_node_t *np_orig;
3548 	rc_node_t *nnp, *prev;
3549 	rc_node_t *pp;
3550 	int rc;
3551 
3552 	if (parentp != NULL)
3553 		assert(snapid == 0);
3554 
3555 	assert(MUTEX_HELD(&np->rn_lock));
3556 
3557 	if ((rc = rc_node_modify_permission_check()) != REP_PROTOCOL_SUCCESS) {
3558 		(void) pthread_mutex_unlock(&np->rn_lock);
3559 		return (rc);
3560 	}
3561 
3562 	np_orig = np;
3563 	rc_node_hold_locked(np);		/* simplifies the remainder */
3564 
3565 	/*
3566 	 * get the latest node, holding RC_NODE_IN_TX to keep the rn_former
3567 	 * list from changing.
3568 	 */
3569 	for (;;) {
3570 		if (!(np->rn_flags & RC_NODE_OLD)) {
3571 			if (!rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
3572 				goto again;
3573 			}
3574 			pp = rc_node_hold_parent_flag(np,
3575 			    RC_NODE_CHILDREN_CHANGING);
3576 
3577 			(void) pthread_mutex_lock(&np->rn_lock);
3578 			if (pp == NULL) {
3579 				goto again;
3580 			}
3581 			if (np->rn_flags & RC_NODE_OLD) {
3582 				rc_node_rele_flag(pp,
3583 				    RC_NODE_CHILDREN_CHANGING);
3584 				(void) pthread_mutex_unlock(&pp->rn_lock);
3585 				goto again;
3586 			}
3587 			(void) pthread_mutex_unlock(&pp->rn_lock);
3588 
3589 			if (!rc_node_hold_flag(np, RC_NODE_IN_TX)) {
3590 				/*
3591 				 * Can't happen, since we're holding our
3592 				 * parent's CHILDREN_CHANGING flag...
3593 				 */
3594 				abort();
3595 			}
3596 			break;			/* everything's ready */
3597 		}
3598 again:
3599 		rc_node_rele_locked(np);
3600 		np = cache_lookup(&np_orig->rn_id);
3601 
3602 		if (np == NULL)
3603 			return (REP_PROTOCOL_FAIL_DELETED);
3604 
3605 		(void) pthread_mutex_lock(&np->rn_lock);
3606 	}
3607 
3608 	if (parentp != NULL) {
3609 		if (pp != parentp) {
3610 			rc = REP_PROTOCOL_FAIL_BAD_REQUEST;
3611 			goto fail;
3612 		}
3613 		nnp = NULL;
3614 	} else {
3615 		/*
3616 		 * look for a former node with the snapid we need.
3617 		 */
3618 		if (np->rn_snapshot_id == snapid) {
3619 			rc_node_rele_flag(np, RC_NODE_IN_TX);
3620 			rc_node_rele_locked(np);
3621 
3622 			(void) pthread_mutex_lock(&pp->rn_lock);
3623 			rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
3624 			(void) pthread_mutex_unlock(&pp->rn_lock);
3625 			return (REP_PROTOCOL_SUCCESS);	/* nothing to do */
3626 		}
3627 
3628 		prev = np;
3629 		while ((nnp = prev->rn_former) != NULL) {
3630 			if (nnp->rn_snapshot_id == snapid) {
3631 				rc_node_hold(nnp);
3632 				break;		/* existing node with that id */
3633 			}
3634 			prev = nnp;
3635 		}
3636 	}
3637 
3638 	if (nnp == NULL) {
3639 		prev = NULL;
3640 		nnp = rc_node_alloc();
3641 		if (nnp == NULL) {
3642 			rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
3643 			goto fail;
3644 		}
3645 
3646 		nnp->rn_id = np->rn_id;		/* structure assignment */
3647 		nnp->rn_hash = np->rn_hash;
3648 		nnp->rn_name = strdup(np->rn_name);
3649 		nnp->rn_snapshot_id = snapid;
3650 		nnp->rn_flags = RC_NODE_IN_TX | RC_NODE_USING_PARENT;
3651 
3652 		if (nnp->rn_name == NULL) {
3653 			rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
3654 			goto fail;
3655 		}
3656 	}
3657 
3658 	(void) pthread_mutex_unlock(&np->rn_lock);
3659 
3660 	rc = object_snapshot_attach(&np->rn_id, &snapid, (parentp != NULL));
3661 
3662 	if (parentp != NULL)
3663 		nnp->rn_snapshot_id = snapid;	/* fill in new snapid */
3664 	else
3665 		assert(nnp->rn_snapshot_id == snapid);
3666 
3667 	(void) pthread_mutex_lock(&np->rn_lock);
3668 	if (rc != REP_PROTOCOL_SUCCESS)
3669 		goto fail;
3670 
3671 	/*
3672 	 * fix up the former chain
3673 	 */
3674 	if (prev != NULL) {
3675 		prev->rn_former = nnp->rn_former;
3676 		(void) pthread_mutex_lock(&nnp->rn_lock);
3677 		nnp->rn_flags &= ~RC_NODE_ON_FORMER;
3678 		nnp->rn_former = NULL;
3679 		(void) pthread_mutex_unlock(&nnp->rn_lock);
3680 	}
3681 	np->rn_flags |= RC_NODE_OLD;
3682 	(void) pthread_mutex_unlock(&np->rn_lock);
3683 
3684 	/*
3685 	 * replace np with nnp
3686 	 */
3687 	rc_node_relink_child(pp, np, nnp);
3688 
3689 	rc_node_rele(np);
3690 
3691 	return (REP_PROTOCOL_SUCCESS);
3692 
3693 fail:
3694 	rc_node_rele_flag(np, RC_NODE_IN_TX);
3695 	rc_node_rele_locked(np);
3696 	(void) pthread_mutex_lock(&pp->rn_lock);
3697 	rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
3698 	(void) pthread_mutex_unlock(&pp->rn_lock);
3699 
3700 	if (nnp != NULL) {
3701 		if (prev == NULL)
3702 			rc_node_destroy(nnp);
3703 		else
3704 			rc_node_rele(nnp);
3705 	}
3706 
3707 	return (rc);
3708 }
3709 
3710 int
3711 rc_snapshot_take_new(rc_node_ptr_t *npp, const char *svcname,
3712     const char *instname, const char *name, rc_node_ptr_t *outpp)
3713 {
3714 	rc_node_t *np;
3715 	rc_node_t *outp = NULL;
3716 	int rc;
3717 
3718 	rc_node_clear(outpp, 0);
3719 
3720 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
3721 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_INSTANCE) {
3722 		(void) pthread_mutex_unlock(&np->rn_lock);
3723 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
3724 	}
3725 
3726 	rc = rc_check_type_name(REP_PROTOCOL_ENTITY_SNAPSHOT, name);
3727 	if (rc != REP_PROTOCOL_SUCCESS) {
3728 		(void) pthread_mutex_unlock(&np->rn_lock);
3729 		return (rc);
3730 	}
3731 
3732 	if (svcname != NULL && (rc =
3733 	    rc_check_type_name(REP_PROTOCOL_ENTITY_SERVICE, svcname)) !=
3734 	    REP_PROTOCOL_SUCCESS) {
3735 		(void) pthread_mutex_unlock(&np->rn_lock);
3736 		return (rc);
3737 	}
3738 
3739 	if (instname != NULL && (rc =
3740 	    rc_check_type_name(REP_PROTOCOL_ENTITY_INSTANCE, instname)) !=
3741 	    REP_PROTOCOL_SUCCESS) {
3742 		(void) pthread_mutex_unlock(&np->rn_lock);
3743 		return (rc);
3744 	}
3745 
3746 	if ((rc = rc_node_modify_permission_check()) != REP_PROTOCOL_SUCCESS) {
3747 		(void) pthread_mutex_unlock(&np->rn_lock);
3748 		return (rc);
3749 	}
3750 
3751 	HOLD_PTR_FLAG_OR_RETURN(np, npp, RC_NODE_CREATING_CHILD);
3752 	(void) pthread_mutex_unlock(&np->rn_lock);
3753 
3754 	rc = object_snapshot_take_new(np, svcname, instname, name, &outp);
3755 
3756 	if (rc == REP_PROTOCOL_SUCCESS) {
3757 		rc_node_assign(outpp, outp);
3758 		rc_node_rele(outp);
3759 	}
3760 
3761 	(void) pthread_mutex_lock(&np->rn_lock);
3762 	rc_node_rele_flag(np, RC_NODE_CREATING_CHILD);
3763 	(void) pthread_mutex_unlock(&np->rn_lock);
3764 
3765 	return (rc);
3766 }
3767 
3768 int
3769 rc_snapshot_take_attach(rc_node_ptr_t *npp, rc_node_ptr_t *outpp)
3770 {
3771 	rc_node_t *np, *outp;
3772 
3773 	RC_NODE_PTR_GET_CHECK(np, npp);
3774 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_INSTANCE) {
3775 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
3776 	}
3777 
3778 	RC_NODE_PTR_GET_CHECK_AND_LOCK(outp, outpp);
3779 	if (outp->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPSHOT) {
3780 		(void) pthread_mutex_unlock(&outp->rn_lock);
3781 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
3782 	}
3783 
3784 	return (rc_attach_snapshot(outp, 0, np));	/* drops outp's lock */
3785 }
3786 
3787 int
3788 rc_snapshot_attach(rc_node_ptr_t *npp, rc_node_ptr_t *cpp)
3789 {
3790 	rc_node_t *np;
3791 	rc_node_t *cp;
3792 	uint32_t snapid;
3793 
3794 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
3795 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPSHOT) {
3796 		(void) pthread_mutex_unlock(&np->rn_lock);
3797 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
3798 	}
3799 	snapid = np->rn_snapshot_id;
3800 	(void) pthread_mutex_unlock(&np->rn_lock);
3801 
3802 	RC_NODE_PTR_GET_CHECK_AND_LOCK(cp, cpp);
3803 	if (cp->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPSHOT) {
3804 		(void) pthread_mutex_unlock(&cp->rn_lock);
3805 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
3806 	}
3807 
3808 	return (rc_attach_snapshot(cp, snapid, NULL));	/* drops cp's lock */
3809 }
3810 
3811 /*
3812  * Iteration
3813  */
3814 static int
3815 rc_iter_filter_name(rc_node_t *np, void *s)
3816 {
3817 	const char *name = s;
3818 
3819 	return (strcmp(np->rn_name, name) == 0);
3820 }
3821 
3822 static int
3823 rc_iter_filter_type(rc_node_t *np, void *s)
3824 {
3825 	const char *type = s;
3826 
3827 	return (np->rn_type != NULL && strcmp(np->rn_type, type) == 0);
3828 }
3829 
3830 /*ARGSUSED*/
3831 static int
3832 rc_iter_null_filter(rc_node_t *np, void *s)
3833 {
3834 	return (1);
3835 }
3836 
3837 /*
3838  * Allocate & initialize an rc_node_iter_t structure.  Essentially, ensure
3839  * np->rn_children is populated and call uu_list_walk_start(np->rn_children).
3840  * If successful, leaves a hold on np & increments np->rn_other_refs
3841  *
3842  * If composed is true, then set up for iteration across the top level of np's
3843  * composition chain.  If successful, leaves a hold on np and increments
3844  * rn_other_refs for the top level of np's composition chain.
3845  *
3846  * Fails with
3847  *   _NO_RESOURCES
3848  *   _INVALID_TYPE
3849  *   _TYPE_MISMATCH - np cannot carry type children
3850  *   _DELETED
3851  */
3852 static int
3853 rc_iter_create(rc_node_iter_t **resp, rc_node_t *np, uint32_t type,
3854     rc_iter_filter_func *filter, void *arg, boolean_t composed)
3855 {
3856 	rc_node_iter_t *nip;
3857 	int res;
3858 
3859 	assert(*resp == NULL);
3860 
3861 	nip = uu_zalloc(sizeof (*nip));
3862 	if (nip == NULL)
3863 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
3864 
3865 	/* np is held by the client's rc_node_ptr_t */
3866 	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP)
3867 		composed = 1;
3868 
3869 	if (!composed) {
3870 		(void) pthread_mutex_lock(&np->rn_lock);
3871 
3872 		if ((res = rc_node_fill_children(np, type)) !=
3873 		    REP_PROTOCOL_SUCCESS) {
3874 			(void) pthread_mutex_unlock(&np->rn_lock);
3875 			uu_free(nip);
3876 			return (res);
3877 		}
3878 
3879 		nip->rni_clevel = -1;
3880 
3881 		nip->rni_iter = uu_list_walk_start(np->rn_children,
3882 		    UU_WALK_ROBUST);
3883 		if (nip->rni_iter != NULL) {
3884 			nip->rni_iter_node = np;
3885 			rc_node_hold_other(np);
3886 		} else {
3887 			(void) pthread_mutex_unlock(&np->rn_lock);
3888 			uu_free(nip);
3889 			return (REP_PROTOCOL_FAIL_NO_RESOURCES);
3890 		}
3891 		(void) pthread_mutex_unlock(&np->rn_lock);
3892 	} else {
3893 		rc_node_t *ent;
3894 
3895 		if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_SNAPSHOT) {
3896 			/* rn_cchain isn't valid until children are loaded. */
3897 			(void) pthread_mutex_lock(&np->rn_lock);
3898 			res = rc_node_fill_children(np,
3899 			    REP_PROTOCOL_ENTITY_SNAPLEVEL);
3900 			(void) pthread_mutex_unlock(&np->rn_lock);
3901 			if (res != REP_PROTOCOL_SUCCESS) {
3902 				uu_free(nip);
3903 				return (res);
3904 			}
3905 
3906 			/* Check for an empty snapshot. */
3907 			if (np->rn_cchain[0] == NULL)
3908 				goto empty;
3909 		}
3910 
3911 		/* Start at the top of the composition chain. */
3912 		for (nip->rni_clevel = 0; ; ++nip->rni_clevel) {
3913 			if (nip->rni_clevel >= COMPOSITION_DEPTH) {
3914 				/* Empty composition chain. */
3915 empty:
3916 				nip->rni_clevel = -1;
3917 				nip->rni_iter = NULL;
3918 				/* It's ok, iter_next() will return _DONE. */
3919 				goto out;
3920 			}
3921 
3922 			ent = np->rn_cchain[nip->rni_clevel];
3923 			assert(ent != NULL);
3924 
3925 			if (rc_node_check_and_lock(ent) == REP_PROTOCOL_SUCCESS)
3926 				break;
3927 
3928 			/* Someone deleted it, so try the next one. */
3929 		}
3930 
3931 		res = rc_node_fill_children(ent, type);
3932 
3933 		if (res == REP_PROTOCOL_SUCCESS) {
3934 			nip->rni_iter = uu_list_walk_start(ent->rn_children,
3935 			    UU_WALK_ROBUST);
3936 
3937 			if (nip->rni_iter == NULL)
3938 				res = REP_PROTOCOL_FAIL_NO_RESOURCES;
3939 			else {
3940 				nip->rni_iter_node = ent;
3941 				rc_node_hold_other(ent);
3942 			}
3943 		}
3944 
3945 		if (res != REP_PROTOCOL_SUCCESS) {
3946 			(void) pthread_mutex_unlock(&ent->rn_lock);
3947 			uu_free(nip);
3948 			return (res);
3949 		}
3950 
3951 		(void) pthread_mutex_unlock(&ent->rn_lock);
3952 	}
3953 
3954 out:
3955 	rc_node_hold(np);		/* released by rc_iter_end() */
3956 	nip->rni_parent = np;
3957 	nip->rni_type = type;
3958 	nip->rni_filter = (filter != NULL)? filter : rc_iter_null_filter;
3959 	nip->rni_filter_arg = arg;
3960 	*resp = nip;
3961 	return (REP_PROTOCOL_SUCCESS);
3962 }
3963 
3964 static void
3965 rc_iter_end(rc_node_iter_t *iter)
3966 {
3967 	rc_node_t *np = iter->rni_parent;
3968 
3969 	if (iter->rni_clevel >= 0)
3970 		np = np->rn_cchain[iter->rni_clevel];
3971 
3972 	assert(MUTEX_HELD(&np->rn_lock));
3973 	if (iter->rni_iter != NULL)
3974 		uu_list_walk_end(iter->rni_iter);
3975 	iter->rni_iter = NULL;
3976 
3977 	(void) pthread_mutex_unlock(&np->rn_lock);
3978 	rc_node_rele(iter->rni_parent);
3979 	if (iter->rni_iter_node != NULL)
3980 		rc_node_rele_other(iter->rni_iter_node);
3981 }
3982 
3983 /*
3984  * Fails with
3985  *   _NOT_SET - npp is reset
3986  *   _DELETED - npp's node has been deleted
3987  *   _NOT_APPLICABLE - npp's node is not a property
3988  *   _NO_RESOURCES - out of memory
3989  */
3990 static int
3991 rc_node_setup_value_iter(rc_node_ptr_t *npp, rc_node_iter_t **iterp)
3992 {
3993 	rc_node_t *np;
3994 
3995 	rc_node_iter_t *nip;
3996 
3997 	assert(*iterp == NULL);
3998 
3999 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
4000 
4001 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTY) {
4002 		(void) pthread_mutex_unlock(&np->rn_lock);
4003 		return (REP_PROTOCOL_FAIL_NOT_APPLICABLE);
4004 	}
4005 
4006 	nip = uu_zalloc(sizeof (*nip));
4007 	if (nip == NULL) {
4008 		(void) pthread_mutex_unlock(&np->rn_lock);
4009 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
4010 	}
4011 
4012 	nip->rni_parent = np;
4013 	nip->rni_iter = NULL;
4014 	nip->rni_clevel = -1;
4015 	nip->rni_type = REP_PROTOCOL_ENTITY_VALUE;
4016 	nip->rni_offset = 0;
4017 	nip->rni_last_offset = 0;
4018 
4019 	rc_node_hold_locked(np);
4020 
4021 	*iterp = nip;
4022 	(void) pthread_mutex_unlock(&np->rn_lock);
4023 
4024 	return (REP_PROTOCOL_SUCCESS);
4025 }
4026 
4027 /*
4028  * Returns:
4029  *   _NOT_SET - npp is reset
4030  *   _DELETED - npp's node has been deleted
4031  *   _TYPE_MISMATCH - npp's node is not a property
4032  *   _NOT_FOUND - property has no values
4033  *   _TRUNCATED - property has >1 values (first is written into out)
4034  *   _SUCCESS - property has 1 value (which is written into out)
4035  *
4036  * We shorten *sz_out to not include anything after the final '\0'.
4037  */
4038 int
4039 rc_node_get_property_value(rc_node_ptr_t *npp,
4040     struct rep_protocol_value_response *out, size_t *sz_out)
4041 {
4042 	rc_node_t *np;
4043 	size_t w;
4044 	int ret;
4045 
4046 	assert(*sz_out == sizeof (*out));
4047 
4048 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
4049 
4050 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTY) {
4051 		(void) pthread_mutex_unlock(&np->rn_lock);
4052 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
4053 	}
4054 
4055 	if (np->rn_values_size == 0) {
4056 		(void) pthread_mutex_unlock(&np->rn_lock);
4057 		return (REP_PROTOCOL_FAIL_NOT_FOUND);
4058 	}
4059 	out->rpr_type = np->rn_valtype;
4060 	w = strlcpy(out->rpr_value, &np->rn_values[0],
4061 	    sizeof (out->rpr_value));
4062 
4063 	if (w >= sizeof (out->rpr_value))
4064 		backend_panic("value too large");
4065 
4066 	*sz_out = offsetof(struct rep_protocol_value_response,
4067 	    rpr_value[w + 1]);
4068 
4069 	ret = (np->rn_values_count != 1)? REP_PROTOCOL_FAIL_TRUNCATED :
4070 	    REP_PROTOCOL_SUCCESS;
4071 	(void) pthread_mutex_unlock(&np->rn_lock);
4072 	return (ret);
4073 }
4074 
4075 int
4076 rc_iter_next_value(rc_node_iter_t *iter,
4077     struct rep_protocol_value_response *out, size_t *sz_out, int repeat)
4078 {
4079 	rc_node_t *np = iter->rni_parent;
4080 	const char *vals;
4081 	size_t len;
4082 
4083 	size_t start;
4084 	size_t w;
4085 
4086 	rep_protocol_responseid_t result;
4087 
4088 	assert(*sz_out == sizeof (*out));
4089 
4090 	(void) memset(out, '\0', *sz_out);
4091 
4092 	if (iter->rni_type != REP_PROTOCOL_ENTITY_VALUE)
4093 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4094 
4095 	RC_NODE_CHECK_AND_LOCK(np);
4096 
4097 	vals = np->rn_values;
4098 	len = np->rn_values_size;
4099 
4100 	out->rpr_type = np->rn_valtype;
4101 
4102 	start = (repeat)? iter->rni_last_offset : iter->rni_offset;
4103 
4104 	if (len == 0 || start >= len) {
4105 		result = REP_PROTOCOL_DONE;
4106 		*sz_out -= sizeof (out->rpr_value);
4107 	} else {
4108 		w = strlcpy(out->rpr_value, &vals[start],
4109 		    sizeof (out->rpr_value));
4110 
4111 		if (w >= sizeof (out->rpr_value))
4112 			backend_panic("value too large");
4113 
4114 		*sz_out = offsetof(struct rep_protocol_value_response,
4115 		    rpr_value[w + 1]);
4116 
4117 		/*
4118 		 * update the offsets if we're not repeating
4119 		 */
4120 		if (!repeat) {
4121 			iter->rni_last_offset = iter->rni_offset;
4122 			iter->rni_offset += (w + 1);
4123 		}
4124 
4125 		result = REP_PROTOCOL_SUCCESS;
4126 	}
4127 
4128 	(void) pthread_mutex_unlock(&np->rn_lock);
4129 	return (result);
4130 }
4131 
4132 /*
4133  * Entry point for ITER_START from client.c.  Validate the arguments & call
4134  * rc_iter_create().
4135  *
4136  * Fails with
4137  *   _NOT_SET
4138  *   _DELETED
4139  *   _TYPE_MISMATCH - np cannot carry type children
4140  *   _BAD_REQUEST - flags is invalid
4141  *		    pattern is invalid
4142  *   _NO_RESOURCES
4143  *   _INVALID_TYPE
4144  *   _TYPE_MISMATCH - *npp cannot have children of type
4145  *   _BACKEND_ACCESS
4146  */
4147 int
4148 rc_node_setup_iter(rc_node_ptr_t *npp, rc_node_iter_t **iterp,
4149     uint32_t type, uint32_t flags, const char *pattern)
4150 {
4151 	rc_node_t *np;
4152 	rc_iter_filter_func *f = NULL;
4153 	int rc;
4154 
4155 	RC_NODE_PTR_GET_CHECK(np, npp);
4156 
4157 	if (pattern != NULL && pattern[0] == '\0')
4158 		pattern = NULL;
4159 
4160 	if (type == REP_PROTOCOL_ENTITY_VALUE) {
4161 		if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTY)
4162 			return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
4163 		if (flags != RP_ITER_START_ALL || pattern != NULL)
4164 			return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4165 
4166 		rc = rc_node_setup_value_iter(npp, iterp);
4167 		assert(rc != REP_PROTOCOL_FAIL_NOT_APPLICABLE);
4168 		return (rc);
4169 	}
4170 
4171 	if ((rc = rc_check_parent_child(np->rn_id.rl_type, type)) !=
4172 	    REP_PROTOCOL_SUCCESS)
4173 		return (rc);
4174 
4175 	if (((flags & RP_ITER_START_FILT_MASK) == RP_ITER_START_ALL) ^
4176 	    (pattern == NULL))
4177 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4178 
4179 	/* Composition only works for instances & snapshots. */
4180 	if ((flags & RP_ITER_START_COMPOSED) &&
4181 	    (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_INSTANCE &&
4182 	    np->rn_id.rl_type != REP_PROTOCOL_ENTITY_SNAPSHOT))
4183 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4184 
4185 	if (pattern != NULL) {
4186 		if ((rc = rc_check_type_name(type, pattern)) !=
4187 		    REP_PROTOCOL_SUCCESS)
4188 			return (rc);
4189 		pattern = strdup(pattern);
4190 		if (pattern == NULL)
4191 			return (REP_PROTOCOL_FAIL_NO_RESOURCES);
4192 	}
4193 
4194 	switch (flags & RP_ITER_START_FILT_MASK) {
4195 	case RP_ITER_START_ALL:
4196 		f = NULL;
4197 		break;
4198 	case RP_ITER_START_EXACT:
4199 		f = rc_iter_filter_name;
4200 		break;
4201 	case RP_ITER_START_PGTYPE:
4202 		if (type != REP_PROTOCOL_ENTITY_PROPERTYGRP) {
4203 			free((void *)pattern);
4204 			return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4205 		}
4206 		f = rc_iter_filter_type;
4207 		break;
4208 	default:
4209 		free((void *)pattern);
4210 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4211 	}
4212 
4213 	rc = rc_iter_create(iterp, np, type, f, (void *)pattern,
4214 	    flags & RP_ITER_START_COMPOSED);
4215 	if (rc != REP_PROTOCOL_SUCCESS && pattern != NULL)
4216 		free((void *)pattern);
4217 
4218 	return (rc);
4219 }
4220 
4221 /*
4222  * Do uu_list_walk_next(iter->rni_iter) until we find a child which matches
4223  * the filter.
4224  * For composed iterators, then check to see if there's an overlapping entity
4225  * (see embedded comments).  If we reach the end of the list, start over at
4226  * the next level.
 *
 * Except for the early _BAD_REQUEST return taken for value iterators, *out
 * is either cleared or assigned the node found before returning.  The node
 * is held only while it is being assigned; that hold is dropped before
 * returning.
4227  *
4228  * Returns
4229  *   _BAD_REQUEST - iter walks values
4230  *   _TYPE_MISMATCH - iter does not walk type entities
4231  *   _DELETED - parent was deleted
4232  *   _NO_RESOURCES
4233  *   _INVALID_TYPE - type is invalid
4234  *   _DONE
4235  *   _SUCCESS
4236  *
4237  * For composed property group iterators, can also return
4238  *   _TYPE_MISMATCH - parent cannot have type children
4239  */
4240 int
4241 rc_iter_next(rc_node_iter_t *iter, rc_node_ptr_t *out, uint32_t type)
4242 {
4243 	rc_node_t *np = iter->rni_parent;
4244 	rc_node_t *res;
4245 	int rc;
4246 
4247 	if (iter->rni_type == REP_PROTOCOL_ENTITY_VALUE)
4248 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4249 
	/* No active walker: report the iteration as finished. */
4250 	if (iter->rni_iter == NULL) {
4251 		rc_node_clear(out, 0);
4252 		return (REP_PROTOCOL_DONE);
4253 	}
4254 
4255 	if (iter->rni_type != type) {
4256 		rc_node_clear(out, 0);
4257 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
4258 	}
4259 
4260 	(void) pthread_mutex_lock(&np->rn_lock);  /* held by _iter_create() */
4261 
4262 	if (!rc_node_wait_flag(np, RC_NODE_CHILDREN_CHANGING)) {
4263 		(void) pthread_mutex_unlock(&np->rn_lock);
4264 		rc_node_clear(out, 1);
4265 		return (REP_PROTOCOL_FAIL_DELETED);
4266 	}
4267 
4268 	if (iter->rni_clevel >= 0) {
4269 		/* Composed iterator.  Iterate over appropriate level. */
4270 		(void) pthread_mutex_unlock(&np->rn_lock);
4271 		np = np->rn_cchain[iter->rni_clevel];
4272 		/*
4273 		 * If iter->rni_parent is an instance or a snapshot, np must
4274 		 * be valid since iter holds iter->rni_parent & possible
4275 		 * levels (service, instance, snaplevel) cannot be destroyed
4276 		 * while rni_parent is held.  If iter->rni_parent is
4277 		 * a composed property group then rc_node_setup_cpg() put
4278 		 * a hold on np.
4279 		 */
4280 
4281 		(void) pthread_mutex_lock(&np->rn_lock);
4282 
4283 		if (!rc_node_wait_flag(np, RC_NODE_CHILDREN_CHANGING)) {
4284 			(void) pthread_mutex_unlock(&np->rn_lock);
4285 			rc_node_clear(out, 1);
4286 			return (REP_PROTOCOL_FAIL_DELETED);
4287 		}
4288 	}
4289 
4290 	assert(np->rn_flags & RC_NODE_HAS_CHILDREN);
4291 
	/* From here on np is the node being walked and np->rn_lock is held. */
4292 	for (;;) {
4293 		res = uu_list_walk_next(iter->rni_iter);
4294 		if (res == NULL) {
			/* The list being walked is exhausted. */
4295 			rc_node_t *parent = iter->rni_parent;
4296 
4297 #if COMPOSITION_DEPTH == 2
			/*
			 * A non-composed iterator, or a composed one which
			 * was on the last level, is finished; res stays NULL
			 * so _DONE is returned below.
			 */
4298 			if (iter->rni_clevel < 0 || iter->rni_clevel == 1) {
4299 				/* release walker and lock */
4300 				rc_iter_end(iter);
4301 				break;
4302 			}
4303 
4304 			/* Stop walking current level. */
4305 			uu_list_walk_end(iter->rni_iter);
4306 			iter->rni_iter = NULL;
4307 			(void) pthread_mutex_unlock(&np->rn_lock);
4308 			rc_node_rele_other(iter->rni_iter_node);
4309 			iter->rni_iter_node = NULL;
4310 
4311 			/* Start walking next level. */
4312 			++iter->rni_clevel;
4313 			np = parent->rn_cchain[iter->rni_clevel];
4314 			assert(np != NULL);
4315 #else
4316 #error This code must be updated.
4317 #endif
4318 
4319 			(void) pthread_mutex_lock(&np->rn_lock);
4320 
4321 			rc = rc_node_fill_children(np, iter->rni_type);
4322 
4323 			if (rc == REP_PROTOCOL_SUCCESS) {
4324 				iter->rni_iter =
4325 				    uu_list_walk_start(np->rn_children,
4326 					UU_WALK_ROBUST);
4327 
4328 				if (iter->rni_iter == NULL)
4329 					rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
4330 				else {
4331 					iter->rni_iter_node = np;
4332 					rc_node_hold_other(np);
4333 				}
4334 			}
4335 
4336 			if (rc != REP_PROTOCOL_SUCCESS) {
4337 				(void) pthread_mutex_unlock(&np->rn_lock);
4338 				rc_node_clear(out, 0);
4339 				return (rc);
4340 			}
4341 
4342 			continue;
4343 		}
4344 
		/* Skip children of another type or rejected by the filter. */
4345 		if (res->rn_id.rl_type != type ||
4346 		    !iter->rni_filter(res, iter->rni_filter_arg))
4347 			continue;
4348 
4349 		/*
4350 		 * If we're composed and not at the top level, check to see if
4351 		 * there's an entity at a higher level with the same name.  If
4352 		 * so, skip this one.
4353 		 */
4354 		if (iter->rni_clevel > 0) {
4355 			rc_node_t *ent = iter->rni_parent->rn_cchain[0];
4356 			rc_node_t *pg;
4357 
4358 #if COMPOSITION_DEPTH == 2
4359 			assert(iter->rni_clevel == 1);
4360 
			/*
			 * np->rn_lock is dropped before ent->rn_lock is
			 * taken, so the two are never held together here.
			 * Only the existence of pg matters, so its hold is
			 * dropped right away.
			 */
4361 			(void) pthread_mutex_unlock(&np->rn_lock);
4362 			(void) pthread_mutex_lock(&ent->rn_lock);
4363 			rc = rc_node_find_named_child(ent, res->rn_name, type,
4364 			    &pg);
4365 			if (rc == REP_PROTOCOL_SUCCESS && pg != NULL)
4366 				rc_node_rele(pg);
4367 			(void) pthread_mutex_unlock(&ent->rn_lock);
4368 			if (rc != REP_PROTOCOL_SUCCESS) {
4369 				rc_node_clear(out, 0);
4370 				return (rc);
4371 			}
4372 			(void) pthread_mutex_lock(&np->rn_lock);
4373 
4374 			/* Make sure np isn't being deleted all of a sudden. */
4375 			if (!rc_node_wait_flag(np, RC_NODE_DYING)) {
4376 				(void) pthread_mutex_unlock(&np->rn_lock);
4377 				rc_node_clear(out, 1);
4378 				return (REP_PROTOCOL_FAIL_DELETED);
4379 			}
4380 
4381 			if (pg != NULL)
4382 				/* Keep going. */
4383 				continue;
4384 #else
4385 #error This code must be updated.
4386 #endif
4387 		}
4388 
4389 		/*
4390 		 * If we're composed, iterating over property groups, and not
4391 		 * at the bottom level, check to see if there's a pg at lower
4392 		 * level with the same name.  If so, return a cpg.
4393 		 */
4394 		if (iter->rni_clevel >= 0 &&
4395 		    type == REP_PROTOCOL_ENTITY_PROPERTYGRP &&
4396 		    iter->rni_clevel < COMPOSITION_DEPTH - 1) {
4397 #if COMPOSITION_DEPTH == 2
4398 			rc_node_t *pg;
4399 			rc_node_t *ent = iter->rni_parent->rn_cchain[1];
4400 
4401 			rc_node_hold(res);	/* While we drop np->rn_lock */
4402 
4403 			(void) pthread_mutex_unlock(&np->rn_lock);
4404 			(void) pthread_mutex_lock(&ent->rn_lock);
4405 			rc = rc_node_find_named_child(ent, res->rn_name, type,
4406 			    &pg);
4407 			/* holds pg if not NULL */
4408 			(void) pthread_mutex_unlock(&ent->rn_lock);
4409 			if (rc != REP_PROTOCOL_SUCCESS) {
4410 				rc_node_rele(res);
4411 				rc_node_clear(out, 0);
4412 				return (rc);
4413 			}
4414 
4415 			(void) pthread_mutex_lock(&np->rn_lock);
4416 			if (!rc_node_wait_flag(np, RC_NODE_DYING)) {
4417 				(void) pthread_mutex_unlock(&np->rn_lock);
4418 				rc_node_rele(res);
4419 				if (pg != NULL)
4420 					rc_node_rele(pg);
4421 				rc_node_clear(out, 1);
4422 				return (REP_PROTOCOL_FAIL_DELETED);
4423 			}
4424 
4425 			if (pg == NULL) {
				/* No lower-level pg: return res itself. */
4426 				rc_node_rele(res);
4427 			} else {
4428 				rc_node_t *cpg;
4429 
4430 				/* Keep res held for rc_node_setup_cpg(). */
4431 
4432 				cpg = rc_node_alloc();
4433 				if (cpg == NULL) {
4434 					(void) pthread_mutex_unlock(
4435 					    &np->rn_lock);
4436 					rc_node_rele(res);
4437 					rc_node_rele(pg);
4438 					rc_node_clear(out, 0);
4439 					return (REP_PROTOCOL_FAIL_NO_RESOURCES);
4440 				}
4441 
4442 				switch (rc_node_setup_cpg(cpg, res, pg)) {
4443 				case REP_PROTOCOL_SUCCESS:
					/*
					 * The holds on res and pg are not
					 * dropped on this path.
					 */
4444 					res = cpg;
4445 					break;
4446 
4447 				case REP_PROTOCOL_FAIL_TYPE_MISMATCH:
4448 					/* Nevermind. */
4449 					rc_node_destroy(cpg);
4450 					rc_node_rele(pg);
4451 					rc_node_rele(res);
4452 					break;
4453 
4454 				case REP_PROTOCOL_FAIL_NO_RESOURCES:
4455 					rc_node_destroy(cpg);
4456 					(void) pthread_mutex_unlock(
4457 					    &np->rn_lock);
4458 					rc_node_rele(res);
4459 					rc_node_rele(pg);
4460 					rc_node_clear(out, 0);
4461 					return (REP_PROTOCOL_FAIL_NO_RESOURCES);
4462 
4463 				default:
4464 					assert(0);
4465 					abort();
4466 				}
4467 			}
4468 #else
4469 #error This code must be updated.
4470 #endif
4471 		}
4472 
		/*
		 * Hold res across the unlock; the hold is dropped again
		 * after rc_node_assign() below.
		 */
4473 		rc_node_hold(res);
4474 		(void) pthread_mutex_unlock(&np->rn_lock);
4475 		break;
4476 	}
	/* res is NULL here only when the walk ran out of children. */
4477 	rc_node_assign(out, res);
4478 
4479 	if (res == NULL)
4480 		return (REP_PROTOCOL_DONE);
4481 	rc_node_rele(res);
4482 	return (REP_PROTOCOL_SUCCESS);
4483 }
4484 
4485 void
4486 rc_iter_destroy(rc_node_iter_t **nipp)
4487 {
4488 	rc_node_iter_t *nip = *nipp;
4489 	rc_node_t *np;
4490 
4491 	if (nip == NULL)
4492 		return;				/* already freed */
4493 
4494 	np = nip->rni_parent;
4495 
4496 	if (nip->rni_filter_arg != NULL)
4497 		free(nip->rni_filter_arg);
4498 	nip->rni_filter_arg = NULL;
4499 
4500 	if (nip->rni_type == REP_PROTOCOL_ENTITY_VALUE ||
4501 	    nip->rni_iter != NULL) {
4502 		if (nip->rni_clevel < 0)
4503 			(void) pthread_mutex_lock(&np->rn_lock);
4504 		else
4505 			(void) pthread_mutex_lock(
4506 			    &np->rn_cchain[nip->rni_clevel]->rn_lock);
4507 		rc_iter_end(nip);		/* release walker and lock */
4508 	}
4509 	nip->rni_parent = NULL;
4510 
4511 	uu_free(nip);
4512 	*nipp = NULL;
4513 }
4514 
4515 int
4516 rc_node_setup_tx(rc_node_ptr_t *npp, rc_node_ptr_t *txp)
4517 {
4518 	rc_node_t *np;
4519 	permcheck_t *pcp;
4520 	int ret;
4521 	int authorized = 0;
4522 
4523 	RC_NODE_PTR_GET_CHECK_AND_HOLD(np, npp);
4524 
4525 	if (np->rn_id.rl_type == REP_PROTOCOL_ENTITY_CPROPERTYGRP) {
4526 		rc_node_rele(np);
4527 		np = np->rn_cchain[0];
4528 		RC_NODE_CHECK_AND_HOLD(np);
4529 	}
4530 
4531 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTYGRP) {
4532 		rc_node_rele(np);
4533 		return (REP_PROTOCOL_FAIL_TYPE_MISMATCH);
4534 	}
4535 
4536 	if (np->rn_id.rl_ids[ID_SNAPSHOT] != 0) {
4537 		rc_node_rele(np);
4538 		return (REP_PROTOCOL_FAIL_PERMISSION_DENIED);
4539 	}
4540 
4541 	if (client_is_privileged())
4542 		goto skip_checks;
4543 
4544 #ifdef NATIVE_BUILD
4545 	rc_node_rele(np);
4546 	return (REP_PROTOCOL_FAIL_PERMISSION_DENIED);
4547 #else
4548 	/* permission check */
4549 	pcp = pc_create();
4550 	if (pcp == NULL) {
4551 		rc_node_rele(np);
4552 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
4553 	}
4554 
4555 	if (np->rn_id.rl_ids[ID_INSTANCE] != 0 &&	/* instance pg */
4556 	    ((strcmp(np->rn_name, AUTH_PG_ACTIONS) == 0 &&
4557 	    strcmp(np->rn_type, AUTH_PG_ACTIONS_TYPE) == 0) ||
4558 	    (strcmp(np->rn_name, AUTH_PG_GENERAL_OVR) == 0 &&
4559 	    strcmp(np->rn_type, AUTH_PG_GENERAL_OVR_TYPE) == 0))) {
4560 		rc_node_t *instn;
4561 
4562 		/* solaris.smf.manage can be used. */
4563 		ret = perm_add_enabling(pcp, AUTH_MANAGE);
4564 
4565 		if (ret != REP_PROTOCOL_SUCCESS) {
4566 			pc_free(pcp);
4567 			rc_node_rele(np);
4568 			return (ret);
4569 		}
4570 
4571 		/* general/action_authorization values can be used. */
4572 		ret = rc_node_parent(np, &instn);
4573 		if (ret != REP_PROTOCOL_SUCCESS) {
4574 			assert(ret == REP_PROTOCOL_FAIL_DELETED);
4575 			rc_node_rele(np);
4576 			pc_free(pcp);
4577 			return (REP_PROTOCOL_FAIL_DELETED);
4578 		}
4579 
4580 		assert(instn->rn_id.rl_type == REP_PROTOCOL_ENTITY_INSTANCE);
4581 
4582 		ret = perm_add_inst_action_auth(pcp, instn);
4583 		rc_node_rele(instn);
4584 		switch (ret) {
4585 		case REP_PROTOCOL_SUCCESS:
4586 			break;
4587 
4588 		case REP_PROTOCOL_FAIL_DELETED:
4589 		case REP_PROTOCOL_FAIL_NO_RESOURCES:
4590 			rc_node_rele(np);
4591 			pc_free(pcp);
4592 			return (ret);
4593 
4594 		default:
4595 			bad_error("perm_add_inst_action_auth", ret);
4596 		}
4597 
4598 		if (strcmp(np->rn_name, AUTH_PG_ACTIONS) == 0)
4599 			authorized = 1;		/* Don't check on commit. */
4600 	} else {
4601 		ret = perm_add_enabling(pcp, AUTH_MODIFY);
4602 
4603 		if (ret == REP_PROTOCOL_SUCCESS) {
4604 			/* propertygroup-type-specific authorization */
4605 			/* no locking because rn_type won't change anyway */
4606 			const char * const auth =
4607 			    perm_auth_for_pgtype(np->rn_type);
4608 
4609 			if (auth != NULL)
4610 				ret = perm_add_enabling(pcp, auth);
4611 		}
4612 
4613 		if (ret == REP_PROTOCOL_SUCCESS)
4614 			/* propertygroup/transaction-type-specific auths */
4615 			ret =
4616 			    perm_add_enabling_values(pcp, np, AUTH_PROP_VALUE);
4617 
4618 		if (ret == REP_PROTOCOL_SUCCESS)
4619 			ret =
4620 			    perm_add_enabling_values(pcp, np, AUTH_PROP_MODIFY);
4621 
4622 		/* AUTH_MANAGE can manipulate general/AUTH_PROP_ACTION */
4623 		if (ret == REP_PROTOCOL_SUCCESS &&
4624 		    strcmp(np->rn_name, AUTH_PG_GENERAL) == 0 &&
4625 		    strcmp(np->rn_type, AUTH_PG_GENERAL_TYPE) == 0)
4626 			ret = perm_add_enabling(pcp, AUTH_MANAGE);
4627 
4628 		if (ret != REP_PROTOCOL_SUCCESS) {
4629 			pc_free(pcp);
4630 			rc_node_rele(np);
4631 			return (ret);
4632 		}
4633 	}
4634 
4635 	ret = perm_granted(pcp);
4636 	if (ret != 1) {
4637 		pc_free(pcp);
4638 		rc_node_rele(np);
4639 		return (ret == 0 ? REP_PROTOCOL_FAIL_PERMISSION_DENIED :
4640 		    REP_PROTOCOL_FAIL_NO_RESOURCES);
4641 	}
4642 
4643 	pc_free(pcp);
4644 #endif /* NATIVE_BUILD */
4645 
4646 skip_checks:
4647 	rc_node_assign(txp, np);
4648 	txp->rnp_authorized = authorized;
4649 
4650 	rc_node_rele(np);
4651 	return (REP_PROTOCOL_SUCCESS);
4652 }
4653 
4654 /*
4655  * Return 1 if the given transaction commands only modify the values of
4656  * properties other than "modify_authorization".  Return -1 if any of the
4657  * commands are invalid, and 0 otherwise.
4658  */
4659 static int
4660 tx_allow_value(const void *cmds_arg, size_t cmds_sz, rc_node_t *pg)
4661 {
4662 	const struct rep_protocol_transaction_cmd *cmds;
4663 	uintptr_t loc;
4664 	uint32_t sz;
4665 	rc_node_t *prop;
4666 	boolean_t ok;
4667 
4668 	assert(!MUTEX_HELD(&pg->rn_lock));
4669 
4670 	loc = (uintptr_t)cmds_arg;
4671 
4672 	while (cmds_sz > 0) {
4673 		cmds = (struct rep_protocol_transaction_cmd *)loc;
4674 
4675 		if (cmds_sz <= REP_PROTOCOL_TRANSACTION_CMD_MIN_SIZE)
4676 			return (-1);
4677 
4678 		sz = cmds->rptc_size;
4679 		if (sz <= REP_PROTOCOL_TRANSACTION_CMD_MIN_SIZE)
4680 			return (-1);
4681 
4682 		sz = TX_SIZE(sz);
4683 		if (sz > cmds_sz)
4684 			return (-1);
4685 
4686 		switch (cmds[0].rptc_action) {
4687 		case REP_PROTOCOL_TX_ENTRY_CLEAR:
4688 			break;
4689 
4690 		case REP_PROTOCOL_TX_ENTRY_REPLACE:
4691 			/* Check type */
4692 			(void) pthread_mutex_lock(&pg->rn_lock);
4693 			if (rc_node_find_named_child(pg,
4694 			    (const char *)cmds[0].rptc_data,
4695 			    REP_PROTOCOL_ENTITY_PROPERTY, &prop) ==
4696 			    REP_PROTOCOL_SUCCESS) {
4697 				ok = (prop != NULL &&
4698 				    prop->rn_valtype == cmds[0].rptc_type);
4699 			} else {
4700 				/* Return more particular error? */
4701 				ok = B_FALSE;
4702 			}
4703 			(void) pthread_mutex_unlock(&pg->rn_lock);
4704 			if (ok)
4705 				break;
4706 			return (0);
4707 
4708 		default:
4709 			return (0);
4710 		}
4711 
4712 		if (strcmp((const char *)cmds[0].rptc_data, AUTH_PROP_MODIFY)
4713 		    == 0)
4714 			return (0);
4715 
4716 		loc += sz;
4717 		cmds_sz -= sz;
4718 	}
4719 
4720 	return (1);
4721 }
4722 
4723 /*
4724  * Return 1 if any of the given transaction commands affect
4725  * "action_authorization".  Return -1 if any of the commands are invalid and
4726  * 0 in all other cases.
4727  */
4728 static int
4729 tx_modifies_action(const void *cmds_arg, size_t cmds_sz)
4730 {
4731 	const struct rep_protocol_transaction_cmd *cmds;
4732 	uintptr_t loc;
4733 	uint32_t sz;
4734 
4735 	loc = (uintptr_t)cmds_arg;
4736 
4737 	while (cmds_sz > 0) {
4738 		cmds = (struct rep_protocol_transaction_cmd *)loc;
4739 
4740 		if (cmds_sz <= REP_PROTOCOL_TRANSACTION_CMD_MIN_SIZE)
4741 			return (-1);
4742 
4743 		sz = cmds->rptc_size;
4744 		if (sz <= REP_PROTOCOL_TRANSACTION_CMD_MIN_SIZE)
4745 			return (-1);
4746 
4747 		sz = TX_SIZE(sz);
4748 		if (sz > cmds_sz)
4749 			return (-1);
4750 
4751 		if (strcmp((const char *)cmds[0].rptc_data, AUTH_PROP_ACTION)
4752 		    == 0)
4753 			return (1);
4754 
4755 		loc += sz;
4756 		cmds_sz -= sz;
4757 	}
4758 
4759 	return (0);
4760 }
4761 
4762 /*
4763  * Returns 1 if the transaction commands only modify properties named
4764  * 'enabled'.
4765  */
4766 static int
4767 tx_only_enabled(const void *cmds_arg, size_t cmds_sz)
4768 {
4769 	const struct rep_protocol_transaction_cmd *cmd;
4770 	uintptr_t loc;
4771 	uint32_t sz;
4772 
4773 	loc = (uintptr_t)cmds_arg;
4774 
4775 	while (cmds_sz > 0) {
4776 		cmd = (struct rep_protocol_transaction_cmd *)loc;
4777 
4778 		if (cmds_sz <= REP_PROTOCOL_TRANSACTION_CMD_MIN_SIZE)
4779 			return (-1);
4780 
4781 		sz = cmd->rptc_size;
4782 		if (sz <= REP_PROTOCOL_TRANSACTION_CMD_MIN_SIZE)
4783 			return (-1);
4784 
4785 		sz = TX_SIZE(sz);
4786 		if (sz > cmds_sz)
4787 			return (-1);
4788 
4789 		if (strcmp((const char *)cmd->rptc_data, AUTH_PROP_ENABLED)
4790 		    != 0)
4791 			return (0);
4792 
4793 		loc += sz;
4794 		cmds_sz -= sz;
4795 	}
4796 
4797 	return (1);
4798 }
4799 
/*
 * Commit the transaction commands in cmds (cmds_sz bytes) against the
 * property group referenced by txp.
 *
 * Unless the client is privileged or txp->rnp_authorized is set, the
 * authorizations which allow the commit are derived from the pg's name and
 * type and from the contents of the commands, and are checked first.  A
 * replacement node is then allocated and object_tx_commit() is called.  On
 * success the pg's notification waiters are fired, the old node is marked
 * RC_NODE_OLD and the new node takes its place under the parent.
 *
 * txp is cleared once object_tx_commit() has been attempted, whether or not
 * it succeeded; earlier failures leave txp alone.
 *
 * Returns
 *   _SUCCESS - also when object_tx_commit() returns _DONE (empty tx)
 *   _PERMISSION_DENIED
 *   _BAD_REQUEST - a command failed validation
 *   _NO_RESOURCES
 *   _DELETED
 *   _NOT_LATEST - the node has been superseded (RC_NODE_OLD)
 *   or any failure of RC_NODE_CHECK(), rc_node_fill_children() or
 *   object_tx_commit().
 */
4800 int
4801 rc_tx_commit(rc_node_ptr_t *txp, const void *cmds, size_t cmds_sz)
4802 {
4803 	rc_node_t *np = txp->rnp_node;
4804 	rc_node_t *pp;
4805 	rc_node_t *nnp;
4806 	rc_node_pg_notify_t *pnp;
4807 	int rc;
4808 	permcheck_t *pcp;
4809 	int granted, normal;
4810 
4811 	RC_NODE_CHECK(np);
4812 
4813 	if (!client_is_privileged() && !txp->rnp_authorized) {
4814 #ifdef NATIVE_BUILD
4815 		return (REP_PROTOCOL_FAIL_PERMISSION_DENIED);
4816 #else
4817 		/* permission check: depends on contents of transaction */
4818 		pcp = pc_create();
4819 		if (pcp == NULL)
4820 			return (REP_PROTOCOL_FAIL_NO_RESOURCES);
4821 
4822 		/* If normal is cleared, we won't do the normal checks. */
4823 		normal = 1;
4824 		rc = REP_PROTOCOL_SUCCESS;
4825 
4826 		if (strcmp(np->rn_name, AUTH_PG_GENERAL) == 0 &&
4827 		    strcmp(np->rn_type, AUTH_PG_GENERAL_TYPE) == 0) {
4828 			/* Touching general[framework]/action_authorization? */
4829 			rc = tx_modifies_action(cmds, cmds_sz);
4830 			if (rc == -1) {
4831 				pc_free(pcp);
4832 				return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4833 			}
4834 
4835 			if (rc) {
4836 				/* Yes: only AUTH_MANAGE can be used. */
4837 				rc = perm_add_enabling(pcp, AUTH_MANAGE);
4838 				normal = 0;
4839 			} else {
4840 				rc = REP_PROTOCOL_SUCCESS;
4841 			}
4842 		} else if (np->rn_id.rl_ids[ID_INSTANCE] != 0 &&
4843 		    strcmp(np->rn_name, AUTH_PG_GENERAL_OVR) == 0 &&
4844 		    strcmp(np->rn_type, AUTH_PG_GENERAL_OVR_TYPE) == 0) {
4845 			rc_node_t *instn;
4846 
4847 			rc = tx_only_enabled(cmds, cmds_sz);
4848 			if (rc == -1) {
4849 				pc_free(pcp);
4850 				return (REP_PROTOCOL_FAIL_BAD_REQUEST);
4851 			}
4852 
			/*
			 * If only 'enabled' is touched, what
			 * perm_add_inst_action_auth() adds for the parent
			 * instance is accepted in addition to the normal
			 * authorizations below.
			 */
4853 			if (rc) {
4854 				rc = rc_node_parent(np, &instn);
4855 				if (rc != REP_PROTOCOL_SUCCESS) {
4856 					assert(rc == REP_PROTOCOL_FAIL_DELETED);
4857 					pc_free(pcp);
4858 					return (rc);
4859 				}
4860 
4861 				assert(instn->rn_id.rl_type ==
4862 				    REP_PROTOCOL_ENTITY_INSTANCE);
4863 
4864 				rc = perm_add_inst_action_auth(pcp, instn);
4865 				rc_node_rele(instn);
4866 				switch (rc) {
4867 				case REP_PROTOCOL_SUCCESS:
4868 					break;
4869 
4870 				case REP_PROTOCOL_FAIL_DELETED:
4871 				case REP_PROTOCOL_FAIL_NO_RESOURCES:
4872 					pc_free(pcp);
4873 					return (rc);
4874 
4875 				default:
4876 					bad_error("perm_add_inst_action_auth",
4877 					    rc);
4878 				}
4879 			} else {
4880 				rc = REP_PROTOCOL_SUCCESS;
4881 			}
4882 		}
4883 
4884 		if (rc == REP_PROTOCOL_SUCCESS && normal) {
4885 			rc = perm_add_enabling(pcp, AUTH_MODIFY);
4886 
4887 			if (rc == REP_PROTOCOL_SUCCESS) {
4888 				/* Add pgtype-specific authorization. */
4889 				const char * const auth =
4890 				    perm_auth_for_pgtype(np->rn_type);
4891 
4892 				if (auth != NULL)
4893 					rc = perm_add_enabling(pcp, auth);
4894 			}
4895 
4896 			/* Add pg-specific modify_authorization auths. */
4897 			if (rc == REP_PROTOCOL_SUCCESS)
4898 				rc = perm_add_enabling_values(pcp, np,
4899 				    AUTH_PROP_MODIFY);
4900 
4901 			/* If value_authorization values are ok, add them. */
4902 			if (rc == REP_PROTOCOL_SUCCESS) {
4903 				rc = tx_allow_value(cmds, cmds_sz, np);
4904 				if (rc == -1)
4905 					rc = REP_PROTOCOL_FAIL_BAD_REQUEST;
4906 				else if (rc)
4907 					rc = perm_add_enabling_values(pcp, np,
4908 					    AUTH_PROP_VALUE);
4909 			}
4910 		}
4911 
		/* granted is only examined below when rc is still _SUCCESS. */
4912 		if (rc == REP_PROTOCOL_SUCCESS) {
4913 			granted = perm_granted(pcp);
4914 			if (granted < 0)
4915 				rc = REP_PROTOCOL_FAIL_NO_RESOURCES;
4916 		}
4917 
4918 		pc_free(pcp);
4919 
4920 		if (rc != REP_PROTOCOL_SUCCESS)
4921 			return (rc);
4922 
4923 		if (!granted)
4924 			return (REP_PROTOCOL_FAIL_PERMISSION_DENIED);
4925 #endif /* NATIVE_BUILD */
4926 	}
4927 
	/* Build the node which will replace np if the commit succeeds. */
4928 	nnp = rc_node_alloc();
4929 	if (nnp == NULL)
4930 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
4931 
4932 	nnp->rn_id = np->rn_id;			/* structure assignment */
4933 	nnp->rn_hash = np->rn_hash;
4934 	nnp->rn_name = strdup(np->rn_name);
4935 	nnp->rn_type = strdup(np->rn_type);
4936 	nnp->rn_pgflags = np->rn_pgflags;
4937 
4938 	nnp->rn_flags = RC_NODE_IN_TX | RC_NODE_USING_PARENT;
4939 
4940 	if (nnp->rn_name == NULL || nnp->rn_type == NULL) {
4941 		rc_node_destroy(nnp);
4942 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
4943 	}
4944 
4945 	(void) pthread_mutex_lock(&np->rn_lock);
4946 	/*
4947 	 * We must have all of the old properties in the cache, or the
4948 	 * database deletions could cause inconsistencies.
4949 	 */
4950 	if ((rc = rc_node_fill_children(np, REP_PROTOCOL_ENTITY_PROPERTY)) !=
4951 	    REP_PROTOCOL_SUCCESS) {
4952 		(void) pthread_mutex_unlock(&np->rn_lock);
4953 		rc_node_destroy(nnp);
4954 		return (rc);
4955 	}
4956 
4957 	if (!rc_node_hold_flag(np, RC_NODE_USING_PARENT)) {
4958 		(void) pthread_mutex_unlock(&np->rn_lock);
4959 		rc_node_destroy(nnp);
4960 		return (REP_PROTOCOL_FAIL_DELETED);
4961 	}
4962 
	/* A pg which has already been replaced cannot be committed to. */
4963 	if (np->rn_flags & RC_NODE_OLD) {
4964 		rc_node_rele_flag(np, RC_NODE_USING_PARENT);
4965 		(void) pthread_mutex_unlock(&np->rn_lock);
4966 		rc_node_destroy(nnp);
4967 		return (REP_PROTOCOL_FAIL_NOT_LATEST);
4968 	}
4969 
	/*
	 * NOTE(review): judging by the locking that follows,
	 * rc_node_hold_parent_flag() returns with np->rn_lock dropped and, on
	 * success, with pp->rn_lock held -- confirm against its definition.
	 */
4970 	pp = rc_node_hold_parent_flag(np, RC_NODE_CHILDREN_CHANGING);
4971 	if (pp == NULL) {
4972 		/* our parent is gone, we're going next... */
4973 		rc_node_destroy(nnp);
4974 		(void) pthread_mutex_lock(&np->rn_lock);
4975 		if (np->rn_flags & RC_NODE_OLD) {
4976 			(void) pthread_mutex_unlock(&np->rn_lock);
4977 			return (REP_PROTOCOL_FAIL_NOT_LATEST);
4978 		}
4979 		(void) pthread_mutex_unlock(&np->rn_lock);
4980 		return (REP_PROTOCOL_FAIL_DELETED);
4981 	}
4982 	(void) pthread_mutex_unlock(&pp->rn_lock);
4983 
4984 	/*
4985 	 * prepare for the transaction
4986 	 */
4987 	(void) pthread_mutex_lock(&np->rn_lock);
4988 	if (!rc_node_hold_flag(np, RC_NODE_IN_TX)) {
4989 		(void) pthread_mutex_unlock(&np->rn_lock);
4990 		(void) pthread_mutex_lock(&pp->rn_lock);
4991 		rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
4992 		(void) pthread_mutex_unlock(&pp->rn_lock);
4993 		rc_node_destroy(nnp);
4994 		return (REP_PROTOCOL_FAIL_DELETED);
4995 	}
4996 	nnp->rn_gen_id = np->rn_gen_id;
4997 	(void) pthread_mutex_unlock(&np->rn_lock);
4998 
4999 	/* Sets nnp->rn_gen_id on success. */
5000 	rc = object_tx_commit(&np->rn_id, cmds, cmds_sz, &nnp->rn_gen_id);
5001 
5002 	(void) pthread_mutex_lock(&np->rn_lock);
5003 	if (rc != REP_PROTOCOL_SUCCESS) {
		/* Back out: drop both flags and discard the new node. */
5004 		rc_node_rele_flag(np, RC_NODE_IN_TX);
5005 		(void) pthread_mutex_unlock(&np->rn_lock);
5006 		(void) pthread_mutex_lock(&pp->rn_lock);
5007 		rc_node_rele_flag(pp, RC_NODE_CHILDREN_CHANGING);
5008 		(void) pthread_mutex_unlock(&pp->rn_lock);
5009 		rc_node_destroy(nnp);
5010 		rc_node_clear(txp, 0);
5011 		if (rc == REP_PROTOCOL_DONE)
5012 			rc = REP_PROTOCOL_SUCCESS; /* successful empty tx */
5013 		return (rc);
5014 	}
5015 
5016 	/*
5017 	 * Notify waiters
5018 	 */
5019 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
5020 	while ((pnp = uu_list_first(np->rn_pg_notify_list)) != NULL)
5021 		rc_pg_notify_fire(pnp);
5022 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5023 
5024 	np->rn_flags |= RC_NODE_OLD;
5025 	(void) pthread_mutex_unlock(&np->rn_lock);
5026 
5027 	rc_notify_remove_node(np);
5028 
5029 	/*
5030 	 * replace np with nnp
5031 	 */
5032 	rc_node_relink_child(pp, np, nnp);
5033 
5034 	/*
5035 	 * all done -- clear the transaction.
5036 	 */
5037 	rc_node_clear(txp, 0);
5038 
5039 	return (REP_PROTOCOL_SUCCESS);
5040 }
5041 
5042 void
5043 rc_pg_notify_init(rc_node_pg_notify_t *pnp)
5044 {
5045 	uu_list_node_init(pnp, &pnp->rnpn_node, rc_pg_notify_pool);
5046 	pnp->rnpn_pg = NULL;
5047 	pnp->rnpn_fd = -1;
5048 }
5049 
5050 int
5051 rc_pg_notify_setup(rc_node_pg_notify_t *pnp, rc_node_ptr_t *npp, int fd)
5052 {
5053 	rc_node_t *np;
5054 
5055 	RC_NODE_PTR_GET_CHECK_AND_LOCK(np, npp);
5056 
5057 	if (np->rn_id.rl_type != REP_PROTOCOL_ENTITY_PROPERTYGRP) {
5058 		(void) pthread_mutex_unlock(&np->rn_lock);
5059 		return (REP_PROTOCOL_FAIL_BAD_REQUEST);
5060 	}
5061 
5062 	/*
5063 	 * wait for any transaction in progress to complete
5064 	 */
5065 	if (!rc_node_wait_flag(np, RC_NODE_IN_TX)) {
5066 		(void) pthread_mutex_unlock(&np->rn_lock);
5067 		return (REP_PROTOCOL_FAIL_DELETED);
5068 	}
5069 
5070 	if (np->rn_flags & RC_NODE_OLD) {
5071 		(void) pthread_mutex_unlock(&np->rn_lock);
5072 		return (REP_PROTOCOL_FAIL_NOT_LATEST);
5073 	}
5074 
5075 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
5076 	rc_pg_notify_fire(pnp);
5077 	pnp->rnpn_pg = np;
5078 	pnp->rnpn_fd = fd;
5079 	(void) uu_list_insert_after(np->rn_pg_notify_list, NULL, pnp);
5080 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5081 
5082 	(void) pthread_mutex_unlock(&np->rn_lock);
5083 	return (REP_PROTOCOL_SUCCESS);
5084 }
5085 
5086 void
5087 rc_pg_notify_fini(rc_node_pg_notify_t *pnp)
5088 {
5089 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
5090 	rc_pg_notify_fire(pnp);
5091 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5092 
5093 	uu_list_node_fini(pnp, &pnp->rnpn_node, rc_pg_notify_pool);
5094 }
5095 
5096 void
5097 rc_notify_info_init(rc_notify_info_t *rnip)
5098 {
5099 	int i;
5100 
5101 	uu_list_node_init(rnip, &rnip->rni_list_node, rc_notify_info_pool);
5102 	uu_list_node_init(&rnip->rni_notify, &rnip->rni_notify.rcn_list_node,
5103 	    rc_notify_pool);
5104 
5105 	rnip->rni_notify.rcn_node = NULL;
5106 	rnip->rni_notify.rcn_info = rnip;
5107 
5108 	bzero(rnip->rni_namelist, sizeof (rnip->rni_namelist));
5109 	bzero(rnip->rni_typelist, sizeof (rnip->rni_typelist));
5110 
5111 	(void) pthread_cond_init(&rnip->rni_cv, NULL);
5112 
5113 	for (i = 0; i < RC_NOTIFY_MAX_NAMES; i++) {
5114 		rnip->rni_namelist[i] = NULL;
5115 		rnip->rni_typelist[i] = NULL;
5116 	}
5117 }
5118 
5119 static void
5120 rc_notify_info_insert_locked(rc_notify_info_t *rnip)
5121 {
5122 	assert(MUTEX_HELD(&rc_pg_notify_lock));
5123 
5124 	assert(!(rnip->rni_flags & RC_NOTIFY_ACTIVE));
5125 
5126 	rnip->rni_flags |= RC_NOTIFY_ACTIVE;
5127 	(void) uu_list_insert_after(rc_notify_info_list, NULL, rnip);
5128 	(void) uu_list_insert_before(rc_notify_list, NULL, &rnip->rni_notify);
5129 }
5130 
5131 static void
5132 rc_notify_info_remove_locked(rc_notify_info_t *rnip)
5133 {
5134 	rc_notify_t *me = &rnip->rni_notify;
5135 	rc_notify_t *np;
5136 
5137 	assert(MUTEX_HELD(&rc_pg_notify_lock));
5138 
5139 	assert(rnip->rni_flags & RC_NOTIFY_ACTIVE);
5140 
5141 	assert(!(rnip->rni_flags & RC_NOTIFY_DRAIN));
5142 	rnip->rni_flags |= RC_NOTIFY_DRAIN;
5143 	(void) pthread_cond_broadcast(&rnip->rni_cv);
5144 
5145 	(void) uu_list_remove(rc_notify_info_list, rnip);
5146 
5147 	/*
5148 	 * clean up any notifications at the beginning of the list
5149 	 */
5150 	if (uu_list_first(rc_notify_list) == me) {
5151 		while ((np = uu_list_next(rc_notify_list, me)) != NULL &&
5152 		    np->rcn_info == NULL)
5153 			rc_notify_remove_locked(np);
5154 	}
5155 	(void) uu_list_remove(rc_notify_list, me);
5156 
5157 	while (rnip->rni_waiters) {
5158 		(void) pthread_cond_broadcast(&rc_pg_notify_cv);
5159 		(void) pthread_cond_broadcast(&rnip->rni_cv);
5160 		(void) pthread_cond_wait(&rnip->rni_cv, &rc_pg_notify_lock);
5161 	}
5162 
5163 	rnip->rni_flags &= ~(RC_NOTIFY_DRAIN | RC_NOTIFY_ACTIVE);
5164 }
5165 
5166 static int
5167 rc_notify_info_add_watch(rc_notify_info_t *rnip, const char **arr,
5168     const char *name)
5169 {
5170 	int i;
5171 	int rc;
5172 	char *f;
5173 
5174 	rc = rc_check_type_name(REP_PROTOCOL_ENTITY_PROPERTYGRP, name);
5175 	if (rc != REP_PROTOCOL_SUCCESS)
5176 		return (rc);
5177 
5178 	f = strdup(name);
5179 	if (f == NULL)
5180 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
5181 
5182 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
5183 
5184 	while (rnip->rni_flags & RC_NOTIFY_EMPTYING)
5185 		(void) pthread_cond_wait(&rnip->rni_cv, &rc_pg_notify_lock);
5186 
5187 	for (i = 0; i < RC_NOTIFY_MAX_NAMES; i++)
5188 		if (arr[i] == NULL)
5189 			break;
5190 
5191 	if (i == RC_NOTIFY_MAX_NAMES) {
5192 		(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5193 		free(f);
5194 		return (REP_PROTOCOL_FAIL_NO_RESOURCES);
5195 	}
5196 
5197 	arr[i] = f;
5198 	if (!(rnip->rni_flags & RC_NOTIFY_ACTIVE))
5199 		rc_notify_info_insert_locked(rnip);
5200 
5201 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5202 	return (REP_PROTOCOL_SUCCESS);
5203 }
5204 
5205 int
5206 rc_notify_info_add_name(rc_notify_info_t *rnip, const char *name)
5207 {
5208 	return (rc_notify_info_add_watch(rnip, rnip->rni_namelist, name));
5209 }
5210 
5211 int
5212 rc_notify_info_add_type(rc_notify_info_t *rnip, const char *type)
5213 {
5214 	return (rc_notify_info_add_watch(rnip, rnip->rni_typelist, type));
5215 }
5216 
5217 /*
5218  * Wait for and report an event of interest to rnip, a notification client
 *
 * Blocks for as long as rnip is active and not draining, until a
 * notification which rc_notify_info_interested() accepts is found after
 * rnip's own entry on rc_notify_list.
 *
 * For a deletion notification, the FMRI is copied into outp (at most sz
 * bytes) and *out is cleared.  Otherwise *out is assigned the notification's
 * node, and outp is left as the empty string when sz > 0.
 *
 * Returns
 *   _SUCCESS - an event is being reported
 *   _DONE - rnip is not active, or is being drained
5219  */
5220 int
5221 rc_notify_info_wait(rc_notify_info_t *rnip, rc_node_ptr_t *out,
5222     char *outp, size_t sz)
5223 {
5224 	rc_notify_t *np;
5225 	rc_notify_t *me = &rnip->rni_notify;
5226 	rc_node_t *nnp;
5227 	rc_notify_delete_t *ndp;
5228 
5229 	int am_first_info;
5230 
	/* Default to an empty FMRI; only deletions fill it in. */
5231 	if (sz > 0)
5232 		outp[0] = 0;
5233 
5234 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
5235 
5236 	while ((rnip->rni_flags & (RC_NOTIFY_ACTIVE | RC_NOTIFY_DRAIN)) ==
5237 	    RC_NOTIFY_ACTIVE) {
5238 		/*
5239 		 * If I'm first on the notify list, it is my job to
5240 		 * clean up any notifications I pass by.  I can't do that
5241 		 * if someone is blocking the list from removals, so I
5242 		 * have to wait until they have all drained.
5243 		 */
5244 		am_first_info = (uu_list_first(rc_notify_list) == me);
5245 		if (am_first_info && rc_notify_in_use) {
5246 			rnip->rni_waiters++;
5247 			(void) pthread_cond_wait(&rc_pg_notify_cv,
5248 			    &rc_pg_notify_lock);
5249 			rnip->rni_waiters--;
5250 			continue;
5251 		}
5252 
5253 		/*
5254 		 * Search the list for a node of interest.
5255 		 */
5256 		np = uu_list_next(rc_notify_list, me);
5257 		while (np != NULL && !rc_notify_info_interested(rnip, np)) {
			/* Fetch the successor first; np may be removed below. */
5258 			rc_notify_t *next = uu_list_next(rc_notify_list, np);
5259 
5260 			if (am_first_info) {
5261 				if (np->rcn_info) {
5262 					/*
5263 					 * Passing another client -- stop
5264 					 * cleaning up notifications
5265 					 */
5266 					am_first_info = 0;
5267 				} else {
5268 					rc_notify_remove_locked(np);
5269 				}
5270 			}
5271 			np = next;
5272 		}
5273 
5274 		/*
5275 		 * Nothing of interest -- wait for notification
5276 		 */
5277 		if (np == NULL) {
5278 			rnip->rni_waiters++;
5279 			(void) pthread_cond_wait(&rnip->rni_cv,
5280 			    &rc_pg_notify_lock);
5281 			rnip->rni_waiters--;
5282 			continue;
5283 		}
5284 
5285 		/*
5286 		 * found something to report -- move myself after the
5287 		 * notification and process it.
5288 		 */
5289 		(void) uu_list_remove(rc_notify_list, me);
5290 		(void) uu_list_insert_after(rc_notify_list, np, me);
5291 
		/* Deletions are reported by FMRI only; there is no node. */
5292 		if ((ndp = np->rcn_delete) != NULL) {
5293 			(void) strlcpy(outp, ndp->rnd_fmri, sz);
5294 			if (am_first_info)
5295 				rc_notify_remove_locked(np);
5296 			(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5297 			rc_node_clear(out, 0);
5298 			return (REP_PROTOCOL_SUCCESS);
5299 		}
5300 
5301 		nnp = np->rcn_node;
5302 		assert(nnp != NULL);
5303 
5304 		/*
5305 		 * We can't bump nnp's reference count without grabbing its
5306 		 * lock, and rc_pg_notify_lock is a leaf lock.  So we
5307 		 * temporarily block all removals to keep nnp from
5308 		 * disappearing.
5309 		 */
5310 		rc_notify_in_use++;
5311 		assert(rc_notify_in_use > 0);
5312 		(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5313 
5314 		rc_node_assign(out, nnp);
5315 
5316 		(void) pthread_mutex_lock(&rc_pg_notify_lock);
5317 		assert(rc_notify_in_use > 0);
5318 		rc_notify_in_use--;
5319 		if (am_first_info)
5320 			rc_notify_remove_locked(np);
		/* Wake anyone who was waiting for removals to be unblocked. */
5321 		if (rc_notify_in_use == 0)
5322 			(void) pthread_cond_broadcast(&rc_pg_notify_cv);
5323 		(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5324 
5325 		return (REP_PROTOCOL_SUCCESS);
5326 	}
5327 	/*
5328 	 * If we're the last one out, let people know it's clear.
5329 	 */
5330 	if (rnip->rni_waiters == 0)
5331 		(void) pthread_cond_broadcast(&rnip->rni_cv);
5332 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5333 	return (REP_PROTOCOL_DONE);
5334 }
5335 
5336 static void
5337 rc_notify_info_reset(rc_notify_info_t *rnip)
5338 {
5339 	int i;
5340 
5341 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
5342 	if (rnip->rni_flags & RC_NOTIFY_ACTIVE)
5343 		rc_notify_info_remove_locked(rnip);
5344 	assert(!(rnip->rni_flags & (RC_NOTIFY_DRAIN | RC_NOTIFY_EMPTYING)));
5345 	rnip->rni_flags |= RC_NOTIFY_EMPTYING;
5346 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5347 
5348 	for (i = 0; i < RC_NOTIFY_MAX_NAMES; i++) {
5349 		if (rnip->rni_namelist[i] != NULL) {
5350 			free((void *)rnip->rni_namelist[i]);
5351 			rnip->rni_namelist[i] = NULL;
5352 		}
5353 		if (rnip->rni_typelist[i] != NULL) {
5354 			free((void *)rnip->rni_typelist[i]);
5355 			rnip->rni_typelist[i] = NULL;
5356 		}
5357 	}
5358 
5359 	(void) pthread_mutex_lock(&rc_pg_notify_lock);
5360 	rnip->rni_flags &= ~RC_NOTIFY_EMPTYING;
5361 	(void) pthread_mutex_unlock(&rc_pg_notify_lock);
5362 }
5363 
5364 void
5365 rc_notify_info_fini(rc_notify_info_t *rnip)
5366 {
5367 	rc_notify_info_reset(rnip);
5368 
5369 	uu_list_node_fini(rnip, &rnip->rni_list_node, rc_notify_info_pool);
5370 	uu_list_node_fini(&rnip->rni_notify, &rnip->rni_notify.rcn_list_node,
5371 	    rc_notify_pool);
5372 }
5373