xref: /titanic_44/usr/src/lib/libpool/common/pool_kernel.c (revision b369f4b871a39ef94e220443957975f445f52eb6)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <assert.h>
30 #include <errno.h>
31 #include <exacct.h>
32 #include <fcntl.h>
33 #include <libnvpair.h>
34 #include <limits.h>
35 #include <poll.h>
36 #include <pool.h>
37 #include <stdlib.h>
38 #include <stdio.h>
39 #include <string.h>
40 #include <strings.h>
41 #include <stropts.h>
42 #include <thread.h>
43 #include <time.h>
44 #include <unistd.h>
45 
46 #include <libxml/tree.h>
47 
48 #include <sys/mman.h>
49 #include <sys/pool.h>
50 #include <sys/pool_impl.h>
51 #include <sys/priocntl.h>
52 #include <sys/stat.h>
53 #include <sys/time.h>
54 #include <sys/types.h>
55 
56 #include "dict.h"
57 
58 #include "pool_internal.h"
59 #include "pool_impl.h"
60 #include "pool_kernel_impl.h"
61 
62 /*
63  * libpool kernel Manipulation Routines
64  *
65  * pool_kernel.c implements the kernel manipulation routines used by the
66  * libpool kernel datastore. The functions are grouped into the following
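67  * logical areas: utilities, connections, configuration, result sets,
68  * elements (and sub-types), properties and logging.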
69  */
70 
71 /*
72  * Device snapshot transfer buffer size
73  */
74 #define	KERNEL_SNAPSHOT_BUF_SZ	65535
75 
76 /*
77  * Kernel result set's initial size. 8 is probably large enough for
78  * most queries. Queries requiring more space are accommodated using
79  * realloc on a per result set basis.
80  */
81 #define	KERNEL_RS_INITIAL_SZ	8
82 
83 /*
84  * Property manipulation macros
85  */
86 #define	KERNEL_PROP_RDONLY	0x1
87 
88 /*
89  * Information required to evaluate qualifying elements for a query
90  */
91 struct query_obj {
92 	const pool_conf_t *conf;	/* configuration being queried */
93 	const pool_elem_t *src;		/* presumably the query's source element - TODO confirm */
94 	const char *src_attr;		/* presumably an attribute of src - TODO confirm */
95 	pool_elem_class_t classes;	/* element class(es) of interest */
96 	pool_value_t **props;		/* property values to evaluate (NOTE(review): confirm NULL termination) */
97 	pool_knl_result_set_t *rs;	/* kernel result set used by the query */
98 };
99 
100 /*
101  * Identifies a pool element with a processor set id
102  */
103 typedef struct pool_set_xref {
104 	pool_knl_pool_t	*psx_pool;	/* pool whose pset association is being resolved */
105 	uint_t		psx_pset_id;	/* pset id, stored by pool_knl_snap_load_update() */
106 	struct pool_set_xref *psx_next;	/* next entry on the psl_xref list */
107 } pool_set_xref_t;
108 
109 /*
110  * Controls exacct snapshot load into libpool data structure
111  */
112 typedef struct pool_snap_load {
113 	int *psl_changed;		/* if non-NULL, receives POU_* change flags */
114 	pool_set_xref_t *psl_xref;	/* head of the list of pool cross-references */
115 	pool_elem_t *psl_system;	/* system element, set by load_group() */
116 	pool_knl_resource_t *psl_pset;	/* pset recorded by the last EXD_PSET_PSETID item */
117 } pool_snap_load_t;
118 
119 /*
120  * Information about an XML document which is being constructed
121  */
122 struct knl_to_xml {
123 	xmlDocPtr ktx_doc;	/* XML document under construction */
124 	xmlNodePtr ktx_node;	/* node on which prop_build_cb() places properties */
125 };
126 
127 /*
128  * Undo structure processing. The following structures are all used to
129  * allow changes to the libpool snapshot and kernel to be undone
130  * following an unsuccessful commit.
131  */
132 typedef struct pool_create_undo {
133 	pool_create_t pcu_ioctl;	/* ioctl argument structure */
134 	pool_elem_t *pcu_elem;		/* presumably the created element - TODO confirm */
135 } pool_create_undo_t;
136 
137 typedef struct pool_destroy_undo {
138 	pool_destroy_t pdu_ioctl;	/* ioctl argument structure */
139 	pool_elem_t *pdu_elem;		/* presumably the destroyed element - TODO confirm */
140 } pool_destroy_undo_t;
141 
142 typedef struct pool_assoc_undo {
143 	pool_assoc_t pau_ioctl;		/* ioctl argument structure */
144 	pool_elem_t *pau_assoc;		/* presumably the pool being associated - TODO confirm */
145 	pool_elem_t *pau_oldres;	/* presumably the previous resource - TODO confirm */
146 	pool_elem_t *pau_newres;	/* presumably the new resource - TODO confirm */
147 } pool_assoc_undo_t;
148 
149 /* NOTE(review): shares the pdu_ member prefix with pool_destroy_undo. */
150 typedef struct pool_dissoc_undo {
151 	pool_dissoc_t pdu_ioctl;	/* ioctl argument structure */
152 	pool_elem_t *pdu_dissoc;	/* presumably the pool being dissociated - TODO confirm */
153 	pool_elem_t *pdu_oldres;	/* presumably the previous resource - TODO confirm */
154 	pool_elem_t *pdu_newres;	/* presumably the new resource - TODO confirm */
155 } pool_dissoc_undo_t;
156 
157 typedef struct pool_xtransfer_undo {
158 	pool_xtransfer_t pxu_ioctl;	/* ioctl argument structure */
159 	pool_elem_t *pxu_src;		/* presumably the source resource - TODO confirm */
160 	pool_elem_t *pxu_tgt;		/* presumably the target resource - TODO confirm */
161 	pool_component_t **pxu_rl;	/* presumably the components transferred - TODO confirm */
162 } pool_xtransfer_undo_t;
163 
164 typedef struct pool_propput_undo {
165 	pool_propput_t ppu_ioctl;	/* ioctl argument structure */
166 	pool_elem_t *ppu_elem;		/* presumably the element being modified - TODO confirm */
167 	nvlist_t *ppu_alist;		/* NOTE(review): role of "alist" not visible here */
168 	nvlist_t *ppu_blist;		/* NOTE(review): role of "blist" not visible here */
169 	uchar_t ppu_doioctl;		/* presumably whether the ioctl is issued - TODO confirm */
170 } pool_propput_undo_t;
171 
172 typedef struct pool_proprm_undo {
173 	pool_proprm_t pru_ioctl;	/* ioctl argument structure */
174 	pool_elem_t *pru_elem;		/* presumably the element being modified - TODO confirm */
175 	pool_value_t pru_oldval;	/* presumably the value before removal - TODO confirm */
176 } pool_proprm_undo_t;
176 
177 extern const char *dtd_location;
178 
179 extern const char *element_class_tags[];
180 extern const char pool_info_location[];
181 
182 /*
183  * These functions are defined in pool_xml.c and represent the minimum
184  * XML support required to allow a pool kernel configuration to be
185  * exported as an XML document.
186  */
187 extern int pool_xml_set_attr(xmlNodePtr, xmlChar *, const pool_value_t *);
188 extern int pool_xml_set_prop(xmlNodePtr, xmlChar *, const pool_value_t *);
189 extern void xml_init(void);
190 extern xmlNodePtr node_create(xmlNodePtr, const xmlChar *);
191 extern void pool_error_func(void *, const char *, ...);
192 /*
193  * Utilities
194  */
195 static int load_group(pool_conf_t *, pool_knl_elem_t *, ea_object_t *,
196     pool_snap_load_t *);
197 static void pool_knl_elem_free(pool_knl_elem_t *, int);
198 static int pool_knl_put_xml_property(pool_elem_t *, xmlNodePtr, const char *,
199     const pool_value_t *);
200 static int pool_knl_snap_load_push(pool_snap_load_t *, pool_knl_pool_t *);
201 static int pool_knl_snap_load_update(pool_snap_load_t *, int, uint_t);
202 static int pool_knl_snap_load_remove(pool_snap_load_t *, int, uint_t);
203 static nvpair_t *pool_knl_find_nvpair(nvlist_t *, const char *);
204 static int pool_knl_nvlist_add_value(nvlist_t *, const char *,
205     const pool_value_t *);
206 static int pool_knl_recover(pool_conf_t *);
207 static uint64_t hash_id(const pool_elem_t *);
208 static int blocking_open(const char *, int);
209 
210 /*
211  * Connections
212  */
213 static void pool_knl_connection_free(pool_knl_connection_t *);
214 
215 /*
216  * Configuration
217  */
218 static int pool_knl_close(pool_conf_t *);
219 static int pool_knl_validate(const pool_conf_t *, pool_valid_level_t);
220 static int pool_knl_commit(pool_conf_t *);
221 static int pool_knl_export(const pool_conf_t *, const char *,
222     pool_export_format_t);
223 static int pool_knl_rollback(pool_conf_t *);
224 static pool_result_set_t *pool_knl_exec_query(const pool_conf_t *,
225     const pool_elem_t *, const char *, pool_elem_class_t, pool_value_t **);
226 static int pool_knl_remove(pool_conf_t *);
227 static char *pool_knl_get_binding(pool_conf_t *, pid_t);
228 static int pool_knl_set_binding(pool_conf_t *, const char *, idtype_t, id_t);
229 static char *pool_knl_get_resource_binding(pool_conf_t *,
230     pool_resource_elem_class_t, pid_t);
231 static int pool_knl_res_transfer(pool_resource_t *, pool_resource_t *,
232     uint64_t);
233 static int pool_knl_res_xtransfer(pool_resource_t *, pool_resource_t *,
234     pool_component_t **);
235 
236 /*
237  * Result Sets
238  */
239 static pool_knl_result_set_t *pool_knl_result_set_alloc(const pool_conf_t *);
240 static int pool_knl_result_set_append(pool_knl_result_set_t *,
241     pool_knl_elem_t *);
242 static int pool_knl_result_set_realloc(pool_knl_result_set_t *);
243 static void pool_knl_result_set_free(pool_knl_result_set_t *);
244 static pool_elem_t *pool_knl_rs_next(pool_result_set_t *);
245 static pool_elem_t *pool_knl_rs_prev(pool_result_set_t *);
246 static pool_elem_t *pool_knl_rs_first(pool_result_set_t *);
247 static pool_elem_t *pool_knl_rs_last(pool_result_set_t *);
248 static int pool_knl_rs_set_index(pool_result_set_t *, int);
249 static int pool_knl_rs_get_index(pool_result_set_t *);
250 static int pool_knl_rs_count(pool_result_set_t *);
251 static int pool_knl_rs_close(pool_result_set_t *);
252 
253 /*
254  * Element (and sub-type)
255  */
256 static pool_knl_elem_t *pool_knl_elem_wrap(pool_conf_t *, pool_elem_class_t,
257     pool_resource_elem_class_t, pool_component_elem_class_t);
258 static pool_elem_t *pool_knl_elem_create(pool_conf_t *, pool_elem_class_t,
259     pool_resource_elem_class_t, pool_component_elem_class_t);
260 static int pool_knl_elem_remove(pool_elem_t *);
261 static int pool_knl_set_container(pool_elem_t *, pool_elem_t *);
262 static pool_elem_t *pool_knl_get_container(const pool_elem_t *);
263 /*
264  * Pool element specific
265  */
266 static int pool_knl_pool_associate(pool_t *, const pool_resource_t *);
267 static int pool_knl_pool_dissociate(pool_t *, const pool_resource_t *);
268 
269 /*
270  * Resource elements specific
271  */
272 static int pool_knl_resource_is_system(const pool_resource_t *);
273 static int pool_knl_resource_can_associate(const pool_resource_t *);
274 
275 /* Properties */
276 static pool_value_class_t pool_knl_get_property(const pool_elem_t *,
277     const char *, pool_value_t *);
278 static pool_value_class_t pool_knl_get_dynamic_property(const pool_elem_t *,
279     const char *, pool_value_t *);
280 static int pool_knl_put_property(pool_elem_t *, const char *,
281     const pool_value_t *);
282 static int pool_knl_rm_property(pool_elem_t *, const char *);
283 static pool_value_t **pool_knl_get_properties(const pool_elem_t *, uint_t *);
284 
285 /*
286  * Logging
287  */
288 static int log_item_commit(log_item_t *);
289 static int log_item_undo(log_item_t *);
290 static int log_item_release(log_item_t *);
291 
292 /*
293  * Utilities
294  */
295 
296 /*
297  * load_group() updates the library configuration with the kernel
298  * snapshot supplied in ep. The function is designed to be called
299  * recursively. This function depends implicitly on the ordering of
300  * the data provided in ep. Changes to the ordering of data in ep must
301  * be matched by changes to this function.
302  */
303 int
304 load_group(pool_conf_t *conf, pool_knl_elem_t *elem, ea_object_t *ep,
305     pool_snap_load_t *psl)
306 {
307 	ea_object_t *eo;
308 	pool_knl_elem_t *old_elem;
309 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
310 	int ret = PO_SUCCESS;
311 
312 	if ((ep->eo_catalog & EXD_DATA_MASK) == EXD_GROUP_SYSTEM) {
313 		if ((elem = pool_knl_elem_wrap(conf, PEC_SYSTEM, PREC_INVALID,
314 		    PCEC_INVALID)) == NULL)
315 			return (PO_FAIL);
316 		if (nvlist_alloc(&elem->pke_properties, NV_UNIQUE_NAME_TYPE,
317 		    0) != 0) {
318 			pool_knl_elem_free(elem, PO_FALSE);
319 			pool_seterror(POE_SYSTEM);
320 			return (PO_FAIL);
321 		}
322 		/*
323 		 * Check to see if we already have an element
324 		 * for this data. If we have, free the newly
325 		 * created elem and continue with the old one
326 		 */
327 		if ((old_elem = dict_get(prov->pkc_elements, elem)) != NULL) {
328 			nvlist_free(old_elem->pke_properties);
329 			old_elem->pke_properties = elem->pke_properties;
330 			pool_knl_elem_free(elem, PO_FALSE);
331 			elem = old_elem;
332 		} else {
333 			if (dict_put(prov->pkc_elements, elem, elem) != NULL) {
334 				pool_knl_elem_free(elem, PO_TRUE);
335 				pool_seterror(POE_SYSTEM);
336 				return (PO_FAIL);
337 			}
338 		}
339 		psl->psl_system = (pool_elem_t *)elem;
340 	}
341 
342 	for (eo = ep->eo_group.eg_objs; eo != NULL; eo = eo->eo_next) {
343 		int data;
344 		pool_knl_elem_t *prop_elem = NULL;
345 
346 		data = (eo->eo_catalog & EXD_DATA_MASK);
347 
348 		switch (data) {
349 		case EXD_SYSTEM_TSTAMP:
350 		case EXD_POOL_TSTAMP:
351 		case EXD_PSET_TSTAMP:
352 		case EXD_CPU_TSTAMP:
353 			if (eo->eo_item.ei_uint64 > prov->pkc_lotime) {
354 				if (eo->eo_item.ei_uint64 > prov->pkc_ltime)
355 					prov->pkc_ltime = eo->eo_item.ei_uint64;
356 				if (psl->psl_changed) {
357 					switch (data) {
358 					case EXD_SYSTEM_TSTAMP:
359 						*psl->psl_changed |= POU_SYSTEM;
360 						break;
361 					case EXD_POOL_TSTAMP:
362 						*psl->psl_changed |= POU_POOL;
363 						break;
364 					case EXD_PSET_TSTAMP:
365 						*psl->psl_changed |= POU_PSET;
366 						break;
367 					case EXD_CPU_TSTAMP:
368 						*psl->psl_changed |= POU_CPU;
369 						break;
370 					}
371 				}
372 			}
373 			break;
374 		case EXD_SYSTEM_PROP:
375 		case EXD_POOL_PROP:
376 		case EXD_PSET_PROP:
377 		case EXD_CPU_PROP:
378 			if (data == EXD_PSET_PROP) {
379 				prop_elem = elem;
380 				elem = (pool_knl_elem_t *)psl->psl_pset;
381 			}
382 			nvlist_free(elem->pke_properties);
383 			if (nvlist_unpack(eo->eo_item.ei_raw,
384 			    eo->eo_item.ei_size, &elem->pke_properties, 0) !=
385 			    0) {
386 				pool_seterror(POE_SYSTEM);
387 				return (PO_FAIL);
388 			}
389 			elem->pke_ltime = prov->pkc_ltime;
390 			if (data == EXD_PSET_PROP) {
391 				elem = prop_elem;
392 			}
393 			break;
394 		case EXD_POOL_POOLID:
395 			if (nvlist_alloc(&elem->pke_properties,
396 			    NV_UNIQUE_NAME_TYPE, 0) != 0) {
397 				pool_seterror(POE_SYSTEM);
398 				return (PO_FAIL);
399 			}
400 			if (nvlist_add_int64(elem->pke_properties,
401 			    "pool.sys_id",
402 			    (int64_t)eo->eo_item.ei_uint32) != 0) {
403 				pool_seterror(POE_SYSTEM);
404 				return (PO_FAIL);
405 			}
406 			if ((old_elem = dict_get(prov->pkc_elements, elem)) !=
407 			    NULL) {
408 				nvlist_free(old_elem->pke_properties);
409 				old_elem->pke_properties = elem->pke_properties;
410 				pool_knl_elem_free(elem, PO_FALSE);
411 				elem = old_elem;
412 			} else {
413 				if (dict_put(prov->pkc_elements, elem, elem) !=
414 				    NULL) {
415 					pool_knl_elem_free(elem, PO_TRUE);
416 					pool_seterror(POE_SYSTEM);
417 					return (PO_FAIL);
418 				}
419 			}
420 			if (pool_knl_snap_load_push(psl,
421 			    (pool_knl_pool_t *)elem) != PO_SUCCESS) {
422 				pool_seterror(POE_SYSTEM);
423 				return (PO_FAIL);
424 			}
425 			((pool_knl_pool_t *)elem)->pkp_assoc[PREC_PSET] = NULL;
426 			break;
427 		case EXD_POOL_PSETID:
428 			if (pool_knl_snap_load_update(psl, EXD_POOL_PSETID,
429 			    eo->eo_item.ei_uint32) != PO_SUCCESS) {
430 				pool_seterror(POE_SYSTEM);
431 				return (PO_FAIL);
432 			}
433 			break;
434 		case EXD_PSET_PSETID:
435 			if (nvlist_alloc(&elem->pke_properties,
436 			    NV_UNIQUE_NAME_TYPE, 0) != 0) {
437 				pool_seterror(POE_SYSTEM);
438 				return (PO_FAIL);
439 			}
440 			if (nvlist_add_int64(elem->pke_properties,
441 			    "pset.sys_id",
442 			    (int64_t)eo->eo_item.ei_uint32) != 0) {
443 				pool_seterror(POE_SYSTEM);
444 				return (PO_FAIL);
445 			}
446 			if ((old_elem = dict_get(prov->pkc_elements, elem)) !=
447 			    NULL) {
448 				nvlist_free(old_elem->pke_properties);
449 				old_elem->pke_properties = elem->pke_properties;
450 				pool_knl_elem_free(elem, PO_FALSE);
451 				elem = old_elem;
452 			} else {
453 				if (dict_put(prov->pkc_elements, elem, elem) !=
454 				    NULL) {
455 					pool_knl_elem_free(elem, PO_TRUE);
456 					pool_seterror(POE_SYSTEM);
457 					return (PO_FAIL);
458 				}
459 			}
460 			psl->psl_pset = (pool_knl_resource_t *)elem;
461 			if (pool_knl_snap_load_remove(psl, data,
462 			    eo->eo_item.ei_uint32) != PO_SUCCESS) {
463 				pool_seterror(POE_SYSTEM);
464 				return (PO_FAIL);
465 			}
466 			break;
467 		case EXD_CPU_CPUID:
468 			if (nvlist_alloc(&elem->pke_properties,
469 			    NV_UNIQUE_NAME_TYPE, 0) != 0) {
470 				pool_seterror(POE_SYSTEM);
471 				return (PO_FAIL);
472 			}
473 			if (nvlist_add_int64(elem->pke_properties,
474 			    "cpu.sys_id",
475 			    (int64_t)eo->eo_item.ei_uint32) != 0) {
476 				pool_seterror(POE_SYSTEM);
477 				return (PO_FAIL);
478 			}
479 			if ((old_elem = dict_get(prov->pkc_elements, elem)) !=
480 			    NULL) {
481 				nvlist_free(old_elem->pke_properties);
482 				old_elem->pke_properties = elem->pke_properties;
483 				old_elem->pke_parent = elem->pke_parent;
484 				pool_knl_elem_free(elem, PO_FALSE);
485 				elem = old_elem;
486 			} else {
487 				if (dict_put(prov->pkc_elements, elem, elem) !=
488 				    NULL) {
489 					pool_knl_elem_free(elem, PO_TRUE);
490 					pool_seterror(POE_SYSTEM);
491 					return (PO_FAIL);
492 				}
493 			}
494 			break;
495 		case EXD_GROUP_POOL:
496 			if ((elem = pool_knl_elem_wrap(conf, PEC_POOL,
497 			    PREC_INVALID, PCEC_INVALID)) == NULL)
498 				return (PO_FAIL);
499 			if (pool_set_container(psl->psl_system,
500 			    (pool_elem_t *)elem) != PO_SUCCESS) {
501 				pool_seterror(POE_SYSTEM);
502 				return (PO_FAIL);
503 			}
504 			break;
505 		case EXD_GROUP_PSET:
506 			if ((elem = pool_knl_elem_wrap(conf, PEC_RES_COMP,
507 			    PREC_PSET, PCEC_INVALID)) == NULL)
508 				return (PO_FAIL);
509 			if (pool_set_container(psl->psl_system,
510 			    (pool_elem_t *)elem) != PO_SUCCESS) {
511 				pool_seterror(POE_SYSTEM);
512 				return (PO_FAIL);
513 			}
514 			break;
515 		case EXD_GROUP_CPU:
516 			if ((elem = pool_knl_elem_wrap(conf, PEC_COMP,
517 			    PREC_INVALID, PCEC_CPU)) == NULL)
518 				return (PO_FAIL);
519 			if (pool_set_container((pool_elem_t *)psl->psl_pset,
520 			    (pool_elem_t *)elem) != PO_SUCCESS) {
521 				pool_seterror(POE_SYSTEM);
522 				return (PO_FAIL);
523 			}
524 			break;
525 		default:
526 			break;
527 		}
528 
529 
530 		if (eo->eo_type == EO_GROUP) {
531 			if ((ret = load_group(conf, elem, eo, psl)) == PO_FAIL)
532 				break;
533 		}
534 	}
535 	return (ret);
536 }
537 
538 /*
539  * Push a snapshot entry onto the list of pools in the snapshot.
540  */
541 int
542 pool_knl_snap_load_push(pool_snap_load_t *psl, pool_knl_pool_t *pkp)
543 {
544 	pool_set_xref_t *psx;
545 
546 	if ((psx = malloc(sizeof (pool_set_xref_t))) == NULL) {
547 		pool_seterror(POE_SYSTEM);
548 		return (PO_FAIL);
549 	}
550 	(void) memset(psx, 0, sizeof (pool_set_xref_t));
551 	psx->psx_pool = pkp;
552 	/*
553 	 * Push onto the list of pools
554 	 */
555 	psx->psx_next = psl->psl_xref;
556 	psl->psl_xref = psx;
557 
558 	return (PO_SUCCESS);
559 }
560 
561 /*
562  * Update the current cross-reference for the supplied type of
563  * resource.
564  */
565 int
566 pool_knl_snap_load_update(pool_snap_load_t *psl, int type, uint_t id)
567 {
568 	switch (type) {
569 	case EXD_POOL_PSETID:
570 		psl->psl_xref->psx_pset_id = id;
571 		break;
572 	default:
573 		return (PO_FAIL);
574 	}
575 
576 	return (PO_SUCCESS);
577 }
578 
579 /*
580  * Remove a resource entry with the supplied type and id from the
581  * snapshot list when it is no longer required.
582  */
583 int
584 pool_knl_snap_load_remove(pool_snap_load_t *psl, int type, uint_t id)
585 {
586 	pool_set_xref_t *current, *prev, *next;
587 
588 	for (prev = NULL, current = psl->psl_xref; current != NULL;
589 	    current = next) {
590 		switch (type) {
591 		case EXD_PSET_PSETID:
592 			if (current->psx_pset_id == id)
593 				current->psx_pool->pkp_assoc[PREC_PSET] =
594 				    psl->psl_pset;
595 			break;
596 		default:
597 			return (PO_FAIL);
598 		}
599 		next = current->psx_next;
600 		if (current->psx_pool->pkp_assoc[PREC_PSET] != NULL) {
601 			if (prev != NULL) {
602 				prev->psx_next = current->psx_next;
603 			} else {
604 				psl->psl_xref = current->psx_next;
605 			}
606 			free(current);
607 		} else
608 			prev = current;
609 	}
610 
611 	return (PO_SUCCESS);
612 }
613 
614 /*
615  * Return the nvpair with the supplied name from the supplied list.
616  *
617  * NULL is returned if the name cannot be found in the list.
618  */
619 nvpair_t *
620 pool_knl_find_nvpair(nvlist_t *l, const char *name)
621 {
622 	nvpair_t *pair;
623 
624 	for (pair = nvlist_next_nvpair(l, NULL); pair != NULL;
625 		pair = nvlist_next_nvpair(l, pair)) {
626 		if (strcmp(nvpair_name(pair), name) == 0)
627 			break;
628 	}
629 	return (pair);
630 }
631 
632 /*
633  * Close the configuration. There are a few steps to closing a configuration:
634  * - Close the pseudo device
635  * - Free the data provider
636  * Returns PO_SUCCESS/PO_FAIL
637  */
638 int
639 pool_knl_close(pool_conf_t *conf)
640 {
641 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
642 
643 	if (close(prov->pkc_fd) < 0) {
644 		pool_seterror(POE_SYSTEM);
645 		return (PO_FAIL);
646 	}
647 	/*
648 	 * Rollback any pending changes before freeing the prov. This
649 	 * ensures there are no memory leaks from pending
650 	 * transactions.
651 	 */
652 	(void) pool_knl_rollback(conf);
653 	pool_knl_connection_free(prov);
654 	return (PO_SUCCESS);
655 }
656 
657 /*
658  * Remove elements in this map (previously identified as "dead") from
659  * the configuration map (prov->pkc_elements).
660  */
661 
662 /* ARGSUSED1 */
663 static void
664 remove_dead_elems(const void *key, void **value, void *cl)
665 {
666 	pool_knl_elem_t *pke = (pool_knl_elem_t *)key;
667 	pool_conf_t *conf = TO_CONF(TO_ELEM(pke));
668 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
669 
670 	assert(dict_remove(prov->pkc_elements, pke) != NULL);
671 #ifdef DEBUG
672 	dprintf("remove_dead_elems:\n");
673 	pool_elem_dprintf(TO_ELEM(pke));
674 #endif	/* DEBUG */
675 	pool_knl_elem_free(pke, PO_TRUE);
676 }
677 
678 /*
679  * Find elements which were not updated the last time that
680  * load_group() was called. Add those elements into a separate map
681  * (passed in cl) which will be later used to remove these elements
682  * from the configuration map.
683  */
684 /* ARGSUSED1 */
685 static void
686 find_dead_elems(const void *key, void **value, void *cl)
687 {
688 	pool_knl_elem_t *pke = (pool_knl_elem_t *)key;
689 	pool_conf_t *conf = TO_CONF(TO_ELEM(pke));
690 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
691 	dict_hdl_t *dead_map = (dict_hdl_t *)cl;
692 
693 	if (pke->pke_ltime < prov->pkc_ltime)
694 		(void) dict_put(dead_map, pke, pke);
695 }
696 
697 /*
698  * Update the snapshot held by the library. This function acts as the
699  * controller for the snapshot update procedure. Then snapshot is
700  * actually updated in multiple phases by the load_group() function
701  * (which updates existing elements and creates new elements as
702  * required) and then by find_dead_elems and remove_dead_elems
703  * (respectively responsible for identifying elements which are to be
704  * removed and then removing them).
705  *
706  * Returns PO_SUCCESS
707  */
708 int
709 pool_knl_update(pool_conf_t *conf, int *changed)
710 {
711 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
712 	pool_query_t query = {0};
713 	ea_object_t *ep;
714 	dict_hdl_t *dead_map;
715 	pool_snap_load_t psl = { NULL };
716 
717 	/*
718 	 * Ensure the library snapshot is consistent, if there are any
719 	 * outstanding transactions return failure.
720 	 */
721 	if (log_size(prov->pkc_log) != 0) {
722 		pool_seterror(POE_INVALID_CONF);
723 		return (PO_FAIL);
724 	}
725 	/*
726 	 * Query the kernel for a snapshot of the configuration state. Use
727 	 * load_group to allocate the user-land representation of the
728 	 * data returned in the snapshot.
729 	 */
730 	/* LINTED E_CONSTANT_CONDITION */
731 	while (1) {
732 		if (ioctl(prov->pkc_fd, POOL_QUERY, &query) < 0) {
733 			pool_seterror(POE_SYSTEM);
734 			return (PO_FAIL);
735 		}
736 		if ((query.pq_io_buf = calloc(1,
737 		    (query.pq_io_bufsize < KERNEL_SNAPSHOT_BUF_SZ) ?
738 		    query.pq_io_bufsize * 2 : query.pq_io_bufsize)) == NULL) {
739 			pool_seterror(POE_SYSTEM);
740 			return (PO_FAIL);
741 		}
742 		if (ioctl(prov->pkc_fd, POOL_QUERY, &query) < 0) {
743 			free(query.pq_io_buf);
744 			if (errno != ENOMEM) {
745 				pool_seterror(POE_SYSTEM);
746 				return (PO_FAIL);
747 			}
748 			query.pq_io_bufsize = 0;
749 			query.pq_io_buf = NULL;
750 		} else
751 			break;
752 	}
753 	if (ea_unpack_object(&ep, EUP_NOALLOC, query.pq_io_buf,
754 	    query.pq_io_bufsize) != EO_GROUP) {
755 		free(query.pq_io_buf);
756 		pool_seterror(POE_DATASTORE);
757 		return (PO_FAIL);
758 	}
759 	/*
760 	 * Update the library snapshot
761 	 */
762 	psl.psl_changed = changed;
763 	prov->pkc_lotime = prov->pkc_ltime;
764 	if (load_group(conf, NULL, ep, &psl) != PO_SUCCESS) {
765 		free(query.pq_io_buf);
766 		ea_free_object(ep, EUP_NOALLOC);
767 		return (PO_FAIL);
768 	}
769 
770 	free(query.pq_io_buf);
771 	ea_free_object(ep, EUP_NOALLOC);
772 	/*
773 	 * Now search the dictionary for items that must be removed because
774 	 * they were neither created nor updated.
775 	 */
776 	if ((dead_map = dict_new((int (*)(const void *, const void *))
777 	    pool_elem_compare, (uint64_t (*)(const void *))hash_id)) == NULL) {
778 		pool_seterror(POE_SYSTEM);
779 		return (PO_FAIL);
780 	}
781 	dict_map(prov->pkc_elements, find_dead_elems, dead_map);
782 
783 	if (dict_length(dead_map) > 0) {
784 		dict_map(dead_map, remove_dead_elems, NULL);
785 	}
786 	dict_free(&dead_map);
787 
788 	return (PO_SUCCESS);
789 }
790 
791 /*
792  * Rely on the kernel to always keep a kernel configuration valid.
793  * Returns PO_SUCCESS
794  */
795 /* ARGSUSED */
796 int
797 pool_knl_validate(const pool_conf_t *conf, pool_valid_level_t level)
798 {
799 	return ((conf->pc_state == POF_INVALID) ? PO_FAIL : PO_SUCCESS);
800 }
801 
802 /*
803  * Process all the outstanding transactions in the log. If the processing
804  * fails, then attempt to rollback and "undo" the changes.
805  */
806 int
807 pool_knl_commit(pool_conf_t *conf)
808 {
809 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
810 	int lock = 1;
811 
812 	/*
813 	 * Lock the kernel state for the commit
814 	 */
815 	if (ioctl(prov->pkc_fd, POOL_COMMIT, lock) < 0) {
816 		pool_seterror(POE_SYSTEM);
817 		return (PO_FAIL);
818 	}
819 	lock = 0;
820 	/*
821 	 * If the state is LS_FAIL, then try to recover before
822 	 * performing the commit.
823 	 */
824 	if (prov->pkc_log->l_state == LS_FAIL) {
825 		if (pool_knl_recover(conf) == PO_FAIL) {
826 			/*
827 			 * Unlock the kernel state for the
828 			 * commit. Assert that this * can't fail,
829 			 * since if it ever does fail the library is
830 			 * unusable.
831 			 */
832 			assert(ioctl(prov->pkc_fd, POOL_COMMIT, lock) >= 0);
833 		}
834 	}
835 	/*
836 	 * Commit the log
837 	 */
838 	if (log_walk(prov->pkc_log, log_item_commit) != PO_SUCCESS) {
839 		(void) pool_knl_recover(conf);
840 		/*
841 		 * Unlock the kernel state for the commit. Assert that
842 		 * this can't fail, since if it ever does fail the
843 		 * library is unusable.
844 		 */
845 		assert(ioctl(prov->pkc_fd, POOL_COMMIT, lock) >= 0);
846 		pool_seterror(POE_SYSTEM);
847 		return (PO_FAIL);
848 	}
849 	/*
850 	 * Unlock the kernel state for the commit. Assert that this
851 	 * can't fail, since if it ever does fail the library is
852 	 * unusable.
853 	 */
854 	assert(ioctl(prov->pkc_fd, POOL_COMMIT, lock) >= 0);
855 	/*
856 	 * Release the log resources
857 	 */
858 	(void) log_walk(prov->pkc_log, log_item_release);
859 	log_empty(prov->pkc_log);
860 	return (PO_SUCCESS);
861 }
862 
863 /*
864  * prop_build_cb() is designed to be called from
865  * pool_walk_properties(). The property value is used to put an XML
866  * property on the supplied ktx_node. This is an essential part of the
867  * mechanism used to export a kernel configuration in libpool XML
868  * form.
869  */
870 /* ARGSUSED */
871 static int
872 prop_build_cb(pool_conf_t *UNUSED, pool_elem_t *pe, const char *name,
873     pool_value_t *pval, void *user)
874 {
875 	struct knl_to_xml *info = (struct knl_to_xml *)user;
876 
877 	return (pool_knl_put_xml_property((pool_elem_t *)pe, info->ktx_node,
878 		name, pval));
879 }
880 
881 /*
882  * Duplicate some of the functionality from pool_xml_put_property()
883  * (see pool_xml.c) to allow a kernel configuration to add XML nodes
884  * to an XML tree which represents the kernel configuration. This is
885  * an essential part of the mechanism used to export a kernel
886  * configuration in libpool XML form.
887  */
888 int
889 pool_knl_put_xml_property(pool_elem_t *pe, xmlNodePtr node, const char *name,
890     const pool_value_t *val)
891 {
892 
893 	/*
894 	 * "type" is a special attribute which is not visible ever outside of
895 	 * libpool. Use the specific type accessor function.
896 	 */
897 	if (strcmp(name, c_type) == 0) {
898 		return (pool_xml_set_attr(node, BAD_CAST name,
899 		    val));
900 	}
901 	if (is_ns_property(pe, name) != NULL) {	/* in ns */
902 		if (pool_xml_set_attr(node,
903 		    BAD_CAST property_name_minus_ns(pe, name), val) == PO_FAIL)
904 			return (pool_xml_set_prop(node, BAD_CAST name,
905 			    val));
906 	} else
907 		return (pool_xml_set_prop(node, BAD_CAST name, val));
908 	return (PO_SUCCESS);
909 }
910 
911 /*
912  * Export the kernel configuration as an XML file. The configuration
913  * is used to build an XML document in memory. This document is then
914  * saved to the supplied location.
915  */
916 int
917 pool_knl_export(const pool_conf_t *conf, const char *location,
918     pool_export_format_t fmt)
919 {
920 	xmlNodePtr node_comment;
921 	xmlNodePtr system;
922 	int ret;
923 	pool_t **ps;
924 	pool_resource_t **rs;
925 	uint_t nelem;
926 	int i;
927 	struct knl_to_xml info;
928 	char_buf_t *cb = NULL;
929 	xmlValidCtxtPtr cvp;
930 
931 	xml_init();
932 
933 
934 	switch (fmt) {
935 	case POX_NATIVE:
936 		info.ktx_doc = xmlNewDoc(BAD_CAST "1.0");
937 		xmlCreateIntSubset(info.ktx_doc, BAD_CAST "system",
938 		    BAD_CAST "-//Sun Microsystems Inc//DTD Resource "
939 		    "Management All//EN",
940 		    BAD_CAST dtd_location);
941 
942 		if ((cvp = xmlNewValidCtxt()) == NULL) {
943 			xmlFreeDoc(info.ktx_doc);
944 			pool_seterror(POE_DATASTORE);
945 			return (PO_FAIL);
946 		}
947 		/*
948 		 * Call xmlValidateDocument() to force the parsing of
949 		 * the DTD. Ignore errors and warning messages as we
950 		 * know the document isn't valid.
951 		 */
952 		(void) xmlValidateDocument(cvp, info.ktx_doc);
953 		xmlFreeValidCtxt(cvp);
954 		if ((info.ktx_node = node_create(NULL, BAD_CAST "system")) ==
955 		    NULL) {
956 			xmlFreeDoc(info.ktx_doc);
957 			pool_seterror(POE_DATASTORE);
958 			return (PO_FAIL);
959 		}
960 
961 		system = info.ktx_node;
962 		info.ktx_doc->_private = (void *)conf;
963 
964 		xmlDocSetRootElement(info.ktx_doc, info.ktx_node);
965 		xmlSetProp(info.ktx_node, BAD_CAST c_ref_id, BAD_CAST "dummy");
966 		if ((node_comment = xmlNewDocComment(info.ktx_doc,
967 		    BAD_CAST "\nConfiguration for pools facility. Do NOT"
968 		    " edit this file by hand - use poolcfg(1)"
969 		    " or libpool(3POOL) instead.\n")) == NULL) {
970 			xmlFreeDoc(info.ktx_doc);
971 			pool_seterror(POE_DATASTORE);
972 			return (PO_FAIL);
973 		}
974 		if (xmlAddPrevSibling(info.ktx_node, node_comment) == NULL) {
975 			xmlFree(node_comment);
976 			xmlFreeDoc(info.ktx_doc);
977 			pool_seterror(POE_DATASTORE);
978 			return (PO_FAIL);
979 		}
980 		if (pool_walk_any_properties((pool_conf_t *)conf,
981 		    pool_conf_to_elem(conf), &info, prop_build_cb, 1) ==
982 		    PO_FAIL) {
983 			xmlFreeDoc(info.ktx_doc);
984 			return (PO_FAIL);
985 		}
986 		if ((cb = alloc_char_buf(CB_DEFAULT_LEN)) == NULL) {
987 			xmlFreeDoc(info.ktx_doc);
988 			return (PO_FAIL);
989 		}
990 		/*
991 		 * Now add pool details
992 		 */
993 		if ((ps = pool_query_pools(conf, &nelem, NULL)) != NULL) {
994 			for (i = 0; i < nelem; i++) {
995 				pool_elem_t *elem = TO_ELEM(ps[i]);
996 				uint_t nreselem;
997 				const char *sep = "";
998 				int j;
999 
1000 				if ((info.ktx_node = node_create(system,
1001 				    BAD_CAST element_class_tags
1002 				    [pool_elem_class(elem)])) == NULL) {
1003 					free(ps);
1004 					free_char_buf(cb);
1005 					xmlFreeDoc(info.ktx_doc);
1006 					pool_seterror(POE_DATASTORE);
1007 					return (PO_FAIL);
1008 				}
1009 				if (pool_walk_any_properties(
1010 				    (pool_conf_t *)conf,
1011 				    elem, &info, prop_build_cb, 1) == PO_FAIL) {
1012 					free(ps);
1013 					free_char_buf(cb);
1014 					xmlFreeDoc(info.ktx_doc);
1015 					return (PO_FAIL);
1016 				}
1017 				/*
1018 				 * TODO: pset specific res manipulation
1019 				 */
1020 				if ((rs = pool_query_pool_resources(conf, ps[i],
1021 				    &nreselem, NULL)) == NULL) {
1022 					free(ps);
1023 					free_char_buf(cb);
1024 					xmlFreeDoc(info.ktx_doc);
1025 					pool_seterror(POE_INVALID_CONF);
1026 					return (PO_FAIL);
1027 				}
1028 				if (set_char_buf(cb, "") == PO_FAIL) {
1029 					free(rs);
1030 					free(ps);
1031 					free_char_buf(cb);
1032 					xmlFreeDoc(info.ktx_doc);
1033 					return (PO_FAIL);
1034 				}
1035 				for (j = 0; j < nreselem; j++) {
1036 					pool_elem_t *reselem = TO_ELEM(rs[j]);
1037 					if (append_char_buf(cb, "%s%s_%d", sep,
1038 					    pool_elem_class_string(reselem),
1039 					    (int)elem_get_sysid(reselem)) ==
1040 					    PO_FAIL) {
1041 						free(rs);
1042 						free(ps);
1043 						free_char_buf(cb);
1044 						xmlFreeDoc(info.ktx_doc);
1045 						return (PO_FAIL);
1046 					}
1047 					sep = " ";
1048 				}
1049 				free(rs);
1050 				xmlSetProp(info.ktx_node, BAD_CAST "res",
1051 				    BAD_CAST cb->cb_buf);
1052 				if (set_char_buf(cb, "%s_%d",
1053 				    pool_elem_class_string(elem),
1054 				    (int)elem_get_sysid(elem)) == PO_FAIL) {
1055 					free(ps);
1056 					free_char_buf(cb);
1057 					xmlFreeDoc(info.ktx_doc);
1058 					return (PO_FAIL);
1059 				}
1060 				xmlSetProp(info.ktx_node, BAD_CAST c_ref_id,
1061 				    BAD_CAST  cb->cb_buf);
1062 			}
1063 			free(ps);
1064 		}
1065 		/*
1066 		 * Now add resource details (including components)
1067 		 */
1068 		if ((rs = pool_query_resources(conf, &nelem, NULL)) != NULL) {
1069 			for (i = 0; i < nelem; i++) {
1070 				pool_elem_t *elem = TO_ELEM(rs[i]);
1071 				pool_component_t **cs = NULL;
1072 				uint_t ncompelem;
1073 				int j;
1074 
1075 				if ((info.ktx_node = node_create(system,
1076 				    BAD_CAST element_class_tags
1077 				    [pool_elem_class(elem)])) == NULL) {
1078 					free(rs);
1079 					free_char_buf(cb);
1080 					xmlFreeDoc(info.ktx_doc);
1081 					pool_seterror(POE_DATASTORE);
1082 					return (PO_FAIL);
1083 				}
1084 				if (pool_walk_any_properties(
1085 				    (pool_conf_t *)conf,
1086 				    elem, &info, prop_build_cb, 1) == PO_FAIL) {
1087 					free(rs);
1088 					free_char_buf(cb);
1089 					xmlFreeDoc(info.ktx_doc);
1090 					return (PO_FAIL);
1091 				}
1092 				if (set_char_buf(cb, "%s_%d",
1093 				    pool_elem_class_string(elem),
1094 				    (int)elem_get_sysid(elem)) == PO_FAIL) {
1095 					free(rs);
1096 					free_char_buf(cb);
1097 					xmlFreeDoc(info.ktx_doc);
1098 					return (PO_FAIL);
1099 				}
1100 				xmlSetProp(info.ktx_node, BAD_CAST c_ref_id,
1101 				    BAD_CAST  cb->cb_buf);
1102 				if ((cs = pool_query_resource_components(conf,
1103 				    rs[i], &ncompelem, NULL)) != NULL) {
1104 					xmlNodePtr resource = info.ktx_node;
1105 
1106 					for (j = 0; j < ncompelem; j++) {
1107 						pool_elem_t *compelem =
1108 						    TO_ELEM(cs[j]);
1109 						if ((info.ktx_node =
1110 						    node_create(resource,
1111 						    BAD_CAST element_class_tags
1112 						    [pool_elem_class(
1113 						    compelem)])) == NULL) {
1114 							pool_seterror(
1115 							    POE_DATASTORE);
1116 							free(rs);
1117 							free(cs);
1118 							free_char_buf(cb);
1119 							xmlFreeDoc(info.
1120 							    ktx_doc);
1121 							return (PO_FAIL);
1122 						}
1123 						if (pool_walk_any_properties(
1124 						    (pool_conf_t *)conf,
1125 						    compelem, &info,
1126 						    prop_build_cb, 1) ==
1127 						    PO_FAIL) {
1128 							free(rs);
1129 							free(cs);
1130 							free_char_buf(cb);
1131 							xmlFreeDoc(info.
1132 							    ktx_doc);
1133 							return (PO_FAIL);
1134 						}
1135 						if (set_char_buf(cb, "%s_%d",
1136 						    pool_elem_class_string(
1137 						    compelem),
1138 						    (int)elem_get_sysid(
1139 						    compelem)) == PO_FAIL) {
1140 							free(rs);
1141 							free(cs);
1142 							free_char_buf(cb);
1143 							xmlFreeDoc(info.
1144 							    ktx_doc);
1145 							return (PO_FAIL);
1146 						}
1147 						xmlSetProp(info.ktx_node,
1148 						    BAD_CAST c_ref_id,
1149 						    BAD_CAST  cb->cb_buf);
1150 					}
1151 					free(cs);
1152 				}
1153 			}
1154 			free(rs);
1155 		}
1156 		free_char_buf(cb);
1157 		/*
1158 		 * Set up the message handlers prior to calling
1159 		 * xmlValidateDocument()
1160 		 */
1161 		if ((cvp = xmlNewValidCtxt()) == NULL) {
1162 			xmlFreeDoc(info.ktx_doc);
1163 			pool_seterror(POE_DATASTORE);
1164 			return (PO_FAIL);
1165 		}
1166 		cvp->error    = pool_error_func;
1167 		cvp->warning  = pool_error_func;
1168 		if (xmlValidateDocument(cvp, info.ktx_doc) == 0) {
1169 			xmlFreeValidCtxt(cvp);
1170 			xmlFreeDoc(info.ktx_doc);
1171 			pool_seterror(POE_INVALID_CONF);
1172 			return (PO_FAIL);
1173 		}
1174 		xmlFreeValidCtxt(cvp);
1175 		ret = xmlSaveFormatFile(location, info.ktx_doc, 1);
1176 		xmlFreeDoc(info.ktx_doc);
1177 		if (ret == -1) {
1178 			pool_seterror(POE_SYSTEM);
1179 			return (PO_FAIL);
1180 		}
1181 		return (PO_SUCCESS);
1182 	default:
1183 		pool_seterror(POE_BADPARAM);
1184 		return (PO_FAIL);
1185 	}
1186 }
1187 
1188 /*
1189  * Rollback the changes to the kernel
1190  */
1191 int
1192 pool_knl_recover(pool_conf_t *conf)
1193 {
1194 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
1195 
1196 	prov->pkc_log->l_state = LS_RECOVER;
1197 	if (log_reverse_walk(prov->pkc_log, log_item_undo) != PO_SUCCESS) {
1198 		dprintf("Library configuration consistency error\n");
1199 		prov->pkc_log->l_state = LS_FAIL;
1200 		pool_seterror(POE_INVALID_CONF);
1201 		return (PO_FAIL);
1202 	}
1203 	prov->pkc_log->l_state = LS_DO;
1204 	return (PO_SUCCESS);
1205 }
1206 
1207 /*
1208  * Rollback the changes to the configuration
1209  */
1210 int
1211 pool_knl_rollback(pool_conf_t *conf)
1212 {
1213 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
1214 
1215 	prov->pkc_log->l_state = LS_UNDO;
1216 	if (log_reverse_walk(prov->pkc_log, log_item_undo) != PO_SUCCESS) {
1217 		dprintf("Kernel configuration consistency error\n");
1218 		(void) log_walk(prov->pkc_log, log_item_release);
1219 		log_empty(prov->pkc_log);
1220 		prov->pkc_log->l_state = LS_FAIL;
1221 		pool_seterror(POE_INVALID_CONF);
1222 		return (PO_FAIL);
1223 	}
1224 	(void) log_walk(prov->pkc_log, log_item_release);
1225 	log_empty(prov->pkc_log);
1226 	prov->pkc_log->l_state = LS_DO;
1227 	return (PO_SUCCESS);
1228 }
1229 
1230 /*
1231  * Callback used to build the result set for a query. Each invocation will
1232  * supply a candidate element for inclusion. The element is filtered by:
1233  * - class
1234  * - properties
1235  * If the element "matches" the target, then it is added to the result
1236  * set, otherwise it is ignored.
1237  */
1238 /* ARGSUSED1 */
1239 static void
1240 build_result_set(const void *key, void **value, void *cl)
1241 {
1242 	struct query_obj *qo = (struct query_obj *)cl;
1243 	pool_knl_elem_t *pke = (pool_knl_elem_t *)key;
1244 
1245 	/*
1246 	 * Check to see if it's the right class of element
1247 	 */
1248 	if (qo->classes & (1 << pool_elem_class((pool_elem_t *)key))) {
1249 		int i;
1250 		/*
1251 		 * Now check to see if the src element is correct. If no src
1252 		 * element is supplied, ignore this check
1253 		 */
1254 		if (qo->src) {
1255 			pool_knl_elem_t *parent;
1256 
1257 			for (parent = pke; parent != NULL;
1258 			    parent = parent->pke_parent) {
1259 				if (parent == (pool_knl_elem_t *)qo->src)
1260 					break;
1261 			}
1262 			if (parent == NULL)
1263 				return;
1264 		}
1265 		/*
1266 		 * Now check for property matches (if there are any specified)
1267 		 */
1268 		if (qo->props) {
1269 			int matched = PO_TRUE;
1270 			for (i = 0; qo->props[i] != NULL; i++) {
1271 				pool_value_t val = POOL_VALUE_INITIALIZER;
1272 
1273 				if (pool_get_property(TO_CONF(TO_ELEM(pke)),
1274 				    (pool_elem_t *)pke,
1275 				    pool_value_get_name(qo->props[i]), &val) ==
1276 				    POC_INVAL) {
1277 					matched = PO_FALSE;
1278 					break;
1279 				} else {
1280 					if (pool_value_equal(qo->props[i],
1281 					    &val) != PO_TRUE) {
1282 						matched = PO_FALSE;
1283 						break;
1284 					}
1285 				}
1286 			}
1287 			if (matched == PO_TRUE)
1288 				(void) pool_knl_result_set_append(qo->rs,
1289 				(pool_knl_elem_t *)key);
1290 		} else {
1291 			(void) pool_knl_result_set_append(qo->rs,
1292 			    (pool_knl_elem_t *)key);
1293 		}
1294 	}
1295 }
1296 
1297 /*
1298  * Execute the supplied query and return a result set which contains
1299  * all qualifying elements.
1300  */
1301 pool_result_set_t *
1302 pool_knl_exec_query(const pool_conf_t *conf, const pool_elem_t *src,
1303     const char *src_attr, pool_elem_class_t classes, pool_value_t **props)
1304 {
1305 	pool_knl_result_set_t *rs;
1306 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
1307 	struct query_obj qo;
1308 	int matched = PO_TRUE;
1309 
1310 	/*
1311 	 * Have a buffer at this point, that we can use
1312 	 */
1313 	if ((rs = pool_knl_result_set_alloc(conf)) == NULL) {
1314 		return (NULL);
1315 	}
1316 	qo.conf = conf;
1317 	qo.src = src;
1318 	qo.src_attr = src_attr;
1319 	qo.classes = classes;
1320 	qo.props = props;
1321 	qo.rs = rs;
1322 	if (src_attr != NULL) {
1323 		pool_knl_pool_t *pkp = (pool_knl_pool_t *)src;
1324 
1325 		/*
1326 		 * Note: This logic is resource specific and must be
1327 		 * extended for additional resource types.
1328 		 */
1329 		/*
1330 		 * Check for property matches (if there are any specified)
1331 		 */
1332 		if (props) {
1333 			int i;
1334 
1335 			for (i = 0; props[i] != NULL; i++) {
1336 				pool_value_t val = POOL_VALUE_INITIALIZER;
1337 
1338 				if (pool_get_property(conf,
1339 				    (pool_elem_t *)pkp->pkp_assoc[PREC_PSET],
1340 				    pool_value_get_name(props[i]), &val) ==
1341 				    POC_INVAL) {
1342 					matched = PO_FALSE;
1343 					break;
1344 				} else {
1345 					if (pool_value_equal(props[i],
1346 					    &val) != PO_TRUE) {
1347 						matched = PO_FALSE;
1348 						break;
1349 					}
1350 				}
1351 			}
1352 		}
1353 
1354 		if (matched == PO_TRUE)
1355 			(void) pool_knl_result_set_append(rs,
1356 			    (pool_knl_elem_t *)pkp->pkp_assoc[PREC_PSET]);
1357 	} else
1358 		dict_map(prov->pkc_elements, build_result_set, &qo);
1359 
1360 	if (rs->pkr_count == 0)
1361 		pool_seterror(POE_INVALID_SEARCH);
1362 	return ((pool_result_set_t *)rs);
1363 }
1364 
1365 /*
1366  * Callback function intended to be used from pool_walk_pools(). If
1367  * the supplied pool is not the default pool attempt to destroy it.
1368  */
1369 /*ARGSUSED*/
1370 static int
1371 destroy_pool_cb(pool_conf_t *conf, pool_t *pool, void *unused)
1372 {
1373 	if (elem_is_default(TO_ELEM(pool)) != PO_TRUE)
1374 		return (pool_destroy(conf, pool));
1375 	/*
1376 	 * Return PO_SUCCESS even though we don't delete the default
1377 	 * pool so that the walk continues
1378 	 */
1379 	return (PO_SUCCESS);
1380 }
1381 
1382 /*
1383  * Remove the configuration details. This means remove all elements
1384  * apart from the system elements.
1385  */
1386 int
1387 pool_knl_remove(pool_conf_t *conf)
1388 {
1389 	uint_t i, nelem;
1390 	pool_resource_t **resources;
1391 
1392 	conf->pc_state = POF_DESTROY;
1393 	if ((resources = pool_query_resources(conf, &nelem, NULL)) != NULL) {
1394 		for (i = 0; i < nelem; i++) {
1395 			if (resource_is_system(resources[i]) == PO_FALSE)
1396 				if (pool_resource_destroy(conf, resources[i]) !=
1397 				    PO_SUCCESS) {
1398 					pool_seterror(POE_INVALID_CONF);
1399 					return (PO_FAIL);
1400 				}
1401 		}
1402 		free(resources);
1403 	}
1404 	(void) pool_walk_pools(conf, conf, destroy_pool_cb);
1405 	if (pool_conf_commit(conf, PO_FALSE) != PO_SUCCESS)
1406 		return (PO_FAIL);
1407 
1408 	if (pool_conf_close(conf) != PO_SUCCESS)
1409 		return (PO_FAIL);
1410 
1411 	return (PO_SUCCESS);
1412 }
1413 
1414 /*
1415  * Determine the name of the pool to which the supplied pid is
1416  * bound. If it cannot be determined return NULL.
1417  */
1418 char *
1419 pool_knl_get_binding(pool_conf_t *conf, pid_t pid)
1420 {
1421 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
1422 	const char *sval;
1423 	char *name = NULL;
1424 	pool_bindq_t bindq;
1425 	pool_value_t *props[] = { NULL, NULL };
1426 	uint_t nelem = 0;
1427 	pool_t **pools;
1428 	pool_value_t val = POOL_VALUE_INITIALIZER;
1429 
1430 	props[0] = &val;
1431 
1432 	bindq.pb_o_id_type = P_PID;
1433 	bindq.pb_o_id = pid;
1434 	if (ioctl(prov->pkc_fd, POOL_BINDQ, &bindq) < 0) {
1435 		pool_seterror(POE_SYSTEM);
1436 		return (NULL);
1437 	}
1438 
1439 	if (pool_value_set_name(props[0], "pool.sys_id") != PO_SUCCESS) {
1440 		return (NULL);
1441 	}
1442 	pool_value_set_int64(props[0], bindq.pb_i_id);
1443 	if ((pools = pool_query_pools(conf, &nelem, props)) == NULL) {
1444 		pool_seterror(POE_BADPARAM);
1445 		return (NULL);
1446 	}
1447 
1448 	if (nelem != 1) {
1449 		free(pools);
1450 		pool_seterror(POE_INVALID_CONF);
1451 		return (NULL);
1452 	}
1453 	if (pool_get_ns_property(TO_ELEM(pools[0]), c_name, props[0])
1454 	    == POC_INVAL) {
1455 		free(pools);
1456 		return (NULL);
1457 	}
1458 	if (pool_value_get_string(props[0], &sval) != PO_SUCCESS) {
1459 		free(pools);
1460 		return (NULL);
1461 	}
1462 	if ((name = strdup(sval)) == NULL) {
1463 		free(pools);
1464 		pool_seterror(POE_SYSTEM);
1465 		return (NULL);
1466 	}
1467 	return (name);
1468 }
1469 
1470 /*
1471  * Bind idtype id to the pool name.
1472  */
1473 int
1474 pool_knl_set_binding(pool_conf_t *conf, const char *pool_name, idtype_t idtype,
1475     id_t id)
1476 {
1477 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
1478 	pool_bind_t bind;
1479 	pool_t *pool;
1480 	int ret;
1481 
1482 	if ((pool = pool_get_pool(conf, pool_name)) == NULL)
1483 		return (PO_FAIL);
1484 
1485 	bind.pb_o_id_type = idtype;
1486 	bind.pb_o_id = id;
1487 	bind.pb_o_pool_id = elem_get_sysid(TO_ELEM(pool));
1488 
1489 	while ((ret = ioctl(prov->pkc_fd, POOL_BIND, &bind)) < 0 &&
1490 	    errno == EAGAIN);
1491 	if (ret < 0) {
1492 		pool_seterror(POE_SYSTEM);
1493 		return (PO_FAIL);
1494 	}
1495 	return (PO_SUCCESS);
1496 }
1497 
1498 /*
1499  * pool_knl_get_resource_binding() returns the binding for a pid to
1500  * the supplied type of resource. If a binding cannot be determined,
1501  * NULL is returned.
1502  */
1503 char *
1504 pool_knl_get_resource_binding(pool_conf_t *conf,
1505     pool_resource_elem_class_t type, pid_t pid)
1506 {
1507 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
1508 	const char *sval;
1509 	char *name = NULL;
1510 	pool_bindq_t bindq;
1511 	pool_value_t *props[] = { NULL, NULL };
1512 	uint_t nelem = 0;
1513 	pool_t **pools;
1514 	pool_resource_t **resources;
1515 	pool_value_t val = POOL_VALUE_INITIALIZER;
1516 
1517 	props[0] = &val;
1518 	bindq.pb_o_id_type = P_PID;
1519 	bindq.pb_o_id = pid;
1520 	if (ioctl(prov->pkc_fd, POOL_BINDQ, &bindq) < 0) {
1521 		pool_seterror(POE_SYSTEM);
1522 		return (NULL);
1523 	}
1524 
1525 	if (pool_value_set_name(props[0], "pool.sys_id") != PO_SUCCESS) {
1526 		return (NULL);
1527 	}
1528 	pool_value_set_int64(props[0], bindq.pb_i_id);
1529 	if ((pools = pool_query_pools(conf, &nelem, props)) == NULL) {
1530 		pool_seterror(POE_BADPARAM);
1531 		return (NULL);
1532 	}
1533 
1534 	if (nelem != 1) {
1535 		free(pools);
1536 		pool_seterror(POE_INVALID_CONF);
1537 		return (NULL);
1538 	}
1539 
1540 	if (pool_value_set_string(props[0], pool_resource_type_string(type)) !=
1541 	    PO_SUCCESS ||
1542 	    pool_value_set_name(props[0], c_type) != PO_SUCCESS) {
1543 		free(pools);
1544 		return (NULL);
1545 	}
1546 
1547 	if ((resources = pool_query_pool_resources(conf, pools[0], &nelem,
1548 	    NULL)) == NULL) {
1549 		free(pools);
1550 		pool_seterror(POE_INVALID_CONF);
1551 		return (NULL);
1552 	}
1553 	free(pools);
1554 	if (nelem != 1) {
1555 		free(resources);
1556 		pool_seterror(POE_INVALID_CONF);
1557 		return (NULL);
1558 	}
1559 	if (pool_get_ns_property(TO_ELEM(resources[0]), c_name, props[0]) ==
1560 	    POC_INVAL) {
1561 		free(resources);
1562 		return (NULL);
1563 	}
1564 	free(resources);
1565 	if (pool_value_get_string(props[0], &sval) != PO_SUCCESS) {
1566 		return (NULL);
1567 	}
1568 	if ((name = strdup(sval)) == NULL) {
1569 		pool_seterror(POE_SYSTEM);
1570 		return (NULL);
1571 	}
1572 	return (name);
1573 }
1574 
1575 /*
1576  * Allocate the required library data structure and initialise it.
1577  */
1578 pool_knl_elem_t *
1579 pool_knl_elem_wrap(pool_conf_t *conf, pool_elem_class_t class,
1580     pool_resource_elem_class_t res_class,
1581     pool_component_elem_class_t comp_class)
1582 {
1583 	pool_knl_elem_t *elem;
1584 	pool_elem_t *pe;
1585 
1586 	switch (class) {
1587 	case PEC_SYSTEM:
1588 		if ((elem = malloc(sizeof (pool_knl_system_t))) == NULL) {
1589 			pool_seterror(POE_SYSTEM);
1590 			return (NULL);
1591 		}
1592 		(void) memset(elem, 0, sizeof (pool_knl_system_t));
1593 		break;
1594 	case PEC_POOL:
1595 		if ((elem = malloc(sizeof (pool_knl_pool_t))) == NULL) {
1596 			pool_seterror(POE_SYSTEM);
1597 			return (NULL);
1598 		}
1599 		(void) memset(elem, 0, sizeof (pool_knl_pool_t));
1600 		break;
1601 	case PEC_RES_COMP:
1602 	case PEC_RES_AGG:
1603 		if ((elem = malloc(sizeof (pool_knl_resource_t))) == NULL) {
1604 			pool_seterror(POE_SYSTEM);
1605 			return (NULL);
1606 		}
1607 		(void) memset(elem, 0, sizeof (pool_knl_resource_t));
1608 		break;
1609 	case PEC_COMP:
1610 		if ((elem = malloc(sizeof (pool_knl_component_t))) == NULL) {
1611 			pool_seterror(POE_SYSTEM);
1612 			return (NULL);
1613 		}
1614 		(void) memset(elem, 0, sizeof (pool_knl_component_t));
1615 		break;
1616 	default:
1617 		pool_seterror(POE_BADPARAM);
1618 		return (NULL);
1619 	}
1620 	pe = TO_ELEM(elem);
1621 	pe->pe_conf = conf;
1622 	pe->pe_class = class;
1623 	pe->pe_resource_class = res_class;
1624 	pe->pe_component_class = comp_class;
1625 	/* Set up the function pointers for element manipulation */
1626 	pe->pe_get_prop = pool_knl_get_property;
1627 	pe->pe_put_prop = pool_knl_put_property;
1628 	pe->pe_rm_prop = pool_knl_rm_property;
1629 	pe->pe_get_props = pool_knl_get_properties;
1630 	pe->pe_remove = pool_knl_elem_remove;
1631 	pe->pe_get_container = pool_knl_get_container;
1632 	pe->pe_set_container = pool_knl_set_container;
1633 	/*
1634 	 * Specific initialisation for different types of element
1635 	 */
1636 	if (class == PEC_POOL) {
1637 		pool_knl_pool_t *pp = (pool_knl_pool_t *)elem;
1638 		pp->pp_associate = pool_knl_pool_associate;
1639 		pp->pp_dissociate = pool_knl_pool_dissociate;
1640 		pp->pkp_assoc[PREC_PSET] = (pool_knl_resource_t *)
1641 		    resource_by_sysid(conf, PS_NONE, "pset");
1642 	}
1643 	if (class == PEC_RES_COMP || class == PEC_RES_AGG) {
1644 		pool_knl_resource_t *pr = (pool_knl_resource_t *)elem;
1645 		pr->pr_is_system = pool_knl_resource_is_system;
1646 		pr->pr_can_associate = pool_knl_resource_can_associate;
1647 	}
1648 #if DEBUG
1649 	if (dict_put(((pool_knl_connection_t *)conf->pc_prov)->pkc_leaks,
1650 	    elem, elem) != NULL)
1651 		assert(!"leak map put failed");
1652 	dprintf("allocated %p\n", elem);
1653 #endif	/* DEBUG */
1654 	return (elem);
1655 }
1656 
1657 /*
1658  * Allocate a new pool_knl_elem_t in the supplied configuration of the
1659  * specified class.
1660  * Returns element pointer/NULL
1661  */
1662 pool_elem_t *
1663 pool_knl_elem_create(pool_conf_t *conf, pool_elem_class_t class,
1664     pool_resource_elem_class_t res_class,
1665     pool_component_elem_class_t comp_class)
1666 {
1667 	pool_knl_elem_t *elem;
1668 	pool_create_undo_t *create;
1669 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
1670 	static int id = -3;
1671 	char_buf_t *cb;
1672 
1673 	if ((elem = pool_knl_elem_wrap(conf, class, res_class, comp_class)) ==
1674 	    NULL)
1675 		return (NULL);
1676 
1677 	/*
1678 	 * Allocate an nvlist to hold properties
1679 	 */
1680 	if (nvlist_alloc(&elem->pke_properties, NV_UNIQUE_NAME_TYPE, 0) != 0) {
1681 		pool_knl_elem_free(elem, PO_FALSE);
1682 		pool_seterror(POE_SYSTEM);
1683 		return (NULL);
1684 	}
1685 	/*
1686 	 * Allocate a temporary ID and name until the element is
1687 	 * created for real
1688 	 */
1689 	if ((cb = alloc_char_buf(CB_DEFAULT_LEN)) == NULL) {
1690 		pool_knl_elem_free(elem, PO_TRUE);
1691 		return (NULL);
1692 	}
1693 	if (set_char_buf(cb, "%s.sys_id",
1694 	    pool_elem_class_string((pool_elem_t *)elem)) != PO_SUCCESS) {
1695 		pool_knl_elem_free(elem, PO_TRUE);
1696 		free_char_buf(cb);
1697 		return (NULL);
1698 	}
1699 	(void) nvlist_add_int64(elem->pke_properties, cb->cb_buf, id--);
1700 	if (set_char_buf(cb, "%s.name",
1701 	    pool_elem_class_string((pool_elem_t *)elem)) != PO_SUCCESS) {
1702 		pool_knl_elem_free(elem, PO_TRUE);
1703 		free_char_buf(cb);
1704 		return (NULL);
1705 	}
1706 	(void) nvlist_add_string(elem->pke_properties, cb->cb_buf, "");
1707 	/*
1708 	 * If it's a resource class, it will need an initial size
1709 	 */
1710 	if (class == PEC_RES_COMP || class == PEC_RES_AGG) {
1711 		if (set_char_buf(cb, "%s.size",
1712 		    pool_elem_class_string((pool_elem_t *)elem)) !=
1713 		    PO_SUCCESS) {
1714 			pool_knl_elem_free(elem, PO_TRUE);
1715 			free_char_buf(cb);
1716 			return (NULL);
1717 		}
1718 		(void) nvlist_add_uint64(elem->pke_properties, cb->cb_buf, 0);
1719 	}
1720 	free_char_buf(cb);
1721 
1722 	/*
1723 	 * Register the newly created element
1724 	 */
1725 	if (dict_put(prov->pkc_elements, elem, elem) != NULL) {
1726 		pool_knl_elem_free(elem, PO_TRUE);
1727 		pool_seterror(POE_SYSTEM);
1728 		return (NULL);
1729 	}
1730 
1731 	if (prov->pkc_log->l_state != LS_DO)
1732 		return ((pool_elem_t *)elem);
1733 
1734 	/*
1735 	 * The remaining logic is setting up the arguments for the
1736 	 * POOL_CREATE ioctl and appending the details into the log.
1737 	 */
1738 	if ((create = malloc(sizeof (pool_create_undo_t))) == NULL) {
1739 		pool_seterror(POE_SYSTEM);
1740 		return (NULL);
1741 	}
1742 	create->pcu_ioctl.pc_o_type = class;
1743 	switch (class) {
1744 	case PEC_SYSTEM:
1745 		pool_seterror(POE_BADPARAM);
1746 		free(create);
1747 		return (NULL);
1748 	case PEC_POOL: /* NO-OP */
1749 		break;
1750 	case PEC_RES_COMP:
1751 	case PEC_RES_AGG:
1752 		create->pcu_ioctl.pc_o_sub_type = res_class;
1753 		break;
1754 	case PEC_COMP:
1755 		create->pcu_ioctl.pc_o_sub_type = comp_class;
1756 		break;
1757 	default:
1758 		pool_seterror(POE_BADPARAM);
1759 		free(create);
1760 		return (NULL);
1761 	}
1762 
1763 	create->pcu_elem = (pool_elem_t *)elem;
1764 
1765 	if (log_append(prov->pkc_log, POOL_CREATE, (void *)create) !=
1766 	    PO_SUCCESS) {
1767 		free(create);
1768 		return (NULL);
1769 	}
1770 	return ((pool_elem_t *)elem);
1771 }
1772 
1773 /*
1774  * Remove the details of the element from our userland copy and destroy
1775  * the element (if appropriate) in the kernel.
1776  */
1777 int
1778 pool_knl_elem_remove(pool_elem_t *pe)
1779 {
1780 	pool_knl_connection_t *prov;
1781 	pool_destroy_undo_t *destroy;
1782 
1783 	prov = (pool_knl_connection_t *)(TO_CONF(pe))->pc_prov;
1784 
1785 	if (dict_remove(prov->pkc_elements, pe) == NULL) {
1786 		pool_seterror(POE_SYSTEM);
1787 		return (PO_FAIL);
1788 	}
1789 	if (prov->pkc_log->l_state != LS_DO) {
1790 		return (PO_SUCCESS);
1791 	}
1792 
1793 	/*
1794 	 * The remaining logic is setting up the arguments for the
1795 	 * POOL_DESTROY ioctl and appending the details into the log.
1796 	 */
1797 	if ((destroy = malloc(sizeof (pool_destroy_undo_t))) == NULL) {
1798 		pool_seterror(POE_SYSTEM);
1799 		return (PO_FAIL);
1800 	}
1801 	destroy->pdu_ioctl.pd_o_type = pool_elem_class(pe);
1802 
1803 	if (destroy->pdu_ioctl.pd_o_type == PEC_RES_COMP ||
1804 	    destroy->pdu_ioctl.pd_o_type == PEC_RES_AGG)
1805 		destroy->pdu_ioctl.pd_o_sub_type = pool_resource_elem_class(pe);
1806 
1807 	if (destroy->pdu_ioctl.pd_o_type == PEC_COMP)
1808 		destroy->pdu_ioctl.pd_o_sub_type =
1809 		    pool_component_elem_class(pe);
1810 
1811 	destroy->pdu_elem = pe;
1812 
1813 	if (log_append(prov->pkc_log, POOL_DESTROY, (void *)destroy) !=
1814 	    PO_SUCCESS) {
1815 		free(destroy);
1816 		return (PO_FAIL);
1817 	}
1818 	return (PO_SUCCESS);
1819 }
1820 
1821 /*
1822  * Set the parent of the supplied child to the supplied parent
1823  */
1824 int
1825 pool_knl_set_container(pool_elem_t *pp, pool_elem_t *pc)
1826 {
1827 	pool_knl_elem_t *pkp = (pool_knl_elem_t *)pp;
1828 	pool_knl_elem_t *pkc = (pool_knl_elem_t *)pc;
1829 
1830 	pkc->pke_parent = pkp;
1831 	return (PO_SUCCESS);
1832 }
1833 
1834 /*
1835  * TODO: Needed for msets and ssets.
1836  */
1837 /* ARGSUSED */
1838 int
1839 pool_knl_res_transfer(pool_resource_t *src, pool_resource_t *tgt,
1840     uint64_t size) {
1841 	return (PO_FAIL);
1842 }
1843 
1844 /*
1845  * Transfer resource components from one resource set to another.
1846  */
1847 int
1848 pool_knl_res_xtransfer(pool_resource_t *src, pool_resource_t *tgt,
1849     pool_component_t **rl) {
1850 	pool_elem_t *src_e = TO_ELEM(src);
1851 	pool_elem_t *tgt_e = TO_ELEM(tgt);
1852 	pool_xtransfer_undo_t *xtransfer;
1853 	size_t size;
1854 	pool_knl_connection_t *prov =
1855 	    (pool_knl_connection_t *)TO_CONF(src_e)->pc_prov;
1856 
1857 	if (prov->pkc_log->l_state != LS_DO) {
1858 		/*
1859 		 * Walk the Result Set and move the resource components
1860 		 */
1861 		for (size = 0; rl[size] != NULL; size++) {
1862 			if (pool_set_container(TO_ELEM(tgt),
1863 			    TO_ELEM(rl[size])) == PO_FAIL) {
1864 				return (PO_FAIL);
1865 			}
1866 		}
1867 		return (PO_SUCCESS);
1868 	}
1869 
1870 	/*
1871 	 * The remaining logic is setting up the arguments for the
1872 	 * POOL_XTRANSFER ioctl and appending the details into the log.
1873 	 */
1874 	if ((xtransfer = malloc(sizeof (pool_xtransfer_undo_t))) == NULL) {
1875 		pool_seterror(POE_SYSTEM);
1876 		return (PO_FAIL);
1877 	}
1878 
1879 	if (pool_elem_class(src_e) == PEC_RES_COMP) {
1880 		xtransfer->pxu_ioctl.px_o_id_type =
1881 		    pool_resource_elem_class(src_e);
1882 	} else {
1883 		pool_seterror(POE_BADPARAM);
1884 		return (PO_FAIL);
1885 	}
1886 
1887 
1888 	for (xtransfer->pxu_ioctl.px_o_complist_size = 0;
1889 	    rl[xtransfer->pxu_ioctl.px_o_complist_size] != NULL;
1890 	    xtransfer->pxu_ioctl.px_o_complist_size++)
1891 		/* calculate the size using the terminating NULL */;
1892 	if ((xtransfer->pxu_ioctl.px_o_comp_list =
1893 		calloc(xtransfer->pxu_ioctl.px_o_complist_size,
1894 		sizeof (id_t))) == NULL) {
1895 		pool_seterror(POE_SYSTEM);
1896 		return (PO_FAIL);
1897 	}
1898 	if ((xtransfer->pxu_rl = calloc(
1899 	    xtransfer->pxu_ioctl.px_o_complist_size + 1,
1900 	    sizeof (pool_component_t *))) == NULL) {
1901 		pool_seterror(POE_SYSTEM);
1902 		return (PO_FAIL);
1903 	}
1904 	(void) memcpy(xtransfer->pxu_rl, rl,
1905 	    xtransfer->pxu_ioctl.px_o_complist_size *
1906 	    sizeof (pool_component_t *));
1907 	xtransfer->pxu_src = src_e;
1908 	xtransfer->pxu_tgt = tgt_e;
1909 
1910 	if (log_append(prov->pkc_log, POOL_XTRANSFER, (void *)xtransfer) !=
1911 	    PO_SUCCESS) {
1912 		free(xtransfer);
1913 		return (PO_FAIL);
1914 	}
1915 	for (size = 0; rl[size] != NULL; size++) {
1916 		if (pool_set_container(TO_ELEM(tgt), TO_ELEM(rl[size])) ==
1917 		    PO_FAIL) {
1918 			return (PO_FAIL);
1919 		}
1920 	}
1921 	return (PO_SUCCESS);
1922 }
1923 
1924 /*
1925  * Return the parent of an element.
1926  */
1927 pool_elem_t *
1928 pool_knl_get_container(const pool_elem_t *pe)
1929 {
1930 	pool_knl_elem_t *pke = (pool_knl_elem_t *)pe;
1931 
1932 	return ((pool_elem_t *)pke->pke_parent);
1933 }
1934 
1935 /*
1936  * Note: This function is resource specific, needs extending for other
1937  * resource types
1938  */
1939 int
1940 pool_knl_resource_is_system(const pool_resource_t *pr)
1941 {
1942 	switch (pool_resource_elem_class(TO_ELEM(pr))) {
1943 	case PREC_PSET:
1944 		return (PSID_IS_SYSSET(
1945 		    elem_get_sysid(TO_ELEM(pr))));
1946 	default:
1947 		return (PO_FALSE);
1948 	}
1949 }
1950 
1951 /*
1952  * Note: This function is resource specific, needs extending for other
1953  * resource types
1954  */
1955 int
1956 pool_knl_resource_can_associate(const pool_resource_t *pr)
1957 {
1958 	switch (pool_resource_elem_class(TO_ELEM(pr))) {
1959 	case PREC_PSET:
1960 		return (PO_TRUE);
1961 	default:
1962 		return (PO_FALSE);
1963 	}
1964 }
1965 
1966 /*
1967  * pool_knl_pool_associate() associates the supplied resource to the
1968  * supplied pool.
1969  *
1970  * Returns: PO_SUCCESS/PO_FAIL
1971  */
1972 int
1973 pool_knl_pool_associate(pool_t *pool, const pool_resource_t *resource)
1974 {
1975 	pool_knl_connection_t *prov;
1976 	pool_knl_pool_t *pkp = (pool_knl_pool_t *)pool;
1977 	pool_resource_elem_class_t res_class =
1978 	    pool_resource_elem_class(TO_ELEM(resource));
1979 	pool_assoc_undo_t *assoc;
1980 	pool_knl_resource_t *orig_res = pkp->pkp_assoc[res_class];
1981 
1982 	/*
1983 	 * Are we allowed to associate with this target?
1984 	 */
1985 	if (pool_knl_resource_can_associate(resource) == PO_FALSE) {
1986 		pool_seterror(POE_BADPARAM);
1987 		return (PO_FAIL);
1988 	}
1989 	prov = (pool_knl_connection_t *)(TO_CONF(TO_ELEM(pool)))->pc_prov;
1990 
1991 	if (prov->pkc_log->l_state != LS_DO) {
1992 		pkp->pkp_assoc[res_class] = (pool_knl_resource_t *)resource;
1993 		return (PO_SUCCESS);
1994 	}
1995 
1996 	/*
1997 	 * The remaining logic is setting up the arguments for the
1998 	 * POOL_ASSOC ioctl and appending the details into the log.
1999 	 */
2000 	if ((assoc = malloc(sizeof (pool_assoc_undo_t))) == NULL) {
2001 		pool_seterror(POE_SYSTEM);
2002 		return (PO_FAIL);
2003 	}
2004 	assoc->pau_assoc = TO_ELEM(pool);
2005 	assoc->pau_oldres = (pool_elem_t *)orig_res;
2006 	assoc->pau_newres = TO_ELEM(resource);
2007 
2008 	assoc->pau_ioctl.pa_o_id_type = res_class;
2009 
2010 	if (log_append(prov->pkc_log, POOL_ASSOC, (void *)assoc) !=
2011 	    PO_SUCCESS) {
2012 		free(assoc);
2013 		pkp->pkp_assoc[res_class] = orig_res;
2014 		return (PO_FAIL);
2015 	}
2016 	pkp->pkp_assoc[res_class] = (pool_knl_resource_t *)resource;
2017 	return (PO_SUCCESS);
2018 }
2019 
2020 /*
2021  * pool_knl_pool_dissociate() dissociates the supplied resource from
2022  * the supplied pool.
2023  *
2024  * Returns: PO_SUCCESS/PO_FAIL
2025  */
2026 int
2027 pool_knl_pool_dissociate(pool_t *pool, const pool_resource_t *resource)
2028 {
2029 	pool_knl_connection_t *prov;
2030 	pool_dissoc_undo_t *dissoc;
2031 	pool_knl_pool_t *pkp = (pool_knl_pool_t *)pool;
2032 	pool_resource_t *default_res = (pool_resource_t *)get_default_resource(
2033 	    resource);
2034 	pool_resource_elem_class_t res_class =
2035 	    pool_resource_elem_class(TO_ELEM(resource));
2036 
2037 	prov = (pool_knl_connection_t *)(TO_CONF(TO_ELEM(pool)))->pc_prov;
2038 
2039 	if (prov->pkc_log->l_state != LS_DO) {
2040 		pkp->pkp_assoc[res_class] = (pool_knl_resource_t *)default_res;
2041 		return (PO_SUCCESS);
2042 	}
2043 	/*
2044 	 * The remaining logic is setting up the arguments for the
2045 	 * POOL_DISSOC ioctl and appending the details into the log.
2046 	 */
2047 	if ((dissoc = malloc(sizeof (pool_dissoc_undo_t))) == NULL) {
2048 		pool_seterror(POE_SYSTEM);
2049 		return (PO_FAIL);
2050 	}
2051 	dissoc->pdu_dissoc = TO_ELEM(pool);
2052 	dissoc->pdu_oldres = TO_ELEM(resource);
2053 	dissoc->pdu_newres = TO_ELEM(default_res);
2054 
2055 	dissoc->pdu_ioctl.pd_o_id_type = res_class;
2056 
2057 	if (log_append(prov->pkc_log, POOL_DISSOC, (void *)dissoc) !=
2058 	    PO_SUCCESS) {
2059 		free(dissoc);
2060 		pkp->pkp_assoc[res_class] = (pool_knl_resource_t *)resource;
2061 		return (PO_FAIL);
2062 	}
2063 
2064 	/*
2065 	 * Update our local copy
2066 	 */
2067 	pkp->pkp_assoc[res_class] = (pool_knl_resource_t *)default_res;
2068 	return (PO_SUCCESS);
2069 }
2070 
2071 /*
2072  * Allocate a data provider for the supplied configuration and optionally
2073  * discover resources.
2074  * The data provider is the cross over point from the "abstract" configuration
2075  * functions into the data representation specific manipulation routines.
2076  * This function sets up all the required pointers to create a kernel aware
2077  * data provider.
2078  * Returns PO_SUCCESS/PO_FAIL
2079  */
2080 int
2081 pool_knl_connection_alloc(pool_conf_t *conf, int oflags)
2082 {
2083 	pool_knl_connection_t *prov;
2084 
2085 	if ((prov = malloc(sizeof (pool_knl_connection_t))) == NULL) {
2086 		pool_seterror(POE_SYSTEM);
2087 		return (PO_FAIL);
2088 	}
2089 	(void) memset(prov, 0, sizeof (pool_knl_connection_t));
2090 	/*
2091 	 * Initialise data members
2092 	 */
2093 	prov->pc_name = strdup("kernel");
2094 	prov->pc_store_type = KERNEL_DATA_STORE;
2095 	prov->pc_oflags = oflags;
2096 	/*
2097 	 * Initialise function pointers
2098 	 */
2099 	prov->pc_close = pool_knl_close;
2100 	prov->pc_validate = pool_knl_validate;
2101 	prov->pc_commit = pool_knl_commit;
2102 	prov->pc_export = pool_knl_export;
2103 	prov->pc_rollback = pool_knl_rollback;
2104 	prov->pc_exec_query = pool_knl_exec_query;
2105 	prov->pc_elem_create = pool_knl_elem_create;
2106 	prov->pc_remove = pool_knl_remove;
2107 	prov->pc_res_xfer = pool_knl_res_transfer;
2108 	prov->pc_res_xxfer = pool_knl_res_xtransfer;
2109 	prov->pc_get_binding = pool_knl_get_binding;
2110 	prov->pc_set_binding = pool_knl_set_binding;
2111 	prov->pc_get_resource_binding = pool_knl_get_resource_binding;
2112 	/*
2113 	 * Associate the provider to it's configuration
2114 	 */
2115 	conf->pc_prov = (pool_connection_t *)prov;
2116 	/*
2117 	 * End of common initialisation
2118 	 */
2119 	/*
2120 	 * Attempt to open the pseudo device, if the configuration is opened
2121 	 * readonly then try to open an info device, otherwise try to open
2122 	 * the writeable device.
2123 	 */
2124 	if (oflags & PO_RDWR) {
2125 		if ((prov->pkc_fd = blocking_open(pool_dynamic_location(),
2126 		    O_RDWR)) < 0) {
2127 			free(prov);
2128 			conf->pc_prov = NULL;
2129 			pool_seterror(POE_SYSTEM);
2130 			return (PO_FAIL);
2131 		}
2132 	} else {
2133 		if ((prov->pkc_fd = open(pool_info_location, O_RDWR)) < 0) {
2134 			free(prov);
2135 			conf->pc_prov = NULL;
2136 			pool_seterror(POE_SYSTEM);
2137 			return (PO_FAIL);
2138 		}
2139 	}
2140 	/*
2141 	 * Allocate the element dictionary
2142 	 */
2143 	if ((prov->pkc_elements = dict_new((int (*)(const void *, const void *))
2144 	    pool_elem_compare, (uint64_t (*)(const void *))hash_id)) == NULL) {
2145 		(void) close(prov->pkc_fd);
2146 		free(prov);
2147 		conf->pc_prov = NULL;
2148 		pool_seterror(POE_SYSTEM);
2149 		return (PO_FAIL);
2150 	}
2151 #if DEBUG
2152 	if ((prov->pkc_leaks = dict_new(NULL, NULL)) == NULL) {
2153 		dict_free(&prov->pkc_elements);
2154 		(void) close(prov->pkc_fd);
2155 		free(prov);
2156 		conf->pc_prov = NULL;
2157 		pool_seterror(POE_SYSTEM);
2158 		return (PO_FAIL);
2159 	}
2160 #endif	/* DEBUG */
2161 	/*
2162 	 * Allocate the transaction log
2163 	 */
2164 	if ((prov->pkc_log = log_alloc(conf)) == NULL) {
2165 #if DEBUG
2166 		dict_free(&prov->pkc_leaks);
2167 #endif	/* DEBUG */
2168 		dict_free(&prov->pkc_elements);
2169 		(void) close(prov->pkc_fd);
2170 		free(prov);
2171 		conf->pc_prov = NULL;
2172 		return (PO_FAIL);
2173 	}
2174 	/*
2175 	 * At this point the configuration provider has been initialized,
2176 	 * mark the configuration as valid so that the various routines
2177 	 * which rely on a valid configuration will work correctly.
2178 	 */
2179 	conf->pc_state = POF_VALID;
2180 	/*
2181 	 * Update the library snapshot from the kernel
2182 	 */
2183 	if (pool_knl_update(conf, NULL) != PO_SUCCESS) {
2184 #if DEBUG
2185 		dict_free(&prov->pkc_leaks);
2186 #endif	/* DEBUG */
2187 		dict_free(&prov->pkc_elements);
2188 		(void) close(prov->pkc_fd);
2189 		free(prov);
2190 		conf->pc_prov = NULL;
2191 		conf->pc_state = POF_INVALID;
2192 		return (PO_FAIL);
2193 	}
2194 	return (PO_SUCCESS);
2195 }
2196 
#if DEBUG
/*
 * pool_knl_elem_printf_cb() is a dict_map() callback used in DEBUG
 * builds to report an element still present in the leak dictionary.
 * The element's property list is printed to stdout (if it has one)
 * and the element is asserted to be absent from the dictionary passed
 * as the closure argument.
 */
static void
pool_knl_elem_printf_cb(const void *key, void **value, void *cl)
{
	pool_knl_elem_t *leaked = (pool_knl_elem_t *)key;
	dict_hdl_t *elements = (dict_hdl_t *)cl;

	dprintf("leak elem:%p\n", leaked);
	if (leaked->pke_properties == NULL)
		dprintf("no properties\n");
	else
		nvlist_print(stdout, leaked->pke_properties);
	assert(dict_get(elements, leaked) == NULL);
}
#endif	/* DEBUG */
2212 /*
2213  * pool_knl_elem_free() releases the resources associated with the
2214  * supplied element.
2215  */
2216 static void
2217 pool_knl_elem_free(pool_knl_elem_t *pke, int freeprop)
2218 {
2219 #if DEBUG
2220 	pool_conf_t *conf = TO_CONF(TO_ELEM(pke));
2221 	if (dict_remove(((pool_knl_connection_t *)conf->pc_prov)->pkc_leaks,
2222 	    pke) == NULL)
2223 		dprintf("%p, wasn't in the leak map\n", pke);
2224 	if (freeprop == PO_TRUE) {
2225 		pool_elem_dprintf(TO_ELEM(pke));
2226 	}
2227 	dprintf("released %p\n", pke);
2228 #endif	/* DEBUG */
2229 	if (freeprop == PO_TRUE) {
2230 		nvlist_free(pke->pke_properties);
2231 	}
2232 	free(pke);
2233 }
2234 
2235 /*
2236  * pool_knl_elem_free_cb() is designed to be used with
2237  * dict_map(). When a connection is freed, this function is used to
2238  * free all element resources.
2239  */
2240 /* ARGSUSED1 */
2241 static void
2242 pool_knl_elem_free_cb(const void *key, void **value, void *cl)
2243 {
2244 	pool_knl_elem_t *pke = (pool_knl_elem_t *)key;
2245 
2246 #ifdef DEBUG
2247 	dprintf("pool_knl_elem_free_cb:\n");
2248 	dprintf("about to release %p ", pke);
2249 	pool_elem_dprintf(TO_ELEM(pke));
2250 #endif	/* DEBUG */
2251 	pool_knl_elem_free(pke, PO_TRUE);
2252 }
2253 
2254 /*
2255  * Free the resources for a kernel data provider.
2256  */
2257 void
2258 pool_knl_connection_free(pool_knl_connection_t *prov)
2259 {
2260 	if (prov->pkc_log != NULL) {
2261 		(void) log_walk(prov->pkc_log, log_item_release);
2262 		log_free(prov->pkc_log);
2263 	}
2264 	if (prov->pkc_elements != NULL) {
2265 		dict_map(prov->pkc_elements, pool_knl_elem_free_cb, NULL);
2266 #if DEBUG
2267 		dprintf("dict length is %llu\n", dict_length(prov->pkc_leaks));
2268 		dict_map(prov->pkc_leaks, pool_knl_elem_printf_cb,
2269 		    prov->pkc_elements);
2270 		assert(dict_length(prov->pkc_leaks) == 0);
2271 		dict_free(&prov->pkc_leaks);
2272 #endif	/* DEBUG */
2273 		dict_free(&prov->pkc_elements);
2274 	}
2275 	free((void *)prov->pc_name);
2276 	free(prov);
2277 }
2278 
2279 /*
2280  * Return the specified property value.
2281  *
2282  * POC_INVAL is returned if an error is detected and the error code is updated
2283  * to indicate the cause of the error.
2284  */
2285 pool_value_class_t
2286 pool_knl_get_property(const pool_elem_t *pe, const char *name,
2287     pool_value_t *val)
2288 {
2289 	pool_knl_elem_t *pke = (pool_knl_elem_t *)pe;
2290 	nvpair_t *pair;
2291 	const pool_prop_t *prop;
2292 
2293 	if ((prop = provider_get_prop(pe, name)) != NULL)
2294 		if (prop_is_stored(prop) == PO_FALSE)
2295 			return (pool_knl_get_dynamic_property(pe, name, val));
2296 
2297 	if ((pair = pool_knl_find_nvpair(pke->pke_properties, name)) == NULL) {
2298 		pool_seterror(POE_BADPARAM);
2299 		return (POC_INVAL);
2300 	}
2301 
2302 	if (pool_value_from_nvpair(val, pair) == PO_FAIL) {
2303 		return (POC_INVAL);
2304 	}
2305 
2306 	return (pool_value_get_type(val));
2307 }
2308 
2309 /*
2310  * Return the specified property value.
2311  *
2312  * If a property is designated as dynamic, then this function will
2313  * always try to return the latest value of the property from the
2314  * kernel.
2315  *
2316  * POC_INVAL is returned if an error is detected and the error code is updated
2317  * to indicate the cause of the error.
2318  */
2319 pool_value_class_t
2320 pool_knl_get_dynamic_property(const pool_elem_t *pe, const char *name,
2321     pool_value_t *val)
2322 {
2323 	pool_knl_connection_t *prov;
2324 	pool_propget_t propget = { 0 };
2325 	nvlist_t *proplist;
2326 	nvpair_t *pair;
2327 
2328 	propget.pp_o_id_type = pool_elem_class(pe);
2329 	if (pool_elem_class(pe) == PEC_RES_COMP ||
2330 	    pool_elem_class(pe) == PEC_RES_AGG)
2331 		propget.pp_o_id_subtype = pool_resource_elem_class(pe);
2332 	if (pool_elem_class(pe) == PEC_COMP)
2333 		propget.pp_o_id_subtype =
2334 		    (pool_resource_elem_class_t)pool_component_elem_class(pe);
2335 
2336 	propget.pp_o_id = elem_get_sysid(pe);
2337 	propget.pp_o_prop_name_size = strlen(name);
2338 	propget.pp_o_prop_name = (char *)name;
2339 	propget.pp_i_bufsize = KERNEL_SNAPSHOT_BUF_SZ;
2340 	propget.pp_i_buf = malloc(KERNEL_SNAPSHOT_BUF_SZ);
2341 	bzero(propget.pp_i_buf, KERNEL_SNAPSHOT_BUF_SZ);
2342 
2343 	prov = (pool_knl_connection_t *)(TO_CONF(pe))->pc_prov;
2344 	if (ioctl(prov->pkc_fd, POOL_PROPGET, &propget) < 0) {
2345 		free(propget.pp_i_buf);
2346 		pool_seterror(POE_SYSTEM);
2347 		return (POC_INVAL);
2348 	}
2349 	if (nvlist_unpack(propget.pp_i_buf, propget.pp_i_bufsize,
2350 	    &proplist, 0) != 0) {
2351 		free(propget.pp_i_buf);
2352 		pool_seterror(POE_SYSTEM);
2353 		return (POC_INVAL);
2354 	}
2355 	free(propget.pp_i_buf);
2356 
2357 	if ((pair = nvlist_next_nvpair(proplist, NULL)) == NULL) {
2358 		nvlist_free(proplist);
2359 		pool_seterror(POE_SYSTEM);
2360 		return (POC_INVAL);
2361 	}
2362 
2363 	if (pool_value_from_nvpair(val, pair) == PO_FAIL) {
2364 		nvlist_free(proplist);
2365 		return (POC_INVAL);
2366 	}
2367 	nvlist_free(proplist);
2368 	return (pool_value_get_type(val));
2369 }
2370 
2371 /*
2372  * Update the specified property value.
2373  *
2374  * PO_FAIL is returned if an error is detected and the error code is updated
2375  * to indicate the cause of the error.
2376  */
2377 int
2378 pool_knl_put_property(pool_elem_t *pe, const char *name,
2379     const pool_value_t *val)
2380 {
2381 	pool_knl_elem_t *pke = (pool_knl_elem_t *)pe;
2382 	pool_knl_connection_t *prov =
2383 	    (pool_knl_connection_t *)(TO_CONF(pe))->pc_prov;
2384 	nvpair_t *bp, *ap;
2385 	pool_propput_undo_t *propput;
2386 	nvlist_t *bl = NULL;
2387 	const pool_prop_t *prop;
2388 
2389 	if ((bp = pool_knl_find_nvpair(pke->pke_properties, name)) != NULL) {
2390 		if (nvlist_alloc(&bl, NV_UNIQUE_NAME_TYPE, 0) != 0) {
2391 			pool_seterror(POE_SYSTEM);
2392 			return (PO_FAIL);
2393 		}
2394 		if (nvlist_add_nvpair(bl, bp) != 0) {
2395 			nvlist_free(bl);
2396 			pool_seterror(POE_SYSTEM);
2397 			return (PO_FAIL);
2398 		}
2399 	}
2400 	if (pool_knl_nvlist_add_value(pke->pke_properties, name, val) !=
2401 	    PO_SUCCESS)
2402 		return (PO_FAIL);
2403 
2404 	if (prov->pkc_log->l_state != LS_DO) {
2405 		if (bl)
2406 			nvlist_free(bl);
2407 		return (PO_SUCCESS);
2408 	}
2409 	/*
2410 	 * The remaining logic is setting up the arguments for the
2411 	 * POOL_PROPPUT ioctl and appending the details into the log.
2412 	 */
2413 	if ((propput = malloc(sizeof (pool_propput_undo_t))) == NULL) {
2414 		pool_seterror(POE_SYSTEM);
2415 		return (PO_FAIL);
2416 	}
2417 	(void) memset(propput, 0, sizeof (pool_propput_undo_t));
2418 	propput->ppu_blist = bl;
2419 
2420 	ap = pool_knl_find_nvpair(pke->pke_properties, name);
2421 
2422 	if (nvlist_alloc(&propput->ppu_alist, NV_UNIQUE_NAME_TYPE, 0) != 0) {
2423 		nvlist_free(propput->ppu_blist);
2424 		free(propput);
2425 		pool_seterror(POE_SYSTEM);
2426 		return (PO_FAIL);
2427 	}
2428 	if (nvlist_add_nvpair(propput->ppu_alist, ap) != 0) {
2429 		nvlist_free(propput->ppu_blist);
2430 		nvlist_free(propput->ppu_alist);
2431 		free(propput);
2432 		pool_seterror(POE_SYSTEM);
2433 		return (PO_FAIL);
2434 	}
2435 
2436 	if (nvlist_pack(propput->ppu_alist,
2437 	    (char **)&propput->ppu_ioctl.pp_o_buf,
2438 	    &propput->ppu_ioctl.pp_o_bufsize, NV_ENCODE_NATIVE, 0) != 0) {
2439 		pool_seterror(POE_SYSTEM);
2440 		return (PO_FAIL);
2441 	}
2442 	nvlist_free(propput->ppu_alist);
2443 	propput->ppu_ioctl.pp_o_id_type = pool_elem_class(pe);
2444 	if (pool_elem_class(pe) == PEC_RES_COMP ||
2445 	    pool_elem_class(pe) == PEC_RES_AGG)
2446 		propput->ppu_ioctl.pp_o_id_sub_type =
2447 		    pool_resource_elem_class(pe);
2448 	if (pool_elem_class(pe) == PEC_COMP)
2449 		propput->ppu_ioctl.pp_o_id_sub_type =
2450 		    (pool_resource_elem_class_t)pool_component_elem_class(pe);
2451 
2452 	propput->ppu_elem = pe;
2453 	if ((prop = provider_get_prop(propput->ppu_elem, name)) != NULL) {
2454 		if (prop_is_readonly(prop) == PO_TRUE)
2455 			propput->ppu_doioctl |= KERNEL_PROP_RDONLY;
2456 	}
2457 
2458 	if (log_append(prov->pkc_log, POOL_PROPPUT, (void *)propput) !=
2459 	    PO_SUCCESS) {
2460 		nvlist_free(propput->ppu_blist);
2461 		free(propput);
2462 		return (PO_FAIL);
2463 	}
2464 	return (PO_SUCCESS);
2465 }
2466 
2467 /*
2468  * Remove the specified property value.
2469  *
2470  * PO_FAIL is returned if an error is detected and the error code is
2471  * updated to indicate the cause of the error.
2472  */
2473 int
2474 pool_knl_rm_property(pool_elem_t *pe, const char *name)
2475 {
2476 	pool_knl_elem_t *pke = (pool_knl_elem_t *)pe;
2477 	pool_knl_connection_t *prov =
2478 	    (pool_knl_connection_t *)(TO_CONF(pe))->pc_prov;
2479 	pool_proprm_undo_t *proprm;
2480 
2481 	if (pool_knl_find_nvpair(pke->pke_properties, name) == NULL) {
2482 		pool_seterror(POE_BADPARAM);
2483 		return (PO_FAIL);
2484 	}
2485 
2486 	if ((proprm = malloc(sizeof (pool_proprm_undo_t))) == NULL) {
2487 		pool_seterror(POE_SYSTEM);
2488 		return (PO_FAIL);
2489 	}
2490 	(void) memset(proprm, 0, sizeof (pool_proprm_undo_t));
2491 	proprm->pru_oldval.pv_class = POC_INVAL;
2492 	(void) pool_get_property(TO_CONF(pe), pe, name, &proprm->pru_oldval);
2493 
2494 	if (prov->pkc_log->l_state != LS_DO) {
2495 		free(proprm);
2496 		(void) nvlist_remove_all(pke->pke_properties, (char *)name);
2497 		return (PO_SUCCESS);
2498 	}
2499 	/*
2500 	 * The remaining logic is setting up the arguments for the
2501 	 * POOL_PROPRM ioctl and appending the details into the log.
2502 	 */
2503 
2504 	proprm->pru_ioctl.pp_o_id_type = pool_elem_class(pe);
2505 	if (pool_elem_class(pe) == PEC_RES_COMP ||
2506 	    pool_elem_class(pe) == PEC_RES_AGG)
2507 		proprm->pru_ioctl.pp_o_id_sub_type =
2508 		    pool_resource_elem_class(pe);
2509 
2510 	if (pool_elem_class(pe) == PEC_COMP)
2511 		proprm->pru_ioctl.pp_o_id_sub_type =
2512 		    (pool_resource_elem_class_t)pool_component_elem_class(pe);
2513 
2514 	proprm->pru_ioctl.pp_o_prop_name_size = strlen(name);
2515 	proprm->pru_ioctl.pp_o_prop_name =
2516 	    (char *)pool_value_get_name(&proprm->pru_oldval);
2517 	proprm->pru_elem = pe;
2518 
2519 	if (log_append(prov->pkc_log, POOL_PROPRM, (void *)proprm) !=
2520 	    PO_SUCCESS) {
2521 		free(proprm);
2522 		return (PO_FAIL);
2523 	}
2524 
2525 	(void) nvlist_remove_all(pke->pke_properties, (char *)name);
2526 	return (PO_SUCCESS);
2527 }
2528 
2529 /*
2530  * Return a NULL terminated array of pool_value_t which represents all
2531  * of the properties stored for an element
2532  *
2533  * Return NULL on failure. It is the caller's responsibility to free
2534  * the returned array of values.
2535  */
2536 pool_value_t **
2537 pool_knl_get_properties(const pool_elem_t *pe, uint_t *nprops)
2538 {
2539 	nvpair_t *pair;
2540 	pool_value_t **result;
2541 	pool_knl_elem_t *pke = (pool_knl_elem_t *)pe;
2542 	int i = 0;
2543 
2544 	*nprops = 0;
2545 
2546 	for (pair = nvlist_next_nvpair(pke->pke_properties, NULL); pair != NULL;
2547 		pair = nvlist_next_nvpair(pke->pke_properties, pair))
2548 		(*nprops)++;
2549 	if ((result = calloc(*nprops + 1, sizeof (pool_value_t *))) == NULL) {
2550 		pool_seterror(POE_SYSTEM);
2551 		return (NULL);
2552 	}
2553 	for (pair = nvlist_next_nvpair(pke->pke_properties, NULL); pair != NULL;
2554 	    pair = nvlist_next_nvpair(pke->pke_properties, pair), i++) {
2555 		result[i] = pool_value_alloc();
2556 		if (pool_value_from_nvpair(result[i], pair) == PO_FAIL) {
2557 			while (i-- >= 0)
2558 				pool_value_free(result[i]);
2559 			free(result);
2560 			return (NULL);
2561 		}
2562 	}
2563 	return (result);
2564 }
2565 
2566 /*
2567  * Append an entry to a result set. Reallocate the array used to store
2568  * results if it's full.
2569  * Returns PO_SUCCESS/PO_FAIL
2570  */
2571 int
2572 pool_knl_result_set_append(pool_knl_result_set_t *rs, pool_knl_elem_t *pke)
2573 {
2574 	if (rs->pkr_count == rs->pkr_size)
2575 		if (pool_knl_result_set_realloc(rs) != PO_SUCCESS)
2576 			return (PO_FAIL);
2577 
2578 	rs->pkr_list[rs->pkr_count++] = pke;
2579 
2580 	return (PO_SUCCESS);
2581 }
2582 
2583 /*
2584  * Resize the array used to store results. A simple doubling strategy
2585  * is used.
2586  * Returns PO_SUCCESS/PO_FAIL
2587  */
2588 int
2589 pool_knl_result_set_realloc(pool_knl_result_set_t *rs)
2590 {
2591 	pool_knl_elem_t **old_list = rs->pkr_list;
2592 	int new_size = rs->pkr_size * 2;
2593 
2594 	if ((rs->pkr_list = realloc(rs->pkr_list,
2595 	    new_size * sizeof (pool_knl_elem_t *))) == NULL) {
2596 		rs->pkr_list = old_list;
2597 		pool_seterror(POE_SYSTEM);
2598 		return (PO_FAIL);
2599 	}
2600 	rs->pkr_size = new_size;
2601 
2602 	return (PO_SUCCESS);
2603 }
2604 
2605 /*
2606  * Allocate a result set. The Result Set stores the result of a query.
2607  * Returns pool_knl_result_set_t pointer/NULL
2608  */
2609 pool_knl_result_set_t *
2610 pool_knl_result_set_alloc(const pool_conf_t *conf)
2611 {
2612 	pool_knl_result_set_t *rs;
2613 
2614 	if ((rs = malloc(sizeof (pool_knl_result_set_t))) == NULL) {
2615 		pool_seterror(POE_SYSTEM);
2616 		return (NULL);
2617 	}
2618 	(void) memset(rs, 0, sizeof (pool_knl_result_set_t));
2619 	rs->pkr_size = KERNEL_RS_INITIAL_SZ;
2620 	if (pool_knl_result_set_realloc(rs) == PO_FAIL) {
2621 		free(rs);
2622 		pool_seterror(POE_SYSTEM);
2623 		return (NULL);
2624 	}
2625 	rs->prs_conf = conf;
2626 	rs->prs_index = -1;
2627 	rs->prs_active = PO_TRUE;
2628 	/* Fix up the result set accessor functions to the knl specfic ones */
2629 	rs->prs_next = pool_knl_rs_next;
2630 	rs->prs_prev = pool_knl_rs_prev;
2631 	rs->prs_first = pool_knl_rs_first;
2632 	rs->prs_last = pool_knl_rs_last;
2633 	rs->prs_get_index = pool_knl_rs_get_index;
2634 	rs->prs_set_index = pool_knl_rs_set_index;
2635 	rs->prs_close = pool_knl_rs_close;
2636 	rs->prs_count = pool_knl_rs_count;
2637 	return (rs);
2638 }
2639 
2640 /*
2641  * Free a result set. Ensure that the resources are all released at
2642  * this point.
2643  */
2644 void
2645 pool_knl_result_set_free(pool_knl_result_set_t *rs)
2646 {
2647 	free(rs->pkr_list);
2648 	free(rs);
2649 }
2650 /*
2651  * Return the next element in a result set.
2652  * Returns pool_elem_t pointer/NULL
2653  */
2654 pool_elem_t *
2655 pool_knl_rs_next(pool_result_set_t *set)
2656 {
2657 	pool_knl_result_set_t *kset = (pool_knl_result_set_t *)set;
2658 
2659 	if (kset->prs_index == kset->pkr_count - 1)
2660 		return (NULL);
2661 	return ((pool_elem_t *)kset->pkr_list[++kset->prs_index]);
2662 }
2663 
2664 /*
2665  * Return the previous element in a result set.
2666  * Returns pool_elem_t pointer/NULL
2667  */
2668 pool_elem_t *
2669 pool_knl_rs_prev(pool_result_set_t *set)
2670 {
2671 	pool_knl_result_set_t *kset = (pool_knl_result_set_t *)set;
2672 
2673 	if (kset->prs_index < 0)
2674 		return (NULL);
2675 	return ((pool_elem_t *)kset->pkr_list[kset->prs_index--]);
2676 }
2677 
2678 /*
2679  * Sets the current index in a result set.
2680  * Returns PO_SUCCESS/PO_FAIL
2681  */
2682 int
2683 pool_knl_rs_set_index(pool_result_set_t *set, int index)
2684 {
2685 	pool_knl_result_set_t *kset = (pool_knl_result_set_t *)set;
2686 
2687 	if (index < 0 || index >= kset->pkr_count) {
2688 		pool_seterror(POE_BADPARAM);
2689 		return (PO_FAIL);
2690 	}
2691 	kset->prs_index = index;
2692 	return (PO_SUCCESS);
2693 }
2694 
2695 /*
2696  * Return the current index in a result set.
2697  * Returns current index
2698  */
2699 int
2700 pool_knl_rs_get_index(pool_result_set_t *set)
2701 {
2702 	pool_knl_result_set_t *kset = (pool_knl_result_set_t *)set;
2703 
2704 	return (kset->prs_index);
2705 }
2706 
2707 /*
2708  * Return the first element in a result set.
2709  * Returns pool_elem_t pointer/NULL
2710  */
2711 pool_elem_t *
2712 pool_knl_rs_first(pool_result_set_t *set)
2713 {
2714 	pool_knl_result_set_t *kset = (pool_knl_result_set_t *)set;
2715 
2716 	return ((pool_elem_t *)kset->pkr_list[0]);
2717 }
2718 
2719 /*
2720  * Return the last element in a result set.
2721  * Returns pool_elem_t pointer/NULL
2722  */
2723 pool_elem_t *
2724 pool_knl_rs_last(pool_result_set_t *set)
2725 {
2726 	pool_knl_result_set_t *kset = (pool_knl_result_set_t *)set;
2727 
2728 	return ((pool_elem_t *)kset->pkr_list[kset->pkr_count - 1]);
2729 }
2730 
2731 /*
2732  * Return the number of results in a result set.
2733  * Returns result count
2734  */
2735 int
2736 pool_knl_rs_count(pool_result_set_t *set)
2737 {
2738 	pool_knl_result_set_t *kset = (pool_knl_result_set_t *)set;
2739 
2740 	return (kset->pkr_count);
2741 }
2742 
2743 
2744 /*
2745  * Close a result set. Free the resources
2746  * Returns PO_SUCCESS/PO_FAIL
2747  */
2748 int
2749 pool_knl_rs_close(pool_result_set_t *set)
2750 {
2751 	pool_knl_result_set_t *kset = (pool_knl_result_set_t *)set;
2752 
2753 	pool_knl_result_set_free(kset);
2754 	return (PO_SUCCESS);
2755 }
2756 
2757 /*
2758  * Commit an individual transaction log item(). This processing is
2759  * essential to the pool_conf_commit() logic. When pool_conf_commit()
2760  * is invoked, the pending transaction log for the configuration is
2761  * walked and all pending changes to the kernel are invoked. If a
2762  * change succeeds it is marked in the log as successful and
2763  * processing continues, if it fails then failure is returned and the
2764  * log will be "rolled back" to undo changes to the library snapshot
2765  * and the kernel.
2766  */
2767 int
2768 log_item_commit(log_item_t *li)
2769 {
2770 	pool_knl_connection_t *prov =
2771 	    (pool_knl_connection_t *)li->li_log->l_conf->pc_prov;
2772 	pool_create_undo_t *create;
2773 	pool_destroy_undo_t *destroy;
2774 	pool_assoc_undo_t *assoc;
2775 	pool_dissoc_undo_t *dissoc;
2776 	pool_propput_undo_t *propput;
2777 	pool_proprm_undo_t *proprm;
2778 	pool_xtransfer_undo_t *xtransfer;
2779 	char_buf_t *cb;
2780 	size_t size;
2781 	pool_elem_t *pair;
2782 	pool_value_t val = POOL_VALUE_INITIALIZER;
2783 	int ret;
2784 
2785 	switch (li->li_op) {
2786 	case POOL_CREATE:
2787 		create = (pool_create_undo_t *)li->li_details;
2788 		if ((cb = alloc_char_buf(CB_DEFAULT_LEN)) == NULL)
2789 			return (PO_FAIL);
2790 		if (set_char_buf(cb, "%s.sys_id",
2791 		    pool_elem_class_string(create->pcu_elem)) != PO_SUCCESS) {
2792 			free_char_buf(cb);
2793 			return (PO_FAIL);
2794 		}
2795 #ifdef DEBUG
2796 		dprintf("log_item_commit: POOL_CREATE, remove from dict\n");
2797 		pool_elem_dprintf(create->pcu_elem);
2798 #endif	/* DEBUG */
2799 		/*
2800 		 * May not need to remove the element if it was
2801 		 * already destroyed before commit. Just cast the
2802 		 * return to void.
2803 		 */
2804 		(void) dict_remove(prov->pkc_elements,
2805 		    (pool_knl_elem_t *)create->pcu_elem);
2806 
2807 		if (ioctl(prov->pkc_fd, POOL_CREATE, &create->pcu_ioctl) < 0) {
2808 			pool_seterror(POE_SYSTEM);
2809 			return (PO_FAIL);
2810 		}
2811 		/*
2812 		 * Now that we have created our element in the kernel,
2813 		 * it has a valid allocated system id. Remove the
2814 		 * element from the element dictionary, using the
2815 		 * current key, and then re-insert under the new key.
2816 		 */
2817 #ifdef DEBUG
2818 		pool_elem_dprintf(create->pcu_elem);
2819 #endif	/* DEBUG */
2820 		assert(nvlist_add_int64(
2821 		    ((pool_knl_elem_t *)create->pcu_elem)->pke_properties,
2822 		    cb->cb_buf, create->pcu_ioctl.pc_i_id) == 0);
2823 		free_char_buf(cb);
2824 		assert(dict_put(prov->pkc_elements, create->pcu_elem,
2825 		    create->pcu_elem) == NULL);
2826 		/*
2827 		 * If the element has a pair in the static
2828 		 * configuration, update it with the sys_id
2829 		 */
2830 		if ((pair = pool_get_pair(create->pcu_elem)) != NULL) {
2831 			pool_value_set_int64(&val, create->pcu_ioctl.pc_i_id);
2832 			assert(pool_put_any_ns_property(pair, c_sys_prop, &val)
2833 			    == PO_SUCCESS);
2834 		}
2835 		li->li_state = LS_UNDO;
2836 		break;
2837 	case POOL_DESTROY:
2838 		destroy = (pool_destroy_undo_t *)li->li_details;
2839 
2840 		destroy->pdu_ioctl.pd_o_id = elem_get_sysid(destroy->pdu_elem);
2841 
2842 		/*
2843 		 * It may be that this element was created in the last
2844 		 * transaction. In which case POOL_CREATE, above, will
2845 		 * have re-inserted the element in the dictionary. Try
2846 		 * to remove it just in case this has occurred.
2847 		 */
2848 		(void) dict_remove(prov->pkc_elements,
2849 		    (pool_knl_elem_t *)destroy->pdu_elem);
2850 		while ((ret = ioctl(prov->pkc_fd, POOL_DESTROY,
2851 		    &destroy->pdu_ioctl)) < 0 && errno == EAGAIN);
2852 		if (ret < 0) {
2853 			pool_seterror(POE_SYSTEM);
2854 			return (PO_FAIL);
2855 		}
2856 #ifdef DEBUG
2857 		dprintf("log_item_commit: POOL_DESTROY\n");
2858 		pool_elem_dprintf(destroy->pdu_elem);
2859 #endif	/* DEBUG */
2860 		li->li_state = LS_UNDO;
2861 		break;
2862 	case POOL_ASSOC:
2863 		assoc = (pool_assoc_undo_t *)li->li_details;
2864 
2865 		assoc->pau_ioctl.pa_o_pool_id =
2866 		    elem_get_sysid(assoc->pau_assoc);
2867 		assoc->pau_ioctl.pa_o_res_id =
2868 		    elem_get_sysid(assoc->pau_newres);
2869 		while ((ret = ioctl(prov->pkc_fd, POOL_ASSOC,
2870 		    &assoc->pau_ioctl)) < 0 && errno == EAGAIN);
2871 		if (ret < 0) {
2872 			pool_seterror(POE_SYSTEM);
2873 			return (PO_FAIL);
2874 		}
2875 		li->li_state = LS_UNDO;
2876 		break;
2877 	case POOL_DISSOC:
2878 		dissoc = (pool_dissoc_undo_t *)li->li_details;
2879 
2880 		dissoc->pdu_ioctl.pd_o_pool_id =
2881 		    elem_get_sysid(dissoc->pdu_dissoc);
2882 
2883 		while ((ret = ioctl(prov->pkc_fd, POOL_DISSOC,
2884 		    &dissoc->pdu_ioctl)) < 0 && errno == EAGAIN);
2885 		if (ret < 0) {
2886 			pool_seterror(POE_SYSTEM);
2887 			return (PO_FAIL);
2888 		}
2889 		li->li_state = LS_UNDO;
2890 		break;
2891 	case POOL_TRANSFER:
2892 		li->li_state = LS_UNDO;
2893 		pool_seterror(POE_BADPARAM);
2894 		return (PO_FAIL);
2895 		break;
2896 	case POOL_XTRANSFER:
2897 		xtransfer = (pool_xtransfer_undo_t *)li->li_details;
2898 
2899 		xtransfer->pxu_ioctl.px_o_src_id =
2900 		    elem_get_sysid(xtransfer->pxu_src);
2901 		xtransfer->pxu_ioctl.px_o_tgt_id =
2902 		    elem_get_sysid(xtransfer->pxu_tgt);
2903 		for (size = 0; xtransfer->pxu_rl[size] != NULL; size ++) {
2904 			xtransfer->pxu_ioctl.px_o_comp_list[size] =
2905 			    elem_get_sysid(TO_ELEM(xtransfer->pxu_rl[size]));
2906 #ifdef DEBUG
2907 			dprintf("log_item_commit: POOL_XTRANSFER\n");
2908 			pool_elem_dprintf(TO_ELEM(xtransfer->pxu_rl[size]));
2909 #endif	/* DEBUG */
2910 		}
2911 
2912 		/*
2913 		 * Don't actually transfer resources if the configuration
2914 		 * is in POF_DESTROY state. This is to prevent problems
2915 		 * relating to transferring off-line CPUs. Instead rely
2916 		 * on the POOL_DESTROY ioctl to transfer the CPUS.
2917 		 */
2918 		if (li->li_log->l_conf->pc_state != POF_DESTROY &&
2919 		    ioctl(prov->pkc_fd, POOL_XTRANSFER,
2920 		    &xtransfer->pxu_ioctl) < 0) {
2921 #ifdef DEBUG
2922 			dprintf("log_item_commit: POOL_XTRANSFER, ioctl "
2923 			    "failed\n");
2924 #endif	/* DEBUG */
2925 			pool_seterror(POE_SYSTEM);
2926 			return (PO_FAIL);
2927 		}
2928 		li->li_state = LS_UNDO;
2929 		break;
2930 	case POOL_PROPPUT:
2931 		propput = (pool_propput_undo_t *)li->li_details;
2932 
2933 		if (pool_elem_class(propput->ppu_elem) != PEC_SYSTEM) {
2934 			propput->ppu_ioctl.pp_o_id =
2935 			    elem_get_sysid(propput->ppu_elem);
2936 		}
2937 		/*
2938 		 * Some properties, e.g. pset.size, are read-only in the
2939 		 * kernel and attempting to change them will fail and cause
2940 		 * problems. Although this property is read-only through the
2941 		 * public interface, the library needs to modify it's value.
2942 		 */
2943 		if ((propput->ppu_doioctl & KERNEL_PROP_RDONLY) == 0) {
2944 			if (ioctl(prov->pkc_fd, POOL_PROPPUT,
2945 			    &propput->ppu_ioctl) < 0) {
2946 				pool_seterror(POE_SYSTEM);
2947 				return (PO_FAIL);
2948 			}
2949 		}
2950 		li->li_state = LS_UNDO;
2951 		break;
2952 	case POOL_PROPRM:
2953 		proprm = (pool_proprm_undo_t *)li->li_details;
2954 
2955 		if (pool_elem_class(proprm->pru_elem) != PEC_SYSTEM) {
2956 			proprm->pru_ioctl.pp_o_id =
2957 			    elem_get_sysid(proprm->pru_elem);
2958 		}
2959 		if (ioctl(prov->pkc_fd, POOL_PROPRM, &proprm->pru_ioctl) < 0) {
2960 			pool_seterror(POE_SYSTEM);
2961 			return (PO_FAIL);
2962 		}
2963 		li->li_state = LS_UNDO;
2964 		break;
2965 	default:
2966 		return (PO_FAIL);
2967 	}
2968 	return (PO_SUCCESS);
2969 }
2970 
2971 /*
2972  * Undo an individual transaction log item(). This processing is
2973  * essential to the pool_conf_commit() and pool_conf_rollback()
2974  * logic. Changes to the libpool snapshot and the kernel are carried
2975  * out separately. The library snapshot is updated synchronously,
2976  * however the kernel update is delayed until the user calls
2977  * pool_conf_commit().
2978  *
2979  * When undoing transactions, library changes will be undone unless
2980  * this invocation is as a result of a commit failure, in which case
2981  * the log state will be LS_RECOVER. Kernel changes will only be
2982  * undone if they are marked as having been done, in which case the
2983  * log item state will be LS_UNDO.
2984  */
2985 int
2986 log_item_undo(log_item_t *li)
2987 {
2988 	pool_knl_connection_t *prov =
2989 	    (pool_knl_connection_t *)li->li_log->l_conf->pc_prov;
2990 	pool_create_undo_t *create;
2991 	pool_destroy_undo_t *destroy;
2992 	pool_assoc_undo_t *assoc;
2993 	pool_dissoc_undo_t *dissoc;
2994 	pool_propput_undo_t *propput;
2995 	pool_proprm_undo_t *proprm;
2996 	pool_xtransfer_undo_t *xtransfer;
2997 	char_buf_t *cb;
2998 	size_t size;
2999 	pool_destroy_t u_destroy;
3000 	pool_create_t u_create;
3001 	pool_assoc_t u_assoc;
3002 	pool_xtransfer_t u_xtransfer;
3003 	pool_propput_t u_propput;
3004 	pool_proprm_t u_proprm;
3005 	pool_conf_t *conf = li->li_log->l_conf;
3006 	nvpair_t *pair;
3007 	nvlist_t *tmplist;
3008 	int ret;
3009 
3010 	if (li->li_log->l_state != LS_RECOVER) {
3011 	switch (li->li_op) {
3012 	case POOL_CREATE:
3013 		create = (pool_create_undo_t *)li->li_details;
3014 
3015 		(void) dict_remove(prov->pkc_elements, create->pcu_elem);
3016 #ifdef DEBUG
3017 		dprintf("log_item_undo: POOL_CREATE\n");
3018 		assert(create->pcu_elem != NULL);
3019 		dprintf("log_item_undo: POOL_CREATE %p\n", create->pcu_elem);
3020 		pool_elem_dprintf(create->pcu_elem);
3021 #endif	/* DEBUG */
3022 		pool_knl_elem_free((pool_knl_elem_t *)create->pcu_elem,
3023 		    PO_TRUE);
3024 		break;
3025 	case POOL_DESTROY:
3026 		destroy = (pool_destroy_undo_t *)li->li_details;
3027 
3028 		assert(dict_put(prov->pkc_elements, destroy->pdu_elem,
3029 		    destroy->pdu_elem) == NULL);
3030 		break;
3031 	case POOL_ASSOC:
3032 		assoc = (pool_assoc_undo_t *)li->li_details;
3033 
3034 		if (assoc->pau_oldres != NULL)
3035 			((pool_knl_pool_t *)assoc->pau_assoc)->pkp_assoc
3036 			    [pool_resource_elem_class(assoc->pau_oldres)] =
3037 			    (pool_knl_resource_t *)assoc->pau_oldres;
3038 		break;
3039 	case POOL_DISSOC:
3040 		dissoc = (pool_dissoc_undo_t *)li->li_details;
3041 
3042 		if (dissoc->pdu_oldres != NULL)
3043 			((pool_knl_pool_t *)dissoc->pdu_dissoc)->pkp_assoc
3044 			    [pool_resource_elem_class(dissoc->pdu_oldres)] =
3045 			    (pool_knl_resource_t *)dissoc->pdu_oldres;
3046 		break;
3047 	case POOL_TRANSFER:
3048 		pool_seterror(POE_BADPARAM);
3049 		return (PO_FAIL);
3050 		break;
3051 	case POOL_XTRANSFER:
3052 		xtransfer = (pool_xtransfer_undo_t *)li->li_details;
3053 
3054 		for (size = 0; xtransfer->pxu_rl[size] != NULL; size++) {
3055 			pool_value_t val = POOL_VALUE_INITIALIZER;
3056 			uint64_t src_size;
3057 			uint64_t tgt_size;
3058 
3059 			if (pool_set_container(xtransfer->pxu_src,
3060 			    TO_ELEM(xtransfer->pxu_rl[size])) == PO_FAIL) {
3061 				return (PO_FAIL);
3062 			}
3063 			/*
3064 			 * Maintain the library view of the size
3065 			 */
3066 			if (resource_get_size(pool_elem_res(xtransfer->pxu_src),
3067 			    &src_size) != PO_SUCCESS ||
3068 			    resource_get_size(pool_elem_res(xtransfer->pxu_tgt),
3069 			    &tgt_size) != PO_SUCCESS) {
3070 				pool_seterror(POE_BADPARAM);
3071 				return (PO_FAIL);
3072 			}
3073 			src_size++;
3074 			tgt_size--;
3075 			pool_value_set_uint64(&val, src_size);
3076 			(void) pool_put_any_ns_property(xtransfer->pxu_src,
3077 			    c_size_prop, &val);
3078 			pool_value_set_uint64(&val, tgt_size);
3079 			(void) pool_put_any_ns_property(xtransfer->pxu_tgt,
3080 			    c_size_prop, &val);
3081 		}
3082 		break;
3083 	case POOL_PROPPUT:
3084 		propput = (pool_propput_undo_t *)li->li_details;
3085 
3086 		if ((propput->ppu_doioctl & KERNEL_PROP_RDONLY) == 0) {
3087 			if (propput->ppu_blist != NULL) {
3088 				if (nvlist_merge(
3089 				    ((pool_knl_elem_t *)propput->ppu_elem)->
3090 				    pke_properties, propput->ppu_blist, 0)
3091 				    != 0) {
3092 					pool_seterror(POE_SYSTEM);
3093 					return (PO_FAIL);
3094 				}
3095 			} else {
3096 				if (nvlist_unpack(propput->ppu_ioctl.pp_o_buf,
3097 				    propput->ppu_ioctl.pp_o_bufsize,
3098 				    &propput->ppu_alist, 0) != 0) {
3099 					pool_seterror(POE_SYSTEM);
3100 					return (PO_FAIL);
3101 				}
3102 				pair = nvlist_next_nvpair(propput->ppu_alist,
3103 				    NULL);
3104 				(void) nvlist_remove_all(((pool_knl_elem_t *)
3105 				    propput->ppu_elem)->pke_properties,
3106 				    nvpair_name(pair));
3107 				nvlist_free(propput->ppu_alist);
3108 			}
3109 		}
3110 		break;
3111 	case POOL_PROPRM:
3112 		proprm = (pool_proprm_undo_t *)li->li_details;
3113 
3114 		if (pool_value_get_type(&proprm->pru_oldval) != POC_INVAL) {
3115 			if (pool_put_property(conf, proprm->pru_elem,
3116 			    proprm->pru_ioctl.pp_o_prop_name,
3117 			    &proprm->pru_oldval) != PO_SUCCESS) {
3118 				return (PO_FAIL);
3119 			}
3120 		}
3121 		break;
3122 	default:
3123 		return (PO_FAIL);
3124 	}
3125 	}
3126 	/*
3127 	 * Only try to undo the state of the kernel if we modified it.
3128 	 */
3129 	if (li->li_state == LS_DO) {
3130 		return (PO_SUCCESS);
3131 	}
3132 
3133 	switch (li->li_op) {
3134 	case POOL_CREATE:
3135 		create = (pool_create_undo_t *)li->li_details;
3136 
3137 		u_destroy.pd_o_type = create->pcu_ioctl.pc_o_type;
3138 		u_destroy.pd_o_sub_type = create->pcu_ioctl.pc_o_sub_type;
3139 		u_destroy.pd_o_id = create->pcu_ioctl.pc_i_id;
3140 
3141 		while ((ret = ioctl(prov->pkc_fd, POOL_DESTROY,
3142 		    &u_destroy)) < 0 && errno == EAGAIN);
3143 		if (ret < 0) {
3144 			pool_seterror(POE_SYSTEM);
3145 			return (PO_FAIL);
3146 		}
3147 		li->li_state = LS_DO;
3148 		break;
3149 	case POOL_DESTROY:
3150 		destroy = (pool_destroy_undo_t *)li->li_details;
3151 
3152 		u_create.pc_o_type = destroy->pdu_ioctl.pd_o_type;
3153 		u_create.pc_o_sub_type = destroy->pdu_ioctl.pd_o_sub_type;
3154 
3155 		if (ioctl(prov->pkc_fd, POOL_CREATE, &u_create) < 0) {
3156 			pool_seterror(POE_SYSTEM);
3157 			return (PO_FAIL);
3158 		}
3159 
3160 		if ((cb = alloc_char_buf(CB_DEFAULT_LEN)) == NULL) {
3161 			return (PO_FAIL);
3162 		}
3163 		if (set_char_buf(cb, "%s.sys_id",
3164 		    pool_elem_class_string(destroy->pdu_elem)) != PO_SUCCESS) {
3165 			free_char_buf(cb);
3166 			return (PO_FAIL);
3167 		}
3168 		(void) nvlist_add_int64(
3169 		    ((pool_knl_elem_t *)destroy->pdu_elem)->pke_properties,
3170 		    cb->cb_buf, u_create.pc_i_id);
3171 		free_char_buf(cb);
3172 		if (dict_put(prov->pkc_elements, destroy->pdu_elem,
3173 		    destroy->pdu_elem) != NULL) {
3174 			pool_seterror(POE_SYSTEM);
3175 			return (PO_FAIL);
3176 		}
3177 		/*
3178 		 * Now we need to reset all the properties and
3179 		 * associations in the kernel for this newly created
3180 		 * replacement.
3181 		 */
3182 		u_propput.pp_o_id_type = destroy->pdu_ioctl.pd_o_type;
3183 		u_propput.pp_o_id_sub_type = destroy->pdu_ioctl.pd_o_sub_type;
3184 		u_propput.pp_o_id = u_create.pc_i_id;
3185 		u_propput.pp_o_buf = NULL;
3186 		/*
3187 		 * Remove the read-only properties before attempting
3188 		 * to restore the state of the newly created property
3189 		 */
3190 		(void) nvlist_dup(((pool_knl_elem_t *)destroy->pdu_elem)->
3191 		pke_properties, &tmplist, 0);
3192 		for (pair = nvlist_next_nvpair(tmplist, NULL); pair != NULL;
3193 		    pair = nvlist_next_nvpair(tmplist, pair)) {
3194 			const pool_prop_t *prop;
3195 			char *name = nvpair_name(pair);
3196 			if ((prop = provider_get_prop(destroy->pdu_elem,
3197 			    name)) != NULL)
3198 				if (prop_is_readonly(prop) == PO_TRUE)
3199 					(void) nvlist_remove_all(tmplist, name);
3200 		}
3201 		if (nvlist_pack(tmplist, (char **)&u_propput.pp_o_buf,
3202 		    &u_propput.pp_o_bufsize, NV_ENCODE_NATIVE, 0) != 0) {
3203 			pool_seterror(POE_SYSTEM);
3204 			return (PO_FAIL);
3205 		}
3206 		nvlist_free(tmplist);
3207 		if (ioctl(prov->pkc_fd, POOL_PROPPUT, &u_propput) < 0) {
3208 			free(u_propput.pp_o_buf);
3209 			pool_seterror(POE_SYSTEM);
3210 			return (PO_FAIL);
3211 		}
3212 		free(u_propput.pp_o_buf);
3213 		/*
3214 		 * Now reset the associations for all the resource
3215 		 * types if the thing which we are recreating is a
3216 		 * pool
3217 		 *
3218 		 * TODO: This is resource specific and must be
3219 		 * extended for additional resource types.
3220 		 */
3221 		if (destroy->pdu_ioctl.pd_o_type == PEC_POOL) {
3222 			u_assoc.pa_o_pool_id = u_create.pc_i_id;
3223 			u_assoc.pa_o_res_id =
3224 			    elem_get_sysid(
3225 			    TO_ELEM(((pool_knl_pool_t *)destroy->pdu_elem)->
3226 			    pkp_assoc[PREC_PSET]));
3227 			u_assoc.pa_o_id_type = PREC_PSET;
3228 
3229 			if (ioctl(prov->pkc_fd, POOL_ASSOC, &u_assoc) < 0) {
3230 				pool_seterror(POE_SYSTEM);
3231 				return (PO_FAIL);
3232 			}
3233 		}
3234 		li->li_state = LS_DO;
3235 		break;
3236 	case POOL_ASSOC:
3237 		assoc = (pool_assoc_undo_t *)li->li_details;
3238 
3239 		u_assoc.pa_o_pool_id = elem_get_sysid(assoc->pau_assoc);
3240 		u_assoc.pa_o_res_id = elem_get_sysid(assoc->pau_oldres);
3241 		u_assoc.pa_o_id_type = assoc->pau_ioctl.pa_o_id_type;
3242 
3243 		while ((ret = ioctl(prov->pkc_fd, POOL_ASSOC, &u_assoc)) < 0 &&
3244 		    errno == EAGAIN);
3245 		if (ret < 0) {
3246 			pool_seterror(POE_SYSTEM);
3247 			return (PO_FAIL);
3248 		}
3249 		li->li_state = LS_DO;
3250 		break;
3251 	case POOL_DISSOC:
3252 		dissoc = (pool_dissoc_undo_t *)li->li_details;
3253 
3254 		u_assoc.pa_o_pool_id = elem_get_sysid(dissoc->pdu_dissoc);
3255 		u_assoc.pa_o_res_id = elem_get_sysid(dissoc->pdu_oldres);
3256 		u_assoc.pa_o_id_type = dissoc->pdu_ioctl.pd_o_id_type;
3257 
3258 		while ((ret = ioctl(prov->pkc_fd, POOL_ASSOC, &u_assoc)) < 0 &&
3259 		    errno == EAGAIN);
3260 		if (ret < 0) {
3261 			pool_seterror(POE_SYSTEM);
3262 			return (PO_FAIL);
3263 		}
3264 		li->li_state = LS_DO;
3265 		break;
3266 	case POOL_TRANSFER:
3267 		li->li_state = LS_DO;
3268 		pool_seterror(POE_BADPARAM);
3269 		return (PO_FAIL);
3270 		break;
3271 	case POOL_XTRANSFER:
3272 		xtransfer = (pool_xtransfer_undo_t *)li->li_details;
3273 
3274 		(void) memcpy(&u_xtransfer, &xtransfer->pxu_ioctl,
3275 		    sizeof (pool_xtransfer_t));
3276 		u_xtransfer.px_o_src_id = elem_get_sysid(xtransfer->pxu_tgt);
3277 		u_xtransfer.px_o_tgt_id = elem_get_sysid(xtransfer->pxu_src);
3278 
3279 		if (ioctl(prov->pkc_fd, POOL_XTRANSFER, &u_xtransfer) < 0) {
3280 			pool_seterror(POE_SYSTEM);
3281 			return (PO_FAIL);
3282 		}
3283 		li->li_state = LS_DO;
3284 		break;
3285 	case POOL_PROPPUT:
3286 		propput = (pool_propput_undo_t *)li->li_details;
3287 
3288 		if ((propput->ppu_doioctl & KERNEL_PROP_RDONLY) == 0) {
3289 			if (propput->ppu_blist) {
3290 				(void) memcpy(&u_propput, &propput->ppu_ioctl,
3291 				    sizeof (pool_propput_t));
3292 				u_propput.pp_o_id =
3293 				    elem_get_sysid(propput->ppu_elem);
3294 				u_propput.pp_o_buf = NULL;
3295 				if (nvlist_pack(propput->ppu_blist,
3296 				    (char **)&u_propput.pp_o_buf,
3297 				    &u_propput.pp_o_bufsize,
3298 				    NV_ENCODE_NATIVE, 0) != 0) {
3299 					pool_seterror(POE_SYSTEM);
3300 					return (PO_FAIL);
3301 				}
3302 				if (ioctl(prov->pkc_fd, POOL_PROPPUT,
3303 				    &u_propput) < 0) {
3304 					free(u_propput.pp_o_buf);
3305 					pool_seterror(POE_SYSTEM);
3306 					return (PO_FAIL);
3307 				}
3308 				free(u_propput.pp_o_buf);
3309 			} else {
3310 				if (nvlist_unpack(propput->
3311 				    ppu_ioctl.pp_o_buf,
3312 				    propput->ppu_ioctl.pp_o_bufsize,
3313 				    &propput->ppu_alist, 0) != 0) {
3314 					pool_seterror(POE_SYSTEM);
3315 					return (PO_FAIL);
3316 				}
3317 				u_proprm.pp_o_id_type =
3318 				    propput->ppu_ioctl.pp_o_id_type;
3319 				u_proprm.pp_o_id_sub_type =
3320 				    propput->ppu_ioctl.pp_o_id_sub_type;
3321 				u_proprm.pp_o_id =
3322 				    elem_get_sysid(propput->ppu_elem);
3323 				pair = nvlist_next_nvpair(propput->ppu_alist,
3324 				    NULL);
3325 				u_proprm.pp_o_prop_name = nvpair_name(pair);
3326 				u_proprm.pp_o_prop_name_size =
3327 				    strlen(u_proprm.pp_o_prop_name);
3328 
3329 				if (provider_get_prop(propput->ppu_elem,
3330 				    u_proprm.pp_o_prop_name) == NULL) {
3331 					if (ioctl(prov->pkc_fd, POOL_PROPRM,
3332 					    &u_proprm) < 0) {
3333 						nvlist_free(propput->ppu_alist);
3334 						pool_seterror(POE_SYSTEM);
3335 						return (PO_FAIL);
3336 					}
3337 				}
3338 				nvlist_free(propput->ppu_alist);
3339 			}
3340 		}
3341 		li->li_state = LS_DO;
3342 		break;
3343 	case POOL_PROPRM:
3344 		proprm = (pool_proprm_undo_t *)li->li_details;
3345 
3346 		u_propput.pp_o_id_type = proprm->pru_ioctl.pp_o_id_type;
3347 		u_propput.pp_o_id_sub_type =
3348 		    proprm->pru_ioctl.pp_o_id_sub_type;
3349 		u_propput.pp_o_id = elem_get_sysid(proprm->pru_elem);
3350 		u_propput.pp_o_buf = NULL;
3351 		/*
3352 		 * Only try to remove the appropriate property
3353 		 */
3354 		if (nvlist_alloc(&tmplist, NV_UNIQUE_NAME_TYPE, 0) !=
3355 		    0) {
3356 			pool_seterror(POE_SYSTEM);
3357 			return (PO_FAIL);
3358 		}
3359 		if (pool_knl_nvlist_add_value(tmplist,
3360 		    pool_value_get_name(&proprm->pru_oldval),
3361 		    &proprm->pru_oldval) != PO_SUCCESS)
3362 			return (PO_FAIL);
3363 
3364 		if (nvlist_pack(tmplist,
3365 		    (char **)&u_propput.pp_o_buf, &u_propput.pp_o_bufsize,
3366 		    NV_ENCODE_NATIVE, 0) != 0) {
3367 			nvlist_free(tmplist);
3368 			pool_seterror(POE_SYSTEM);
3369 			return (PO_FAIL);
3370 		}
3371 		nvlist_free(tmplist);
3372 		if (ioctl(prov->pkc_fd, POOL_PROPPUT, &u_propput) < 0) {
3373 			free(u_propput.pp_o_buf);
3374 			pool_seterror(POE_SYSTEM);
3375 			return (PO_FAIL);
3376 		}
3377 		free(u_propput.pp_o_buf);
3378 		li->li_state = LS_DO;
3379 		break;
3380 	default:
3381 		return (PO_FAIL);
3382 	}
3383 		return (PO_SUCCESS);
3384 }
3385 
3386 /*
3387  * A log item stores state about the transaction it represents. This
3388  * function releases the resources associated with the transaction and
3389  * used to store the transaction state.
3390  */
3391 int
3392 log_item_release(log_item_t *li)
3393 {
3394 	pool_create_undo_t *create;
3395 	pool_destroy_undo_t *destroy;
3396 	pool_assoc_undo_t *assoc;
3397 	pool_dissoc_undo_t *dissoc;
3398 	pool_propput_undo_t *propput;
3399 	pool_proprm_undo_t *proprm;
3400 	pool_xtransfer_undo_t *xtransfer;
3401 
3402 	switch (li->li_op) {
3403 	case POOL_CREATE:
3404 		create = (pool_create_undo_t *)li->li_details;
3405 
3406 		free(create);
3407 		break;
3408 	case POOL_DESTROY:
3409 		destroy = (pool_destroy_undo_t *)li->li_details;
3410 
3411 #ifdef DEBUG
3412 		dprintf("log_item_release: POOL_DESTROY\n");
3413 #endif	/* DEBUG */
3414 
3415 		if (li->li_state == LS_UNDO) {
3416 #ifdef DEBUG
3417 			pool_elem_dprintf(destroy->pdu_elem);
3418 #endif	/* DEBUG */
3419 			pool_knl_elem_free((pool_knl_elem_t *)destroy->
3420 			    pdu_elem, PO_TRUE);
3421 		}
3422 		free(destroy);
3423 		break;
3424 	case POOL_ASSOC:
3425 		assoc = (pool_assoc_undo_t *)li->li_details;
3426 
3427 		free(assoc);
3428 		break;
3429 	case POOL_DISSOC:
3430 		dissoc = (pool_dissoc_undo_t *)li->li_details;
3431 
3432 		free(dissoc);
3433 		break;
3434 	case POOL_TRANSFER:
3435 		pool_seterror(POE_BADPARAM);
3436 		return (PO_FAIL);
3437 		break;
3438 	case POOL_XTRANSFER:
3439 		xtransfer = (pool_xtransfer_undo_t *)li->li_details;
3440 
3441 		free(xtransfer->pxu_rl);
3442 		free(xtransfer->pxu_ioctl.px_o_comp_list);
3443 		free(xtransfer);
3444 		break;
3445 	case POOL_PROPPUT:
3446 		propput = (pool_propput_undo_t *)li->li_details;
3447 
3448 		if (propput->ppu_blist)
3449 			nvlist_free(propput->ppu_blist);
3450 		free(propput->ppu_ioctl.pp_o_buf);
3451 		free(propput);
3452 		break;
3453 	case POOL_PROPRM:
3454 		proprm = (pool_proprm_undo_t *)li->li_details;
3455 
3456 		free(proprm);
3457 		break;
3458 	default:
3459 		return (PO_FAIL);
3460 	}
3461 	return (PO_SUCCESS);
3462 }
3463 
3464 /*
3465  * pool_knl_nvlist_add_value() adds a pool_value_t to an nvlist.
3466  */
3467 int
3468 pool_knl_nvlist_add_value(nvlist_t *list, const char *name,
3469     const pool_value_t *pv)
3470 {
3471 	uint64_t uval;
3472 	int64_t ival;
3473 	double dval;
3474 	uchar_t dval_b[sizeof (double)];
3475 	uchar_t bval;
3476 	const char *sval;
3477 	pool_value_class_t type;
3478 	char *nv_name;
3479 
3480 	if ((type = pool_value_get_type(pv)) == POC_INVAL) {
3481 		pool_seterror(POE_BADPARAM);
3482 		return (PO_FAIL);
3483 	}
3484 	nv_name = (char *)name;
3485 
3486 	switch (type) {
3487 	case POC_UINT:
3488 		if (pool_value_get_uint64(pv, &uval) == POC_INVAL) {
3489 			return (PO_FAIL);
3490 		}
3491 		if (nvlist_add_uint64(list, nv_name, uval) != 0) {
3492 			pool_seterror(POE_SYSTEM);
3493 			return (PO_FAIL);
3494 		}
3495 		break;
3496 	case POC_INT:
3497 		if (pool_value_get_int64(pv, &ival) == POC_INVAL) {
3498 			return (PO_FAIL);
3499 		}
3500 		if (nvlist_add_int64(list, nv_name, ival) != 0) {
3501 			pool_seterror(POE_SYSTEM);
3502 			return (PO_FAIL);
3503 		}
3504 		break;
3505 	case POC_DOUBLE:
3506 		if (pool_value_get_double(pv, &dval) == POC_INVAL) {
3507 			return (PO_FAIL);
3508 		}
3509 		/*
3510 		 * Since there is no support for doubles in the
3511 		 * kernel, store the double value in a byte array.
3512 		 */
3513 		(void) memcpy(dval_b, &dval, sizeof (double));
3514 		if (nvlist_add_byte_array(list, nv_name, dval_b,
3515 		    sizeof (double)) != 0) {
3516 			pool_seterror(POE_SYSTEM);
3517 			return (PO_FAIL);
3518 		}
3519 		break;
3520 	case POC_BOOL:
3521 		if (pool_value_get_bool(pv, &bval) == POC_INVAL) {
3522 			return (PO_FAIL);
3523 		}
3524 		if (nvlist_add_byte(list, nv_name, bval) != 0) {
3525 			pool_seterror(POE_SYSTEM);
3526 			return (PO_FAIL);
3527 		}
3528 		break;
3529 	case POC_STRING:
3530 		if (pool_value_get_string(pv, &sval) == POC_INVAL) {
3531 			return (PO_FAIL);
3532 		}
3533 		if (nvlist_add_string(list, nv_name, (char *)sval) != 0) {
3534 			pool_seterror(POE_SYSTEM);
3535 			return (PO_FAIL);
3536 		}
3537 		break;
3538 	default:
3539 		pool_seterror(POE_BADPARAM);
3540 		return (PO_FAIL);
3541 	}
3542 	return (PO_SUCCESS);
3543 }
3544 
3545 /*
3546  * hash_id() hashes all elements in a pool configuration using the
3547  * "sys_id" property. Not all elements have a "sys_id" property,
3548  * however elem_get_sysid() caters for this by always returning a
3549  * constant value for those elements. This isn't anticipated to lead
3550  * to a performance degradation in the hash, since those elements
3551  * which are likely to be most prevalent in a configuration do have
3552  * "sys_id" as a property.
3553  */
3554 uint64_t
3555 hash_id(const pool_elem_t *pe)
3556 {
3557 	id_t id;
3558 
3559 	id = elem_get_sysid(pe);
3560 	return (hash_buf(&id, sizeof (id)));
3561 }
3562 
3563 /*
3564  *  blocking_open() guarantees access to the pool device, if open()
3565  * is failing with EBUSY.
3566  */
3567 int
3568 blocking_open(const char *path, int oflag)
3569 {
3570 	int fd;
3571 
3572 	while ((fd = open(path, oflag)) == -1 && errno == EBUSY)
3573 		(void) poll(NULL, 0, 1 * MILLISEC);
3574 
3575 	return (fd);
3576 }
3577