xref: /titanic_51/usr/src/lib/libpool/common/pool_kernel.c (revision 4812581794004eff0af2b765b832403b30bf64ab)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  * Copyright 2012 Milan Jurik. All rights reserved.
26  */
27 
28 #include <assert.h>
29 #include <errno.h>
30 #include <exacct.h>
31 #include <fcntl.h>
32 #include <libnvpair.h>
33 #include <limits.h>
34 #include <poll.h>
35 #include <pool.h>
36 #include <stdlib.h>
37 #include <stdio.h>
38 #include <string.h>
39 #include <strings.h>
40 #include <stropts.h>
41 #include <thread.h>
42 #include <time.h>
43 #include <unistd.h>
44 
45 #include <libxml/tree.h>
46 
47 #include <sys/mman.h>
48 #include <sys/pool.h>
49 #include <sys/pool_impl.h>
50 #include <sys/priocntl.h>
51 #include <sys/stat.h>
52 #include <sys/time.h>
53 #include <sys/types.h>
54 
55 #include "dict.h"
56 
57 #include "pool_internal.h"
58 #include "pool_impl.h"
59 #include "pool_kernel_impl.h"
60 
61 /*
62  * libpool kernel Manipulation Routines
63  *
64  * pool_kernel.c implements the kernel manipulation routines used by the
65  * libpool kernel datastore. The functions are grouped into the following
66  * logical areas
67  *
68  */
69 
70 /*
71  * Device snapshot transfer buffer size
72  */
73 #define	KERNEL_SNAPSHOT_BUF_SZ	65535
74 
75 /*
76  * Kernel result set's initial size. 8 is probably large enough for
77  * most queries. Queries requiring more space are accommodated using
78  * realloc on a per result set basis.
79  */
80 #define	KERNEL_RS_INITIAL_SZ	8
81 
82 /*
83  * Property manipulation macros
84  */
85 #define	KERNEL_PROP_RDONLY	0x1
86 
87 /*
88  * Information required to evaluate qualifying elements for a query
89  */
90 struct query_obj {	/* NOTE(review): members 1-5 match pool_knl_exec_query()'s parameter types in order; presumably they carry those arguments - confirm */
91 	const pool_conf_t *conf;
92 	const pool_elem_t *src;
93 	const char *src_attr;
94 	pool_elem_class_t classes;
95 	pool_value_t **props;
96 	pool_knl_result_set_t *rs;	/* presumably the result set being filled - confirm */
97 };
98 
99 /*
100  * Identifies a pool element with a processor set id
101  */
102 typedef struct pool_set_xref {
103 	pool_knl_pool_t	*psx_pool;	/* pool recorded by pool_knl_snap_load_push() */
104 	uint_t		psx_pset_id;	/* id from an EXD_POOL_PSETID item; matched against EXD_PSET_PSETID ids */
105 	struct pool_set_xref *psx_next;	/* next entry in the singly linked list */
106 } pool_set_xref_t;
107 
108 /*
109  * Controls exacct snapshot load into libpool data structure
110  */
111 typedef struct pool_snap_load {
112 	int *psl_changed;		/* optional (may be NULL); load_group() ORs POU_* flags into it */
113 	pool_set_xref_t *psl_xref;	/* head of the list built by pool_knl_snap_load_push() */
114 	pool_elem_t *psl_system;	/* system element; container given to pool and pset elements */
115 	pool_knl_resource_t *psl_pset;	/* pset loaded most recently; container given to cpu elements */
116 } pool_snap_load_t;
117 
118 /*
119  * Information about an XML document which is being constructed
120  */
121 struct knl_to_xml {
122 	xmlDocPtr ktx_doc;	/* document under construction */
123 	xmlNodePtr ktx_node;	/* node that prop_build_cb() adds properties to */
124 };
125 
126 /*
127  * Undo structure processing. The following structures are all used to
128  * allow changes to the libpool snapshot and kernel following an
129  * unsuccessful commit.
130  */
131 typedef struct pool_create_undo {	/* NOTE(review): presumably the undo record for an element creation - confirm */
132 	pool_create_t pcu_ioctl;
133 	pool_elem_t *pcu_elem;
134 } pool_create_undo_t;
135 
136 typedef struct pool_destroy_undo {	/* NOTE(review): presumably the undo record for an element destruction - confirm */
137 	pool_destroy_t pdu_ioctl;
138 	pool_elem_t *pdu_elem;
139 } pool_destroy_undo_t;
140 
141 typedef struct pool_assoc_undo {	/* NOTE(review): presumably the undo record for a pool/resource association - confirm */
142 	pool_assoc_t pau_ioctl;
143 	pool_elem_t *pau_assoc;
144 	pool_elem_t *pau_oldres;
145 	pool_elem_t *pau_newres;
146 } pool_assoc_undo_t;
147 
148 typedef struct pool_dissoc_undo {	/* NOTE(review): shares the pdu_ member prefix with pool_destroy_undo */
149 	pool_dissoc_t pdu_ioctl;
150 	pool_elem_t *pdu_dissoc;
151 	pool_elem_t *pdu_oldres;
152 	pool_elem_t *pdu_newres;
153 } pool_dissoc_undo_t;
154 
155 typedef struct pool_xtransfer_undo {	/* NOTE(review): presumably the undo record for a component transfer - confirm */
156 	pool_xtransfer_t pxu_ioctl;
157 	pool_elem_t *pxu_src;
158 	pool_elem_t *pxu_tgt;
159 	pool_component_t **pxu_rl;
160 } pool_xtransfer_undo_t;
161 
162 typedef struct pool_propput_undo {	/* NOTE(review): presumably the undo record for a property put - confirm */
163 	pool_propput_t ppu_ioctl;
164 	pool_elem_t *ppu_elem;
165 	nvlist_t *ppu_alist;
166 	nvlist_t *ppu_blist;
167 	uchar_t ppu_doioctl;
168 } pool_propput_undo_t;
169 
170 typedef struct pool_proprm_undo {	/* NOTE(review): presumably the undo record for a property removal - confirm */
171 	pool_proprm_t pru_ioctl;
172 	pool_elem_t *pru_elem;
173 	pool_value_t pru_oldval;
174 } pool_proprm_undo_t;
175 
176 extern const char *dtd_location;
177 
178 extern const char *element_class_tags[];
179 extern const char pool_info_location[];
180 
181 /*
182  * These functions are defined in pool_xml.c and represent the minimum
183  * XML support required to allow a pool kernel configuration to be
184  * exported as an XML document.
185  */
186 extern int pool_xml_set_attr(xmlNodePtr, xmlChar *, const pool_value_t *);
187 extern int pool_xml_set_prop(xmlNodePtr, xmlChar *, const pool_value_t *);
188 extern void xml_init(void);
189 extern xmlNodePtr node_create(xmlNodePtr, const xmlChar *);
190 extern void pool_error_func(void *, const char *, ...);
191 /*
192  * Utilities
193  */
194 static int load_group(pool_conf_t *, pool_knl_elem_t *, ea_object_t *,
195     pool_snap_load_t *);
196 static void pool_knl_elem_free(pool_knl_elem_t *, int);
197 static int pool_knl_put_xml_property(pool_elem_t *, xmlNodePtr, const char *,
198     const pool_value_t *);
199 static int pool_knl_snap_load_push(pool_snap_load_t *, pool_knl_pool_t *);
200 static int pool_knl_snap_load_update(pool_snap_load_t *, int, uint_t);
201 static int pool_knl_snap_load_remove(pool_snap_load_t *, int, uint_t);
202 static nvpair_t *pool_knl_find_nvpair(nvlist_t *, const char *);
203 static int pool_knl_nvlist_add_value(nvlist_t *, const char *,
204     const pool_value_t *);
205 static int pool_knl_recover(pool_conf_t *);
206 static uint64_t hash_id(const pool_elem_t *);
207 static int blocking_open(const char *, int);
208 
209 /*
210  * Connections
211  */
212 static void pool_knl_connection_free(pool_knl_connection_t *);
213 
214 /*
215  * Configuration
216  */
217 static int pool_knl_close(pool_conf_t *);
218 static int pool_knl_validate(const pool_conf_t *, pool_valid_level_t);
219 static int pool_knl_commit(pool_conf_t *);
220 static int pool_knl_export(const pool_conf_t *, const char *,
221     pool_export_format_t);
222 static int pool_knl_rollback(pool_conf_t *);
223 static pool_result_set_t *pool_knl_exec_query(const pool_conf_t *,
224     const pool_elem_t *, const char *, pool_elem_class_t, pool_value_t **);
225 static int pool_knl_remove(pool_conf_t *);
226 static char *pool_knl_get_binding(pool_conf_t *, pid_t);
227 static int pool_knl_set_binding(pool_conf_t *, const char *, idtype_t, id_t);
228 static char *pool_knl_get_resource_binding(pool_conf_t *,
229     pool_resource_elem_class_t, pid_t);
230 static int pool_knl_res_transfer(pool_resource_t *, pool_resource_t *,
231     uint64_t);
232 static int pool_knl_res_xtransfer(pool_resource_t *, pool_resource_t *,
233     pool_component_t **);
234 
235 /*
236  * Result Sets
237  */
238 static pool_knl_result_set_t *pool_knl_result_set_alloc(const pool_conf_t *);
239 static int pool_knl_result_set_append(pool_knl_result_set_t *,
240     pool_knl_elem_t *);
241 static int pool_knl_result_set_realloc(pool_knl_result_set_t *);
242 static void pool_knl_result_set_free(pool_knl_result_set_t *);
243 static pool_elem_t *pool_knl_rs_next(pool_result_set_t *);
244 static pool_elem_t *pool_knl_rs_prev(pool_result_set_t *);
245 static pool_elem_t *pool_knl_rs_first(pool_result_set_t *);
246 static pool_elem_t *pool_knl_rs_last(pool_result_set_t *);
247 static int pool_knl_rs_set_index(pool_result_set_t *, int);
248 static int pool_knl_rs_get_index(pool_result_set_t *);
249 static int pool_knl_rs_count(pool_result_set_t *);
250 static int pool_knl_rs_close(pool_result_set_t *);
251 
252 /*
253  * Element (and sub-type)
254  */
255 static pool_knl_elem_t *pool_knl_elem_wrap(pool_conf_t *, pool_elem_class_t,
256     pool_resource_elem_class_t, pool_component_elem_class_t);
257 static pool_elem_t *pool_knl_elem_create(pool_conf_t *, pool_elem_class_t,
258     pool_resource_elem_class_t, pool_component_elem_class_t);
259 static int pool_knl_elem_remove(pool_elem_t *);
260 static int pool_knl_set_container(pool_elem_t *, pool_elem_t *);
261 static pool_elem_t *pool_knl_get_container(const pool_elem_t *);
262 /*
263  * Pool element specific
264  */
265 static int pool_knl_pool_associate(pool_t *, const pool_resource_t *);
266 static int pool_knl_pool_dissociate(pool_t *, const pool_resource_t *);
267 
268 /*
269  * Resource elements specific
270  */
271 static int pool_knl_resource_is_system(const pool_resource_t *);
272 static int pool_knl_resource_can_associate(const pool_resource_t *);
273 
274 /* Properties */
275 static pool_value_class_t pool_knl_get_property(const pool_elem_t *,
276     const char *, pool_value_t *);
277 static pool_value_class_t pool_knl_get_dynamic_property(const pool_elem_t *,
278     const char *, pool_value_t *);
279 static int pool_knl_put_property(pool_elem_t *, const char *,
280     const pool_value_t *);
281 static int pool_knl_rm_property(pool_elem_t *, const char *);
282 static pool_value_t **pool_knl_get_properties(const pool_elem_t *, uint_t *);
283 
284 /*
285  * Logging
286  */
287 static int log_item_commit(log_item_t *);
288 static int log_item_undo(log_item_t *);
289 static int log_item_release(log_item_t *);
290 
291 /*
292  * Utilities
293  */
294 
295 /*
296  * load_group() updates the library configuration with the kernel
297  * snapshot supplied in ep. The function is designed to be called
298  * recursively. This function depends implicitly on the ordering of
299  * the data provided in ep. Changes to the ordering of data in ep must
300  * be matched by changes to this function.
301  */
302 int
303 load_group(pool_conf_t *conf, pool_knl_elem_t *elem, ea_object_t *ep,
304     pool_snap_load_t *psl)
305 {
306 	ea_object_t *eo;
307 	pool_knl_elem_t *old_elem;
308 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
309 	int ret = PO_SUCCESS;
310 
311 	if ((ep->eo_catalog & EXD_DATA_MASK) == EXD_GROUP_SYSTEM) {
312 		if ((elem = pool_knl_elem_wrap(conf, PEC_SYSTEM, PREC_INVALID,
313 		    PCEC_INVALID)) == NULL)
314 			return (PO_FAIL);
315 		if (nvlist_alloc(&elem->pke_properties, NV_UNIQUE_NAME_TYPE,
316 		    0) != 0) {
317 			pool_knl_elem_free(elem, PO_FALSE);
318 			pool_seterror(POE_SYSTEM);
319 			return (PO_FAIL);
320 		}
321 		/*
322 		 * Check to see if we already have an element
323 		 * for this data. If we have, free the newly
324 		 * created elem and continue with the old one
325 		 */
326 		if ((old_elem = dict_get(prov->pkc_elements, elem)) != NULL) {
327 			nvlist_free(old_elem->pke_properties);
328 			old_elem->pke_properties = elem->pke_properties;
329 			pool_knl_elem_free(elem, PO_FALSE);
330 			elem = old_elem;
331 		} else {
332 			if (dict_put(prov->pkc_elements, elem, elem) != NULL) {
333 				pool_knl_elem_free(elem, PO_TRUE);
334 				pool_seterror(POE_SYSTEM);
335 				return (PO_FAIL);
336 			}
337 		}
338 		psl->psl_system = (pool_elem_t *)elem;
339 	}
340 
341 	for (eo = ep->eo_group.eg_objs; eo != NULL; eo = eo->eo_next) {
342 		int data;
343 		pool_knl_elem_t *prop_elem = NULL;
344 
345 		data = (eo->eo_catalog & EXD_DATA_MASK);
346 
347 		switch (data) {
348 		case EXD_SYSTEM_TSTAMP:
349 		case EXD_POOL_TSTAMP:
350 		case EXD_PSET_TSTAMP:
351 		case EXD_CPU_TSTAMP:
352 			if (eo->eo_item.ei_uint64 > prov->pkc_lotime) {
353 				if (eo->eo_item.ei_uint64 > prov->pkc_ltime)
354 					prov->pkc_ltime = eo->eo_item.ei_uint64;
355 				if (psl->psl_changed) {
356 					switch (data) {
357 					case EXD_SYSTEM_TSTAMP:
358 						*psl->psl_changed |= POU_SYSTEM;
359 						break;
360 					case EXD_POOL_TSTAMP:
361 						*psl->psl_changed |= POU_POOL;
362 						break;
363 					case EXD_PSET_TSTAMP:
364 						*psl->psl_changed |= POU_PSET;
365 						break;
366 					case EXD_CPU_TSTAMP:
367 						*psl->psl_changed |= POU_CPU;
368 						break;
369 					}
370 				}
371 			}
372 			break;
373 		case EXD_SYSTEM_PROP:
374 		case EXD_POOL_PROP:
375 		case EXD_PSET_PROP:
376 		case EXD_CPU_PROP:
377 			if (data == EXD_PSET_PROP) {
378 				prop_elem = elem;
379 				elem = (pool_knl_elem_t *)psl->psl_pset;
380 			}
381 			nvlist_free(elem->pke_properties);
382 			if (nvlist_unpack(eo->eo_item.ei_raw,
383 			    eo->eo_item.ei_size, &elem->pke_properties, 0) !=
384 			    0) {
385 				pool_seterror(POE_SYSTEM);
386 				return (PO_FAIL);
387 			}
388 			elem->pke_ltime = prov->pkc_ltime;
389 			if (data == EXD_PSET_PROP) {
390 				elem = prop_elem;
391 			}
392 			break;
393 		case EXD_POOL_POOLID:
394 			if (nvlist_alloc(&elem->pke_properties,
395 			    NV_UNIQUE_NAME_TYPE, 0) != 0) {
396 				pool_seterror(POE_SYSTEM);
397 				return (PO_FAIL);
398 			}
399 			if (nvlist_add_int64(elem->pke_properties,
400 			    "pool.sys_id",
401 			    (int64_t)eo->eo_item.ei_uint32) != 0) {
402 				pool_seterror(POE_SYSTEM);
403 				return (PO_FAIL);
404 			}
405 			if ((old_elem = dict_get(prov->pkc_elements, elem)) !=
406 			    NULL) {
407 				nvlist_free(old_elem->pke_properties);
408 				old_elem->pke_properties = elem->pke_properties;
409 				pool_knl_elem_free(elem, PO_FALSE);
410 				elem = old_elem;
411 			} else {
412 				if (dict_put(prov->pkc_elements, elem, elem) !=
413 				    NULL) {
414 					pool_knl_elem_free(elem, PO_TRUE);
415 					pool_seterror(POE_SYSTEM);
416 					return (PO_FAIL);
417 				}
418 			}
419 			if (pool_knl_snap_load_push(psl,
420 			    (pool_knl_pool_t *)elem) != PO_SUCCESS) {
421 				pool_seterror(POE_SYSTEM);
422 				return (PO_FAIL);
423 			}
424 			((pool_knl_pool_t *)elem)->pkp_assoc[PREC_PSET] = NULL;
425 			break;
426 		case EXD_POOL_PSETID:
427 			if (pool_knl_snap_load_update(psl, EXD_POOL_PSETID,
428 			    eo->eo_item.ei_uint32) != PO_SUCCESS) {
429 				pool_seterror(POE_SYSTEM);
430 				return (PO_FAIL);
431 			}
432 			break;
433 		case EXD_PSET_PSETID:
434 			if (nvlist_alloc(&elem->pke_properties,
435 			    NV_UNIQUE_NAME_TYPE, 0) != 0) {
436 				pool_seterror(POE_SYSTEM);
437 				return (PO_FAIL);
438 			}
439 			if (nvlist_add_int64(elem->pke_properties,
440 			    "pset.sys_id",
441 			    (int64_t)eo->eo_item.ei_uint32) != 0) {
442 				pool_seterror(POE_SYSTEM);
443 				return (PO_FAIL);
444 			}
445 			if ((old_elem = dict_get(prov->pkc_elements, elem)) !=
446 			    NULL) {
447 				nvlist_free(old_elem->pke_properties);
448 				old_elem->pke_properties = elem->pke_properties;
449 				pool_knl_elem_free(elem, PO_FALSE);
450 				elem = old_elem;
451 			} else {
452 				if (dict_put(prov->pkc_elements, elem, elem) !=
453 				    NULL) {
454 					pool_knl_elem_free(elem, PO_TRUE);
455 					pool_seterror(POE_SYSTEM);
456 					return (PO_FAIL);
457 				}
458 			}
459 			psl->psl_pset = (pool_knl_resource_t *)elem;
460 			if (pool_knl_snap_load_remove(psl, data,
461 			    eo->eo_item.ei_uint32) != PO_SUCCESS) {
462 				pool_seterror(POE_SYSTEM);
463 				return (PO_FAIL);
464 			}
465 			break;
466 		case EXD_CPU_CPUID:
467 			if (nvlist_alloc(&elem->pke_properties,
468 			    NV_UNIQUE_NAME_TYPE, 0) != 0) {
469 				pool_seterror(POE_SYSTEM);
470 				return (PO_FAIL);
471 			}
472 			if (nvlist_add_int64(elem->pke_properties,
473 			    "cpu.sys_id",
474 			    (int64_t)eo->eo_item.ei_uint32) != 0) {
475 				pool_seterror(POE_SYSTEM);
476 				return (PO_FAIL);
477 			}
478 			if ((old_elem = dict_get(prov->pkc_elements, elem)) !=
479 			    NULL) {
480 				nvlist_free(old_elem->pke_properties);
481 				old_elem->pke_properties = elem->pke_properties;
482 				old_elem->pke_parent = elem->pke_parent;
483 				pool_knl_elem_free(elem, PO_FALSE);
484 				elem = old_elem;
485 			} else {
486 				if (dict_put(prov->pkc_elements, elem, elem) !=
487 				    NULL) {
488 					pool_knl_elem_free(elem, PO_TRUE);
489 					pool_seterror(POE_SYSTEM);
490 					return (PO_FAIL);
491 				}
492 			}
493 			break;
494 		case EXD_GROUP_POOL:
495 			if ((elem = pool_knl_elem_wrap(conf, PEC_POOL,
496 			    PREC_INVALID, PCEC_INVALID)) == NULL)
497 				return (PO_FAIL);
498 			if (pool_set_container(psl->psl_system,
499 			    (pool_elem_t *)elem) != PO_SUCCESS) {
500 				pool_seterror(POE_SYSTEM);
501 				return (PO_FAIL);
502 			}
503 			break;
504 		case EXD_GROUP_PSET:
505 			if ((elem = pool_knl_elem_wrap(conf, PEC_RES_COMP,
506 			    PREC_PSET, PCEC_INVALID)) == NULL)
507 				return (PO_FAIL);
508 			if (pool_set_container(psl->psl_system,
509 			    (pool_elem_t *)elem) != PO_SUCCESS) {
510 				pool_seterror(POE_SYSTEM);
511 				return (PO_FAIL);
512 			}
513 			break;
514 		case EXD_GROUP_CPU:
515 			if ((elem = pool_knl_elem_wrap(conf, PEC_COMP,
516 			    PREC_INVALID, PCEC_CPU)) == NULL)
517 				return (PO_FAIL);
518 			if (pool_set_container((pool_elem_t *)psl->psl_pset,
519 			    (pool_elem_t *)elem) != PO_SUCCESS) {
520 				pool_seterror(POE_SYSTEM);
521 				return (PO_FAIL);
522 			}
523 			break;
524 		default:
525 			break;
526 		}
527 
528 
529 		if (eo->eo_type == EO_GROUP) {
530 			if ((ret = load_group(conf, elem, eo, psl)) == PO_FAIL)
531 				break;
532 		}
533 	}
534 	return (ret);
535 }
536 
537 /*
538  * Push a snapshot entry onto the list of pools in the snapshot.
539  */
540 int
541 pool_knl_snap_load_push(pool_snap_load_t *psl, pool_knl_pool_t *pkp)
542 {
543 	pool_set_xref_t *psx;
544 
545 	if ((psx = malloc(sizeof (pool_set_xref_t))) == NULL) {
546 		pool_seterror(POE_SYSTEM);
547 		return (PO_FAIL);
548 	}
549 	(void) memset(psx, 0, sizeof (pool_set_xref_t));
550 	psx->psx_pool = pkp;
551 	/*
552 	 * Push onto the list of pools
553 	 */
554 	psx->psx_next = psl->psl_xref;
555 	psl->psl_xref = psx;
556 
557 	return (PO_SUCCESS);
558 }
559 
560 /*
561  * Update the current cross-reference for the supplied type of
562  * resource.
563  */
564 int
565 pool_knl_snap_load_update(pool_snap_load_t *psl, int type, uint_t id)
566 {
567 	switch (type) {
568 	case EXD_POOL_PSETID:
569 		psl->psl_xref->psx_pset_id = id;
570 		break;
571 	default:
572 		return (PO_FAIL);
573 	}
574 
575 	return (PO_SUCCESS);
576 }
577 
578 /*
579  * Remove a resource entry with the supplied type and id from the
580  * snapshot list when it is no longer required.
581  */
582 int
583 pool_knl_snap_load_remove(pool_snap_load_t *psl, int type, uint_t id)
584 {
585 	pool_set_xref_t *current, *prev, *next;
586 
587 	for (prev = NULL, current = psl->psl_xref; current != NULL;
588 	    current = next) {
589 		switch (type) {
590 		case EXD_PSET_PSETID:
591 			if (current->psx_pset_id == id)
592 				current->psx_pool->pkp_assoc[PREC_PSET] =
593 				    psl->psl_pset;
594 			break;
595 		default:
596 			return (PO_FAIL);
597 		}
598 		next = current->psx_next;
599 		if (current->psx_pool->pkp_assoc[PREC_PSET] != NULL) {
600 			if (prev != NULL) {
601 				prev->psx_next = current->psx_next;
602 			} else {
603 				psl->psl_xref = current->psx_next;
604 			}
605 			free(current);
606 		} else
607 			prev = current;
608 	}
609 
610 	return (PO_SUCCESS);
611 }
612 
613 /*
614  * Return the nvpair with the supplied name from the supplied list.
615  *
616  * NULL is returned if the name cannot be found in the list.
617  */
618 nvpair_t *
619 pool_knl_find_nvpair(nvlist_t *l, const char *name)
620 {
621 	nvpair_t *pair;
622 
623 	for (pair = nvlist_next_nvpair(l, NULL); pair != NULL;
624 	    pair = nvlist_next_nvpair(l, pair)) {
625 		if (strcmp(nvpair_name(pair), name) == 0)
626 			break;
627 	}
628 	return (pair);
629 }
630 
631 /*
632  * Close the configuration. There are a few steps to closing a configuration:
633  * - Close the pseudo device
634  * - Free the data provider
635  * Returns PO_SUCCESS/PO_FAIL
636  */
637 int
638 pool_knl_close(pool_conf_t *conf)
639 {
640 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
641 
642 	if (close(prov->pkc_fd) < 0) {
643 		pool_seterror(POE_SYSTEM);
644 		return (PO_FAIL);
645 	}
646 	/*
647 	 * Rollback any pending changes before freeing the prov. This
648 	 * ensures there are no memory leaks from pending transactions.
649 	 * However, don't rollback when we've done a temporary pool since the
650 	 * pool/resources haven't really been committed in this case.
651 	 * They will all be freed in pool_knl_connection_free and we don't
652 	 * want to double free them.
653 	 */
654 	if (!(conf->pc_prov->pc_oflags & PO_TEMP))
655 		(void) pool_knl_rollback(conf);
656 	pool_knl_connection_free(prov);
657 	return (PO_SUCCESS);
658 }
659 
660 /*
661  * Remove elements in this map (previously identified as "dead") from
662  * the configuration map (prov->pkc_elements).
663  */
664 
665 /* ARGSUSED1 */
666 static void
667 remove_dead_elems(const void *key, void **value, void *cl)
668 {
669 	pool_knl_elem_t *pke = (pool_knl_elem_t *)key;
670 	pool_conf_t *conf = TO_CONF(TO_ELEM(pke));
671 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
672 
673 	assert(dict_remove(prov->pkc_elements, pke) != NULL);
674 #ifdef DEBUG
675 	dprintf("remove_dead_elems:\n");
676 	pool_elem_dprintf(TO_ELEM(pke));
677 #endif	/* DEBUG */
678 	pool_knl_elem_free(pke, PO_TRUE);
679 }
680 
681 /*
682  * Find elements which were not updated the last time that
683  * load_group() was called. Add those elements into a separate map
684  * (passed in cl) which will be later used to remove these elements
685  * from the configuration map.
686  */
687 /* ARGSUSED1 */
688 static void
689 find_dead_elems(const void *key, void **value, void *cl)
690 {
691 	pool_knl_elem_t *pke = (pool_knl_elem_t *)key;
692 	pool_conf_t *conf = TO_CONF(TO_ELEM(pke));
693 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
694 	dict_hdl_t *dead_map = (dict_hdl_t *)cl;
695 
696 	if (pke->pke_ltime < prov->pkc_ltime)
697 		(void) dict_put(dead_map, pke, pke);
698 }
699 
700 /*
701  * Update the snapshot held by the library. This function acts as the
702  * controller for the snapshot update procedure. Then snapshot is
703  * actually updated in multiple phases by the load_group() function
704  * (which updates existing elements and creates new elements as
705  * required) and then by find_dead_elems and remove_dead_elems
706  * (respectively responsible for identifying elements which are to be
707  * removed and then removing them).
708  *
709  * Returns PO_SUCCESS
710  */
711 int
712 pool_knl_update(pool_conf_t *conf, int *changed)
713 {
714 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
715 	pool_query_t query = {0};
716 	ea_object_t *ep;
717 	dict_hdl_t *dead_map;
718 	pool_snap_load_t psl = { NULL };
719 
720 	/*
721 	 * Ensure the library snapshot is consistent, if there are any
722 	 * outstanding transactions return failure.
723 	 */
724 	if (log_size(prov->pkc_log) != 0) {
725 		pool_seterror(POE_INVALID_CONF);
726 		return (PO_FAIL);
727 	}
728 	/*
729 	 * Query the kernel for a snapshot of the configuration state. Use
730 	 * load_group to allocate the user-land representation of the
731 	 * data returned in the snapshot.
732 	 */
733 	/* LINTED E_CONSTANT_CONDITION */
734 	while (1) {
735 		if (ioctl(prov->pkc_fd, POOL_QUERY, &query) < 0) {
736 			pool_seterror(POE_SYSTEM);
737 			return (PO_FAIL);
738 		}
739 		if ((query.pq_io_buf = calloc(1,
740 		    (query.pq_io_bufsize < KERNEL_SNAPSHOT_BUF_SZ) ?
741 		    query.pq_io_bufsize * 2 : query.pq_io_bufsize)) == NULL) {
742 			pool_seterror(POE_SYSTEM);
743 			return (PO_FAIL);
744 		}
745 		if (ioctl(prov->pkc_fd, POOL_QUERY, &query) < 0) {
746 			free(query.pq_io_buf);
747 			if (errno != ENOMEM) {
748 				pool_seterror(POE_SYSTEM);
749 				return (PO_FAIL);
750 			}
751 			query.pq_io_bufsize = 0;
752 			query.pq_io_buf = NULL;
753 		} else
754 			break;
755 	}
756 	if (ea_unpack_object(&ep, EUP_NOALLOC, query.pq_io_buf,
757 	    query.pq_io_bufsize) != EO_GROUP) {
758 		free(query.pq_io_buf);
759 		pool_seterror(POE_DATASTORE);
760 		return (PO_FAIL);
761 	}
762 	/*
763 	 * Update the library snapshot
764 	 */
765 	psl.psl_changed = changed;
766 	prov->pkc_lotime = prov->pkc_ltime;
767 	if (load_group(conf, NULL, ep, &psl) != PO_SUCCESS) {
768 		free(query.pq_io_buf);
769 		ea_free_object(ep, EUP_NOALLOC);
770 		return (PO_FAIL);
771 	}
772 
773 	free(query.pq_io_buf);
774 	ea_free_object(ep, EUP_NOALLOC);
775 	/*
776 	 * Now search the dictionary for items that must be removed because
777 	 * they were neither created nor updated.
778 	 */
779 	if ((dead_map = dict_new((int (*)(const void *, const void *))
780 	    pool_elem_compare, (uint64_t (*)(const void *))hash_id)) == NULL) {
781 		pool_seterror(POE_SYSTEM);
782 		return (PO_FAIL);
783 	}
784 	dict_map(prov->pkc_elements, find_dead_elems, dead_map);
785 
786 	if (dict_length(dead_map) > 0) {
787 		dict_map(dead_map, remove_dead_elems, NULL);
788 	}
789 	dict_free(&dead_map);
790 
791 	return (PO_SUCCESS);
792 }
793 
794 /*
795  * Rely on the kernel to always keep a kernel configuration valid.
796  * Returns PO_SUCCESS
797  */
798 /* ARGSUSED */
799 int
800 pool_knl_validate(const pool_conf_t *conf, pool_valid_level_t level)
801 {
802 	return ((conf->pc_state == POF_INVALID) ? PO_FAIL : PO_SUCCESS);
803 }
804 
805 /*
806  * Process all the outstanding transactions in the log. If the processing
807  * fails, then attempt to rollback and "undo" the changes.
808  */
809 int
810 pool_knl_commit(pool_conf_t *conf)
811 {
812 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
813 	int lock = 1;
814 
815 	/*
816 	 * Lock the kernel state for the commit
817 	 */
818 	if (ioctl(prov->pkc_fd, POOL_COMMIT, lock) < 0) {
819 		pool_seterror(POE_SYSTEM);
820 		return (PO_FAIL);
821 	}
822 	lock = 0;
823 	/*
824 	 * If the state is LS_FAIL, then try to recover before
825 	 * performing the commit.
826 	 */
827 	if (prov->pkc_log->l_state == LS_FAIL) {
828 		if (pool_knl_recover(conf) == PO_FAIL) {
829 			/*
830 			 * Unlock the kernel state for the
831 			 * commit. Assert that this * can't fail,
832 			 * since if it ever does fail the library is
833 			 * unusable.
834 			 */
835 			assert(ioctl(prov->pkc_fd, POOL_COMMIT, lock) >= 0);
836 		}
837 	}
838 	/*
839 	 * Commit the log
840 	 */
841 	if (log_walk(prov->pkc_log, log_item_commit) != PO_SUCCESS) {
842 		(void) pool_knl_recover(conf);
843 		/*
844 		 * Unlock the kernel state for the commit. Assert that
845 		 * this can't fail, since if it ever does fail the
846 		 * library is unusable.
847 		 */
848 		assert(ioctl(prov->pkc_fd, POOL_COMMIT, lock) >= 0);
849 		pool_seterror(POE_SYSTEM);
850 		return (PO_FAIL);
851 	}
852 	/*
853 	 * Unlock the kernel state for the commit. Assert that this
854 	 * can't fail, since if it ever does fail the library is
855 	 * unusable.
856 	 */
857 	assert(ioctl(prov->pkc_fd, POOL_COMMIT, lock) >= 0);
858 	/*
859 	 * Release the log resources
860 	 */
861 	(void) log_walk(prov->pkc_log, log_item_release);
862 	log_empty(prov->pkc_log);
863 	return (PO_SUCCESS);
864 }
865 
866 /*
867  * prop_build_cb() is designed to be called from
868  * pool_walk_properties(). The property value is used to put an XML
869  * property on the supplied ktx_node. This is an essential part of the
870  * mechanism used to export a kernel configuration in libpool XML
871  * form.
872  */
873 /* ARGSUSED */
874 static int
875 prop_build_cb(pool_conf_t *UNUSED, pool_elem_t *pe, const char *name,
876     pool_value_t *pval, void *user)
877 {
878 	struct knl_to_xml *info = (struct knl_to_xml *)user;
879 
880 	return (pool_knl_put_xml_property((pool_elem_t *)pe, info->ktx_node,
881 	    name, pval));
882 }
883 
884 /*
885  * Duplicate some of the functionality from pool_xml_put_property()
886  * (see pool_xml.c) to allow a kernel configuration to add XML nodes
887  * to an XML tree which represents the kernel configuration. This is
888  * an essential part of the mechanism used to export a kernel
889  * configuration in libpool XML form.
890  */
891 int
892 pool_knl_put_xml_property(pool_elem_t *pe, xmlNodePtr node, const char *name,
893     const pool_value_t *val)
894 {
895 
896 	/*
897 	 * "type" is a special attribute which is not visible ever outside of
898 	 * libpool. Use the specific type accessor function.
899 	 */
900 	if (strcmp(name, c_type) == 0) {
901 		return (pool_xml_set_attr(node, BAD_CAST name,
902 		    val));
903 	}
904 	if (is_ns_property(pe, name) != NULL) {	/* in ns */
905 		if (pool_xml_set_attr(node,
906 		    BAD_CAST property_name_minus_ns(pe, name), val) == PO_FAIL)
907 			return (pool_xml_set_prop(node, BAD_CAST name,
908 			    val));
909 	} else
910 		return (pool_xml_set_prop(node, BAD_CAST name, val));
911 	return (PO_SUCCESS);
912 }
913 
914 /*
915  * Export the kernel configuration as an XML file. The configuration
916  * is used to build an XML document in memory. This document is then
917  * saved to the supplied location.
918  */
919 int
920 pool_knl_export(const pool_conf_t *conf, const char *location,
921     pool_export_format_t fmt)
922 {
923 	xmlNodePtr node_comment;
924 	xmlNodePtr system;
925 	int ret;
926 	pool_t **ps;
927 	pool_resource_t **rs;
928 	uint_t nelem;
929 	int i;
930 	struct knl_to_xml info;
931 	char_buf_t *cb = NULL;
932 	xmlValidCtxtPtr cvp;
933 
934 	xml_init();
935 
936 
937 	switch (fmt) {
938 	case POX_NATIVE:
939 		info.ktx_doc = xmlNewDoc(BAD_CAST "1.0");
940 		(void) xmlCreateIntSubset(info.ktx_doc, BAD_CAST "system",
941 		    BAD_CAST "-//Sun Microsystems Inc//DTD Resource "
942 		    "Management All//EN",
943 		    BAD_CAST dtd_location);
944 
945 		if ((cvp = xmlNewValidCtxt()) == NULL) {
946 			xmlFreeDoc(info.ktx_doc);
947 			pool_seterror(POE_DATASTORE);
948 			return (PO_FAIL);
949 		}
950 		/*
951 		 * Call xmlValidateDocument() to force the parsing of
952 		 * the DTD. Ignore errors and warning messages as we
953 		 * know the document isn't valid.
954 		 */
955 		(void) xmlValidateDocument(cvp, info.ktx_doc);
956 		xmlFreeValidCtxt(cvp);
957 		if ((info.ktx_node = node_create(NULL, BAD_CAST "system")) ==
958 		    NULL) {
959 			xmlFreeDoc(info.ktx_doc);
960 			pool_seterror(POE_DATASTORE);
961 			return (PO_FAIL);
962 		}
963 
964 		system = info.ktx_node;
965 		info.ktx_doc->_private = (void *)conf;
966 
967 		(void) xmlDocSetRootElement(info.ktx_doc, info.ktx_node);
968 		(void) xmlSetProp(info.ktx_node, BAD_CAST c_ref_id,
969 		    BAD_CAST "dummy");
970 		if ((node_comment = xmlNewDocComment(info.ktx_doc,
971 		    BAD_CAST "\nConfiguration for pools facility. Do NOT"
972 		    " edit this file by hand - use poolcfg(1)"
973 		    " or libpool(3POOL) instead.\n")) == NULL) {
974 			xmlFreeDoc(info.ktx_doc);
975 			pool_seterror(POE_DATASTORE);
976 			return (PO_FAIL);
977 		}
978 		if (xmlAddPrevSibling(info.ktx_node, node_comment) == NULL) {
979 			xmlFree(node_comment);
980 			xmlFreeDoc(info.ktx_doc);
981 			pool_seterror(POE_DATASTORE);
982 			return (PO_FAIL);
983 		}
984 		if (pool_walk_any_properties((pool_conf_t *)conf,
985 		    pool_conf_to_elem(conf), &info, prop_build_cb, 1) ==
986 		    PO_FAIL) {
987 			xmlFreeDoc(info.ktx_doc);
988 			return (PO_FAIL);
989 		}
990 		if ((cb = alloc_char_buf(CB_DEFAULT_LEN)) == NULL) {
991 			xmlFreeDoc(info.ktx_doc);
992 			return (PO_FAIL);
993 		}
994 		/*
995 		 * Now add pool details
996 		 */
997 		if ((ps = pool_query_pools(conf, &nelem, NULL)) != NULL) {
998 			for (i = 0; i < nelem; i++) {
999 				pool_elem_t *elem = TO_ELEM(ps[i]);
1000 				uint_t nreselem;
1001 				const char *sep = "";
1002 				int j;
1003 
1004 				if (elem_is_tmp(elem))
1005 					continue;
1006 
1007 				if ((info.ktx_node = node_create(system,
1008 				    BAD_CAST element_class_tags
1009 				    [pool_elem_class(elem)])) == NULL) {
1010 					free(ps);
1011 					free_char_buf(cb);
1012 					xmlFreeDoc(info.ktx_doc);
1013 					pool_seterror(POE_DATASTORE);
1014 					return (PO_FAIL);
1015 				}
1016 				if (pool_walk_any_properties(
1017 				    (pool_conf_t *)conf,
1018 				    elem, &info, prop_build_cb, 1) == PO_FAIL) {
1019 					free(ps);
1020 					free_char_buf(cb);
1021 					xmlFreeDoc(info.ktx_doc);
1022 					return (PO_FAIL);
1023 				}
1024 				/*
1025 				 * TODO: pset specific res manipulation
1026 				 */
1027 				if ((rs = pool_query_pool_resources(conf, ps[i],
1028 				    &nreselem, NULL)) == NULL) {
1029 					free(ps);
1030 					free_char_buf(cb);
1031 					xmlFreeDoc(info.ktx_doc);
1032 					pool_seterror(POE_INVALID_CONF);
1033 					return (PO_FAIL);
1034 				}
1035 				if (set_char_buf(cb, "") == PO_FAIL) {
1036 					free(rs);
1037 					free(ps);
1038 					free_char_buf(cb);
1039 					xmlFreeDoc(info.ktx_doc);
1040 					return (PO_FAIL);
1041 				}
1042 				for (j = 0; j < nreselem; j++) {
1043 					pool_elem_t *reselem = TO_ELEM(rs[j]);
1044 					if (append_char_buf(cb, "%s%s_%d", sep,
1045 					    pool_elem_class_string(reselem),
1046 					    (int)elem_get_sysid(reselem)) ==
1047 					    PO_FAIL) {
1048 						free(rs);
1049 						free(ps);
1050 						free_char_buf(cb);
1051 						xmlFreeDoc(info.ktx_doc);
1052 						return (PO_FAIL);
1053 					}
1054 					sep = " ";
1055 				}
1056 				free(rs);
1057 				(void) xmlSetProp(info.ktx_node, BAD_CAST "res",
1058 				    BAD_CAST cb->cb_buf);
1059 				if (set_char_buf(cb, "%s_%d",
1060 				    pool_elem_class_string(elem),
1061 				    (int)elem_get_sysid(elem)) == PO_FAIL) {
1062 					free(ps);
1063 					free_char_buf(cb);
1064 					xmlFreeDoc(info.ktx_doc);
1065 					return (PO_FAIL);
1066 				}
1067 				(void) xmlSetProp(info.ktx_node,
1068 				    BAD_CAST c_ref_id,
1069 				    BAD_CAST  cb->cb_buf);
1070 			}
1071 			free(ps);
1072 		}
1073 		/*
1074 		 * Now add resource details (including components)
1075 		 */
1076 		if ((rs = pool_query_resources(conf, &nelem, NULL)) != NULL) {
1077 			for (i = 0; i < nelem; i++) {
1078 				pool_elem_t *elem = TO_ELEM(rs[i]);
1079 				pool_component_t **cs = NULL;
1080 				uint_t ncompelem;
1081 				int j;
1082 
1083 				if (elem_is_tmp(elem))
1084 					continue;
1085 
1086 				if ((info.ktx_node = node_create(system,
1087 				    BAD_CAST element_class_tags
1088 				    [pool_elem_class(elem)])) == NULL) {
1089 					free(rs);
1090 					free_char_buf(cb);
1091 					xmlFreeDoc(info.ktx_doc);
1092 					pool_seterror(POE_DATASTORE);
1093 					return (PO_FAIL);
1094 				}
1095 				if (pool_walk_any_properties(
1096 				    (pool_conf_t *)conf,
1097 				    elem, &info, prop_build_cb, 1) == PO_FAIL) {
1098 					free(rs);
1099 					free_char_buf(cb);
1100 					xmlFreeDoc(info.ktx_doc);
1101 					return (PO_FAIL);
1102 				}
1103 				if (set_char_buf(cb, "%s_%d",
1104 				    pool_elem_class_string(elem),
1105 				    (int)elem_get_sysid(elem)) == PO_FAIL) {
1106 					free(rs);
1107 					free_char_buf(cb);
1108 					xmlFreeDoc(info.ktx_doc);
1109 					return (PO_FAIL);
1110 				}
1111 				(void) xmlSetProp(info.ktx_node,
1112 				    BAD_CAST c_ref_id,
1113 				    BAD_CAST  cb->cb_buf);
1114 				if ((cs = pool_query_resource_components(conf,
1115 				    rs[i], &ncompelem, NULL)) != NULL) {
1116 					xmlNodePtr resource = info.ktx_node;
1117 
1118 					for (j = 0; j < ncompelem; j++) {
1119 						pool_elem_t *compelem =
1120 						    TO_ELEM(cs[j]);
1121 						if ((info.ktx_node =
1122 						    node_create(resource,
1123 						    BAD_CAST element_class_tags
1124 						    [pool_elem_class(
1125 						    compelem)])) == NULL) {
1126 							pool_seterror(
1127 							    POE_DATASTORE);
1128 							free(rs);
1129 							free(cs);
1130 							free_char_buf(cb);
1131 							xmlFreeDoc(info.
1132 							    ktx_doc);
1133 							return (PO_FAIL);
1134 						}
1135 						if (pool_walk_any_properties(
1136 						    (pool_conf_t *)conf,
1137 						    compelem, &info,
1138 						    prop_build_cb, 1) ==
1139 						    PO_FAIL) {
1140 							free(rs);
1141 							free(cs);
1142 							free_char_buf(cb);
1143 							xmlFreeDoc(info.
1144 							    ktx_doc);
1145 							return (PO_FAIL);
1146 						}
1147 						if (set_char_buf(cb, "%s_%d",
1148 						    pool_elem_class_string(
1149 						    compelem),
1150 						    (int)elem_get_sysid(
1151 						    compelem)) == PO_FAIL) {
1152 							free(rs);
1153 							free(cs);
1154 							free_char_buf(cb);
1155 							xmlFreeDoc(info.
1156 							    ktx_doc);
1157 							return (PO_FAIL);
1158 						}
1159 						(void) xmlSetProp(info.ktx_node,
1160 						    BAD_CAST c_ref_id,
1161 						    BAD_CAST  cb->cb_buf);
1162 					}
1163 					free(cs);
1164 				}
1165 			}
1166 			free(rs);
1167 		}
1168 		free_char_buf(cb);
1169 		/*
1170 		 * Set up the message handlers prior to calling
1171 		 * xmlValidateDocument()
1172 		 */
1173 		if ((cvp = xmlNewValidCtxt()) == NULL) {
1174 			xmlFreeDoc(info.ktx_doc);
1175 			pool_seterror(POE_DATASTORE);
1176 			return (PO_FAIL);
1177 		}
1178 		cvp->error    = pool_error_func;
1179 		cvp->warning  = pool_error_func;
1180 		if (xmlValidateDocument(cvp, info.ktx_doc) == 0) {
1181 			xmlFreeValidCtxt(cvp);
1182 			xmlFreeDoc(info.ktx_doc);
1183 			pool_seterror(POE_INVALID_CONF);
1184 			return (PO_FAIL);
1185 		}
1186 		xmlFreeValidCtxt(cvp);
1187 		ret = xmlSaveFormatFile(location, info.ktx_doc, 1);
1188 		xmlFreeDoc(info.ktx_doc);
1189 		if (ret == -1) {
1190 			pool_seterror(POE_SYSTEM);
1191 			return (PO_FAIL);
1192 		}
1193 		return (PO_SUCCESS);
1194 	default:
1195 		pool_seterror(POE_BADPARAM);
1196 		return (PO_FAIL);
1197 	}
1198 }
1199 
1200 /*
1201  * Rollback the changes to the kernel
1202  */
1203 int
1204 pool_knl_recover(pool_conf_t *conf)
1205 {
1206 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
1207 
1208 	prov->pkc_log->l_state = LS_RECOVER;
1209 	if (log_reverse_walk(prov->pkc_log, log_item_undo) != PO_SUCCESS) {
1210 		dprintf("Library configuration consistency error\n");
1211 		prov->pkc_log->l_state = LS_FAIL;
1212 		pool_seterror(POE_INVALID_CONF);
1213 		return (PO_FAIL);
1214 	}
1215 	prov->pkc_log->l_state = LS_DO;
1216 	return (PO_SUCCESS);
1217 }
1218 
1219 /*
1220  * Rollback the changes to the configuration
1221  */
1222 int
1223 pool_knl_rollback(pool_conf_t *conf)
1224 {
1225 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
1226 
1227 	prov->pkc_log->l_state = LS_UNDO;
1228 	if (log_reverse_walk(prov->pkc_log, log_item_undo) != PO_SUCCESS) {
1229 		dprintf("Kernel configuration consistency error\n");
1230 		(void) log_walk(prov->pkc_log, log_item_release);
1231 		log_empty(prov->pkc_log);
1232 		prov->pkc_log->l_state = LS_FAIL;
1233 		pool_seterror(POE_INVALID_CONF);
1234 		return (PO_FAIL);
1235 	}
1236 	(void) log_walk(prov->pkc_log, log_item_release);
1237 	log_empty(prov->pkc_log);
1238 	prov->pkc_log->l_state = LS_DO;
1239 	return (PO_SUCCESS);
1240 }
1241 
1242 /*
1243  * Callback used to build the result set for a query. Each invocation will
1244  * supply a candidate element for inclusion. The element is filtered by:
1245  * - class
1246  * - properties
1247  * If the element "matches" the target, then it is added to the result
1248  * set, otherwise it is ignored.
1249  */
1250 /* ARGSUSED1 */
1251 static void
1252 build_result_set(const void *key, void **value, void *cl)
1253 {
1254 	struct query_obj *qo = (struct query_obj *)cl;
1255 	pool_knl_elem_t *pke = (pool_knl_elem_t *)key;
1256 
1257 	/*
1258 	 * Check to see if it's the right class of element
1259 	 */
1260 	if (qo->classes & (1 << pool_elem_class((pool_elem_t *)key))) {
1261 		int i;
1262 		/*
1263 		 * Now check to see if the src element is correct. If no src
1264 		 * element is supplied, ignore this check
1265 		 */
1266 		if (qo->src) {
1267 			pool_knl_elem_t *parent;
1268 
1269 			for (parent = pke; parent != NULL;
1270 			    parent = parent->pke_parent) {
1271 				if (parent == (pool_knl_elem_t *)qo->src)
1272 					break;
1273 			}
1274 			if (parent == NULL)
1275 				return;
1276 		}
1277 		/*
1278 		 * Now check for property matches (if there are any specified)
1279 		 */
1280 		if (qo->props) {
1281 			int matched = PO_TRUE;
1282 			for (i = 0; qo->props[i] != NULL; i++) {
1283 				pool_value_t val = POOL_VALUE_INITIALIZER;
1284 
1285 				if (pool_get_property(TO_CONF(TO_ELEM(pke)),
1286 				    (pool_elem_t *)pke,
1287 				    pool_value_get_name(qo->props[i]), &val) ==
1288 				    POC_INVAL) {
1289 					matched = PO_FALSE;
1290 					break;
1291 				} else {
1292 					if (pool_value_equal(qo->props[i],
1293 					    &val) != PO_TRUE) {
1294 						matched = PO_FALSE;
1295 						break;
1296 					}
1297 				}
1298 			}
1299 			if (matched == PO_TRUE)
1300 				(void) pool_knl_result_set_append(qo->rs,
1301 				    (pool_knl_elem_t *)key);
1302 		} else {
1303 			(void) pool_knl_result_set_append(qo->rs,
1304 			    (pool_knl_elem_t *)key);
1305 		}
1306 	}
1307 }
1308 
1309 /*
1310  * Execute the supplied query and return a result set which contains
1311  * all qualifying elements.
1312  */
1313 pool_result_set_t *
1314 pool_knl_exec_query(const pool_conf_t *conf, const pool_elem_t *src,
1315     const char *src_attr, pool_elem_class_t classes, pool_value_t **props)
1316 {
1317 	pool_knl_result_set_t *rs;
1318 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
1319 	struct query_obj qo;
1320 	int matched = PO_TRUE;
1321 
1322 	/*
1323 	 * Have a buffer at this point, that we can use
1324 	 */
1325 	if ((rs = pool_knl_result_set_alloc(conf)) == NULL) {
1326 		return (NULL);
1327 	}
1328 	qo.conf = conf;
1329 	qo.src = src;
1330 	qo.src_attr = src_attr;
1331 	qo.classes = classes;
1332 	qo.props = props;
1333 	qo.rs = rs;
1334 	if (src_attr != NULL) {
1335 		pool_knl_pool_t *pkp = (pool_knl_pool_t *)src;
1336 
1337 		/*
1338 		 * Note: This logic is resource specific and must be
1339 		 * extended for additional resource types.
1340 		 */
1341 		/*
1342 		 * Check for property matches (if there are any specified)
1343 		 */
1344 		if (props) {
1345 			int i;
1346 
1347 			for (i = 0; props[i] != NULL; i++) {
1348 				pool_value_t val = POOL_VALUE_INITIALIZER;
1349 
1350 				if (pool_get_property(conf,
1351 				    (pool_elem_t *)pkp->pkp_assoc[PREC_PSET],
1352 				    pool_value_get_name(props[i]), &val) ==
1353 				    POC_INVAL) {
1354 					matched = PO_FALSE;
1355 					break;
1356 				} else {
1357 					if (pool_value_equal(props[i],
1358 					    &val) != PO_TRUE) {
1359 						matched = PO_FALSE;
1360 						break;
1361 					}
1362 				}
1363 			}
1364 		}
1365 
1366 		if (matched == PO_TRUE)
1367 			(void) pool_knl_result_set_append(rs,
1368 			    (pool_knl_elem_t *)pkp->pkp_assoc[PREC_PSET]);
1369 	} else
1370 		dict_map(prov->pkc_elements, build_result_set, &qo);
1371 
1372 	if (rs->pkr_count == 0)
1373 		pool_seterror(POE_INVALID_SEARCH);
1374 	return ((pool_result_set_t *)rs);
1375 }
1376 
1377 /*
1378  * Callback function intended to be used from pool_walk_pools(). If
1379  * the supplied pool is not the default pool attempt to destroy it.
1380  */
1381 /*ARGSUSED*/
1382 static int
1383 destroy_pool_cb(pool_conf_t *conf, pool_t *pool, void *unused)
1384 {
1385 	if (elem_is_default(TO_ELEM(pool)) != PO_TRUE)
1386 		return (pool_destroy(conf, pool));
1387 	/*
1388 	 * Return PO_SUCCESS even though we don't delete the default
1389 	 * pool so that the walk continues
1390 	 */
1391 	return (PO_SUCCESS);
1392 }
1393 
1394 /*
1395  * Remove the configuration details. This means remove all elements
1396  * apart from the system elements.
1397  */
1398 int
1399 pool_knl_remove(pool_conf_t *conf)
1400 {
1401 	uint_t i, nelem;
1402 	pool_resource_t **resources;
1403 
1404 	conf->pc_state = POF_DESTROY;
1405 	if ((resources = pool_query_resources(conf, &nelem, NULL)) != NULL) {
1406 		for (i = 0; i < nelem; i++) {
1407 			if (resource_is_system(resources[i]) == PO_FALSE)
1408 				if (pool_resource_destroy(conf, resources[i]) !=
1409 				    PO_SUCCESS) {
1410 					pool_seterror(POE_INVALID_CONF);
1411 					return (PO_FAIL);
1412 				}
1413 		}
1414 		free(resources);
1415 	}
1416 	(void) pool_walk_pools(conf, conf, destroy_pool_cb);
1417 	if (pool_conf_commit(conf, PO_FALSE) != PO_SUCCESS)
1418 		return (PO_FAIL);
1419 
1420 	if (pool_conf_close(conf) != PO_SUCCESS)
1421 		return (PO_FAIL);
1422 
1423 	return (PO_SUCCESS);
1424 }
1425 
1426 /*
1427  * Determine the name of the pool to which the supplied pid is
1428  * bound. If it cannot be determined return NULL.
1429  */
1430 char *
1431 pool_knl_get_binding(pool_conf_t *conf, pid_t pid)
1432 {
1433 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
1434 	const char *sval;
1435 	char *name = NULL;
1436 	pool_bindq_t bindq;
1437 	pool_value_t *props[] = { NULL, NULL };
1438 	uint_t nelem = 0;
1439 	pool_t **pools;
1440 	pool_value_t val = POOL_VALUE_INITIALIZER;
1441 
1442 	props[0] = &val;
1443 
1444 	bindq.pb_o_id_type = P_PID;
1445 	bindq.pb_o_id = pid;
1446 	if (ioctl(prov->pkc_fd, POOL_BINDQ, &bindq) < 0) {
1447 		pool_seterror(POE_SYSTEM);
1448 		return (NULL);
1449 	}
1450 
1451 	if (pool_value_set_name(props[0], "pool.sys_id") != PO_SUCCESS) {
1452 		return (NULL);
1453 	}
1454 	pool_value_set_int64(props[0], bindq.pb_i_id);
1455 	if ((pools = pool_query_pools(conf, &nelem, props)) == NULL) {
1456 		pool_seterror(POE_BADPARAM);
1457 		return (NULL);
1458 	}
1459 
1460 	if (nelem != 1) {
1461 		free(pools);
1462 		pool_seterror(POE_INVALID_CONF);
1463 		return (NULL);
1464 	}
1465 	if (pool_get_ns_property(TO_ELEM(pools[0]), c_name, props[0])
1466 	    == POC_INVAL) {
1467 		free(pools);
1468 		return (NULL);
1469 	}
1470 	if (pool_value_get_string(props[0], &sval) != PO_SUCCESS) {
1471 		free(pools);
1472 		return (NULL);
1473 	}
1474 	if ((name = strdup(sval)) == NULL) {
1475 		free(pools);
1476 		pool_seterror(POE_SYSTEM);
1477 		return (NULL);
1478 	}
1479 	return (name);
1480 }
1481 
1482 /*
1483  * Bind idtype id to the pool name.
1484  */
1485 int
1486 pool_knl_set_binding(pool_conf_t *conf, const char *pool_name, idtype_t idtype,
1487     id_t id)
1488 {
1489 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
1490 	pool_bind_t bind;
1491 	pool_t *pool;
1492 	int ret;
1493 
1494 	if ((pool = pool_get_pool(conf, pool_name)) == NULL)
1495 		return (PO_FAIL);
1496 
1497 	bind.pb_o_id_type = idtype;
1498 	bind.pb_o_id = id;
1499 	bind.pb_o_pool_id = elem_get_sysid(TO_ELEM(pool));
1500 
1501 	while ((ret = ioctl(prov->pkc_fd, POOL_BIND, &bind)) < 0 &&
1502 	    errno == EAGAIN)
1503 		;
1504 	if (ret < 0) {
1505 		pool_seterror(POE_SYSTEM);
1506 		return (PO_FAIL);
1507 	}
1508 	return (PO_SUCCESS);
1509 }
1510 
1511 /*
1512  * pool_knl_get_resource_binding() returns the binding for a pid to
1513  * the supplied type of resource. If a binding cannot be determined,
1514  * NULL is returned.
1515  */
1516 char *
1517 pool_knl_get_resource_binding(pool_conf_t *conf,
1518     pool_resource_elem_class_t type, pid_t pid)
1519 {
1520 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
1521 	const char *sval;
1522 	char *name = NULL;
1523 	pool_bindq_t bindq;
1524 	pool_value_t *props[] = { NULL, NULL };
1525 	uint_t nelem = 0;
1526 	pool_t **pools;
1527 	pool_resource_t **resources;
1528 	pool_value_t val = POOL_VALUE_INITIALIZER;
1529 
1530 	props[0] = &val;
1531 	bindq.pb_o_id_type = P_PID;
1532 	bindq.pb_o_id = pid;
1533 	if (ioctl(prov->pkc_fd, POOL_BINDQ, &bindq) < 0) {
1534 		pool_seterror(POE_SYSTEM);
1535 		return (NULL);
1536 	}
1537 
1538 	if (pool_value_set_name(props[0], "pool.sys_id") != PO_SUCCESS) {
1539 		return (NULL);
1540 	}
1541 	pool_value_set_int64(props[0], bindq.pb_i_id);
1542 	if ((pools = pool_query_pools(conf, &nelem, props)) == NULL) {
1543 		pool_seterror(POE_BADPARAM);
1544 		return (NULL);
1545 	}
1546 
1547 	if (nelem != 1) {
1548 		free(pools);
1549 		pool_seterror(POE_INVALID_CONF);
1550 		return (NULL);
1551 	}
1552 
1553 	if (pool_value_set_string(props[0], pool_resource_type_string(type)) !=
1554 	    PO_SUCCESS ||
1555 	    pool_value_set_name(props[0], c_type) != PO_SUCCESS) {
1556 		free(pools);
1557 		return (NULL);
1558 	}
1559 
1560 	if ((resources = pool_query_pool_resources(conf, pools[0], &nelem,
1561 	    NULL)) == NULL) {
1562 		free(pools);
1563 		pool_seterror(POE_INVALID_CONF);
1564 		return (NULL);
1565 	}
1566 	free(pools);
1567 	if (nelem != 1) {
1568 		free(resources);
1569 		pool_seterror(POE_INVALID_CONF);
1570 		return (NULL);
1571 	}
1572 	if (pool_get_ns_property(TO_ELEM(resources[0]), c_name, props[0]) ==
1573 	    POC_INVAL) {
1574 		free(resources);
1575 		return (NULL);
1576 	}
1577 	free(resources);
1578 	if (pool_value_get_string(props[0], &sval) != PO_SUCCESS) {
1579 		return (NULL);
1580 	}
1581 	if ((name = strdup(sval)) == NULL) {
1582 		pool_seterror(POE_SYSTEM);
1583 		return (NULL);
1584 	}
1585 	return (name);
1586 }
1587 
1588 /*
1589  * Allocate the required library data structure and initialise it.
1590  */
1591 pool_knl_elem_t *
1592 pool_knl_elem_wrap(pool_conf_t *conf, pool_elem_class_t class,
1593     pool_resource_elem_class_t res_class,
1594     pool_component_elem_class_t comp_class)
1595 {
1596 	pool_knl_elem_t *elem;
1597 	pool_elem_t *pe;
1598 
1599 	switch (class) {
1600 	case PEC_SYSTEM:
1601 		if ((elem = malloc(sizeof (pool_knl_system_t))) == NULL) {
1602 			pool_seterror(POE_SYSTEM);
1603 			return (NULL);
1604 		}
1605 		(void) memset(elem, 0, sizeof (pool_knl_system_t));
1606 		break;
1607 	case PEC_POOL:
1608 		if ((elem = malloc(sizeof (pool_knl_pool_t))) == NULL) {
1609 			pool_seterror(POE_SYSTEM);
1610 			return (NULL);
1611 		}
1612 		(void) memset(elem, 0, sizeof (pool_knl_pool_t));
1613 		break;
1614 	case PEC_RES_COMP:
1615 	case PEC_RES_AGG:
1616 		if ((elem = malloc(sizeof (pool_knl_resource_t))) == NULL) {
1617 			pool_seterror(POE_SYSTEM);
1618 			return (NULL);
1619 		}
1620 		(void) memset(elem, 0, sizeof (pool_knl_resource_t));
1621 		break;
1622 	case PEC_COMP:
1623 		if ((elem = malloc(sizeof (pool_knl_component_t))) == NULL) {
1624 			pool_seterror(POE_SYSTEM);
1625 			return (NULL);
1626 		}
1627 		(void) memset(elem, 0, sizeof (pool_knl_component_t));
1628 		break;
1629 	default:
1630 		pool_seterror(POE_BADPARAM);
1631 		return (NULL);
1632 	}
1633 	pe = TO_ELEM(elem);
1634 	pe->pe_conf = conf;
1635 	pe->pe_class = class;
1636 	pe->pe_resource_class = res_class;
1637 	pe->pe_component_class = comp_class;
1638 	/* Set up the function pointers for element manipulation */
1639 	pe->pe_get_prop = pool_knl_get_property;
1640 	pe->pe_put_prop = pool_knl_put_property;
1641 	pe->pe_rm_prop = pool_knl_rm_property;
1642 	pe->pe_get_props = pool_knl_get_properties;
1643 	pe->pe_remove = pool_knl_elem_remove;
1644 	pe->pe_get_container = pool_knl_get_container;
1645 	pe->pe_set_container = pool_knl_set_container;
1646 	/*
1647 	 * Specific initialisation for different types of element
1648 	 */
1649 	if (class == PEC_POOL) {
1650 		pool_knl_pool_t *pp = (pool_knl_pool_t *)elem;
1651 		pp->pp_associate = pool_knl_pool_associate;
1652 		pp->pp_dissociate = pool_knl_pool_dissociate;
1653 		pp->pkp_assoc[PREC_PSET] = (pool_knl_resource_t *)
1654 		    resource_by_sysid(conf, PS_NONE, "pset");
1655 	}
1656 	if (class == PEC_RES_COMP || class == PEC_RES_AGG) {
1657 		pool_knl_resource_t *pr = (pool_knl_resource_t *)elem;
1658 		pr->pr_is_system = pool_knl_resource_is_system;
1659 		pr->pr_can_associate = pool_knl_resource_can_associate;
1660 	}
1661 #if DEBUG
1662 	if (dict_put(((pool_knl_connection_t *)conf->pc_prov)->pkc_leaks,
1663 	    elem, elem) != NULL)
1664 		assert(!"leak map put failed");
1665 	dprintf("allocated %p\n", elem);
1666 #endif	/* DEBUG */
1667 	return (elem);
1668 }
1669 
1670 /*
1671  * Allocate a new pool_knl_elem_t in the supplied configuration of the
1672  * specified class.
1673  * Returns element pointer/NULL
1674  */
1675 pool_elem_t *
1676 pool_knl_elem_create(pool_conf_t *conf, pool_elem_class_t class,
1677     pool_resource_elem_class_t res_class,
1678     pool_component_elem_class_t comp_class)
1679 {
1680 	pool_knl_elem_t *elem;
1681 	pool_create_undo_t *create;
1682 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
1683 	static int id = -3;
1684 	char_buf_t *cb;
1685 
1686 	if ((elem = pool_knl_elem_wrap(conf, class, res_class, comp_class)) ==
1687 	    NULL)
1688 		return (NULL);
1689 
1690 	/*
1691 	 * Allocate an nvlist to hold properties
1692 	 */
1693 	if (nvlist_alloc(&elem->pke_properties, NV_UNIQUE_NAME_TYPE, 0) != 0) {
1694 		pool_knl_elem_free(elem, PO_FALSE);
1695 		pool_seterror(POE_SYSTEM);
1696 		return (NULL);
1697 	}
1698 	/*
1699 	 * Allocate a temporary ID and name until the element is
1700 	 * created for real
1701 	 */
1702 	if ((cb = alloc_char_buf(CB_DEFAULT_LEN)) == NULL) {
1703 		pool_knl_elem_free(elem, PO_TRUE);
1704 		return (NULL);
1705 	}
1706 	if (set_char_buf(cb, "%s.sys_id",
1707 	    pool_elem_class_string((pool_elem_t *)elem)) != PO_SUCCESS) {
1708 		pool_knl_elem_free(elem, PO_TRUE);
1709 		free_char_buf(cb);
1710 		return (NULL);
1711 	}
1712 	(void) nvlist_add_int64(elem->pke_properties, cb->cb_buf, id--);
1713 	if (set_char_buf(cb, "%s.name",
1714 	    pool_elem_class_string((pool_elem_t *)elem)) != PO_SUCCESS) {
1715 		pool_knl_elem_free(elem, PO_TRUE);
1716 		free_char_buf(cb);
1717 		return (NULL);
1718 	}
1719 	(void) nvlist_add_string(elem->pke_properties, cb->cb_buf, "");
1720 	/*
1721 	 * If it's a resource class, it will need an initial size
1722 	 */
1723 	if (class == PEC_RES_COMP || class == PEC_RES_AGG) {
1724 		if (set_char_buf(cb, "%s.size",
1725 		    pool_elem_class_string((pool_elem_t *)elem)) !=
1726 		    PO_SUCCESS) {
1727 			pool_knl_elem_free(elem, PO_TRUE);
1728 			free_char_buf(cb);
1729 			return (NULL);
1730 		}
1731 		(void) nvlist_add_uint64(elem->pke_properties, cb->cb_buf, 0);
1732 	}
1733 	free_char_buf(cb);
1734 
1735 	/*
1736 	 * Register the newly created element
1737 	 */
1738 	if (dict_put(prov->pkc_elements, elem, elem) != NULL) {
1739 		pool_knl_elem_free(elem, PO_TRUE);
1740 		pool_seterror(POE_SYSTEM);
1741 		return (NULL);
1742 	}
1743 
1744 	if (prov->pkc_log->l_state != LS_DO)
1745 		return ((pool_elem_t *)elem);
1746 
1747 	/*
1748 	 * The remaining logic is setting up the arguments for the
1749 	 * POOL_CREATE ioctl and appending the details into the log.
1750 	 */
1751 	if ((create = malloc(sizeof (pool_create_undo_t))) == NULL) {
1752 		pool_seterror(POE_SYSTEM);
1753 		return (NULL);
1754 	}
1755 	create->pcu_ioctl.pc_o_type = class;
1756 	switch (class) {
1757 	case PEC_SYSTEM:
1758 		pool_seterror(POE_BADPARAM);
1759 		free(create);
1760 		return (NULL);
1761 	case PEC_POOL: /* NO-OP */
1762 		break;
1763 	case PEC_RES_COMP:
1764 	case PEC_RES_AGG:
1765 		create->pcu_ioctl.pc_o_sub_type = res_class;
1766 		break;
1767 	case PEC_COMP:
1768 		create->pcu_ioctl.pc_o_sub_type = comp_class;
1769 		break;
1770 	default:
1771 		pool_seterror(POE_BADPARAM);
1772 		free(create);
1773 		return (NULL);
1774 	}
1775 
1776 	create->pcu_elem = (pool_elem_t *)elem;
1777 
1778 	if (log_append(prov->pkc_log, POOL_CREATE, (void *)create) !=
1779 	    PO_SUCCESS) {
1780 		free(create);
1781 		return (NULL);
1782 	}
1783 	return ((pool_elem_t *)elem);
1784 }
1785 
1786 /*
1787  * Remove the details of the element from our userland copy and destroy
1788  * the element (if appropriate) in the kernel.
1789  */
1790 int
1791 pool_knl_elem_remove(pool_elem_t *pe)
1792 {
1793 	pool_knl_connection_t *prov;
1794 	pool_destroy_undo_t *destroy;
1795 
1796 	prov = (pool_knl_connection_t *)(TO_CONF(pe))->pc_prov;
1797 
1798 	if (dict_remove(prov->pkc_elements, pe) == NULL) {
1799 		pool_seterror(POE_SYSTEM);
1800 		return (PO_FAIL);
1801 	}
1802 	if (prov->pkc_log->l_state != LS_DO) {
1803 		return (PO_SUCCESS);
1804 	}
1805 
1806 	/*
1807 	 * The remaining logic is setting up the arguments for the
1808 	 * POOL_DESTROY ioctl and appending the details into the log.
1809 	 */
1810 	if ((destroy = malloc(sizeof (pool_destroy_undo_t))) == NULL) {
1811 		pool_seterror(POE_SYSTEM);
1812 		return (PO_FAIL);
1813 	}
1814 	destroy->pdu_ioctl.pd_o_type = pool_elem_class(pe);
1815 
1816 	if (destroy->pdu_ioctl.pd_o_type == PEC_RES_COMP ||
1817 	    destroy->pdu_ioctl.pd_o_type == PEC_RES_AGG)
1818 		destroy->pdu_ioctl.pd_o_sub_type = pool_resource_elem_class(pe);
1819 
1820 	if (destroy->pdu_ioctl.pd_o_type == PEC_COMP)
1821 		destroy->pdu_ioctl.pd_o_sub_type =
1822 		    pool_component_elem_class(pe);
1823 
1824 	destroy->pdu_elem = pe;
1825 
1826 	if (log_append(prov->pkc_log, POOL_DESTROY, (void *)destroy) !=
1827 	    PO_SUCCESS) {
1828 		free(destroy);
1829 		return (PO_FAIL);
1830 	}
1831 	return (PO_SUCCESS);
1832 }
1833 
1834 /*
1835  * Set the parent of the supplied child to the supplied parent
1836  */
1837 int
1838 pool_knl_set_container(pool_elem_t *pp, pool_elem_t *pc)
1839 {
1840 	pool_knl_elem_t *pkp = (pool_knl_elem_t *)pp;
1841 	pool_knl_elem_t *pkc = (pool_knl_elem_t *)pc;
1842 
1843 	pkc->pke_parent = pkp;
1844 	return (PO_SUCCESS);
1845 }
1846 
1847 /*
1848  * TODO: Needed for msets and ssets.
1849  */
1850 /* ARGSUSED */
1851 int
1852 pool_knl_res_transfer(pool_resource_t *src, pool_resource_t *tgt,
1853     uint64_t size) {
1854 	return (PO_FAIL);
1855 }
1856 
1857 /*
1858  * Transfer resource components from one resource set to another.
1859  */
1860 int
1861 pool_knl_res_xtransfer(pool_resource_t *src, pool_resource_t *tgt,
1862     pool_component_t **rl) {
1863 	pool_elem_t *src_e = TO_ELEM(src);
1864 	pool_elem_t *tgt_e = TO_ELEM(tgt);
1865 	pool_xtransfer_undo_t *xtransfer;
1866 	size_t size;
1867 	pool_knl_connection_t *prov =
1868 	    (pool_knl_connection_t *)TO_CONF(src_e)->pc_prov;
1869 
1870 	if (prov->pkc_log->l_state != LS_DO) {
1871 		/*
1872 		 * Walk the Result Set and move the resource components
1873 		 */
1874 		for (size = 0; rl[size] != NULL; size++) {
1875 			if (pool_set_container(TO_ELEM(tgt),
1876 			    TO_ELEM(rl[size])) == PO_FAIL) {
1877 				return (PO_FAIL);
1878 			}
1879 		}
1880 		return (PO_SUCCESS);
1881 	}
1882 
1883 	/*
1884 	 * The remaining logic is setting up the arguments for the
1885 	 * POOL_XTRANSFER ioctl and appending the details into the log.
1886 	 */
1887 	if ((xtransfer = malloc(sizeof (pool_xtransfer_undo_t))) == NULL) {
1888 		pool_seterror(POE_SYSTEM);
1889 		return (PO_FAIL);
1890 	}
1891 
1892 	if (pool_elem_class(src_e) == PEC_RES_COMP) {
1893 		xtransfer->pxu_ioctl.px_o_id_type =
1894 		    pool_resource_elem_class(src_e);
1895 	} else {
1896 		pool_seterror(POE_BADPARAM);
1897 		return (PO_FAIL);
1898 	}
1899 
1900 
1901 	for (xtransfer->pxu_ioctl.px_o_complist_size = 0;
1902 	    rl[xtransfer->pxu_ioctl.px_o_complist_size] != NULL;
1903 	    xtransfer->pxu_ioctl.px_o_complist_size++)
1904 		/* calculate the size using the terminating NULL */;
1905 	if ((xtransfer->pxu_ioctl.px_o_comp_list =
1906 		calloc(xtransfer->pxu_ioctl.px_o_complist_size,
1907 		sizeof (id_t))) == NULL) {
1908 		pool_seterror(POE_SYSTEM);
1909 		return (PO_FAIL);
1910 	}
1911 	if ((xtransfer->pxu_rl = calloc(
1912 	    xtransfer->pxu_ioctl.px_o_complist_size + 1,
1913 	    sizeof (pool_component_t *))) == NULL) {
1914 		pool_seterror(POE_SYSTEM);
1915 		return (PO_FAIL);
1916 	}
1917 	(void) memcpy(xtransfer->pxu_rl, rl,
1918 	    xtransfer->pxu_ioctl.px_o_complist_size *
1919 	    sizeof (pool_component_t *));
1920 	xtransfer->pxu_src = src_e;
1921 	xtransfer->pxu_tgt = tgt_e;
1922 
1923 	if (log_append(prov->pkc_log, POOL_XTRANSFER, (void *)xtransfer) !=
1924 	    PO_SUCCESS) {
1925 		free(xtransfer);
1926 		return (PO_FAIL);
1927 	}
1928 	for (size = 0; rl[size] != NULL; size++) {
1929 		if (pool_set_container(TO_ELEM(tgt), TO_ELEM(rl[size])) ==
1930 		    PO_FAIL) {
1931 			return (PO_FAIL);
1932 		}
1933 	}
1934 	return (PO_SUCCESS);
1935 }
1936 
1937 /*
1938  * Return the parent of an element.
1939  */
1940 pool_elem_t *
1941 pool_knl_get_container(const pool_elem_t *pe)
1942 {
1943 	pool_knl_elem_t *pke = (pool_knl_elem_t *)pe;
1944 
1945 	return ((pool_elem_t *)pke->pke_parent);
1946 }
1947 
1948 /*
1949  * Note: This function is resource specific, needs extending for other
1950  * resource types
1951  */
1952 int
1953 pool_knl_resource_is_system(const pool_resource_t *pr)
1954 {
1955 	switch (pool_resource_elem_class(TO_ELEM(pr))) {
1956 	case PREC_PSET:
1957 		return (PSID_IS_SYSSET(
1958 		    elem_get_sysid(TO_ELEM(pr))));
1959 	default:
1960 		return (PO_FALSE);
1961 	}
1962 }
1963 
1964 /*
1965  * Note: This function is resource specific, needs extending for other
1966  * resource types
1967  */
1968 int
1969 pool_knl_resource_can_associate(const pool_resource_t *pr)
1970 {
1971 	switch (pool_resource_elem_class(TO_ELEM(pr))) {
1972 	case PREC_PSET:
1973 		return (PO_TRUE);
1974 	default:
1975 		return (PO_FALSE);
1976 	}
1977 }
1978 
1979 /*
1980  * pool_knl_pool_associate() associates the supplied resource to the
1981  * supplied pool.
1982  *
1983  * Returns: PO_SUCCESS/PO_FAIL
1984  */
1985 int
1986 pool_knl_pool_associate(pool_t *pool, const pool_resource_t *resource)
1987 {
1988 	pool_knl_connection_t *prov;
1989 	pool_knl_pool_t *pkp = (pool_knl_pool_t *)pool;
1990 	pool_resource_elem_class_t res_class =
1991 	    pool_resource_elem_class(TO_ELEM(resource));
1992 	pool_assoc_undo_t *assoc;
1993 	pool_knl_resource_t *orig_res = pkp->pkp_assoc[res_class];
1994 
1995 	/*
1996 	 * Are we allowed to associate with this target?
1997 	 */
1998 	if (pool_knl_resource_can_associate(resource) == PO_FALSE) {
1999 		pool_seterror(POE_BADPARAM);
2000 		return (PO_FAIL);
2001 	}
2002 	prov = (pool_knl_connection_t *)(TO_CONF(TO_ELEM(pool)))->pc_prov;
2003 
2004 	if (prov->pkc_log->l_state != LS_DO) {
2005 		pkp->pkp_assoc[res_class] = (pool_knl_resource_t *)resource;
2006 		return (PO_SUCCESS);
2007 	}
2008 
2009 	/*
2010 	 * The remaining logic is setting up the arguments for the
2011 	 * POOL_ASSOC ioctl and appending the details into the log.
2012 	 */
2013 	if ((assoc = malloc(sizeof (pool_assoc_undo_t))) == NULL) {
2014 		pool_seterror(POE_SYSTEM);
2015 		return (PO_FAIL);
2016 	}
2017 	assoc->pau_assoc = TO_ELEM(pool);
2018 	assoc->pau_oldres = (pool_elem_t *)orig_res;
2019 	assoc->pau_newres = TO_ELEM(resource);
2020 
2021 	assoc->pau_ioctl.pa_o_id_type = res_class;
2022 
2023 	if (log_append(prov->pkc_log, POOL_ASSOC, (void *)assoc) !=
2024 	    PO_SUCCESS) {
2025 		free(assoc);
2026 		pkp->pkp_assoc[res_class] = orig_res;
2027 		return (PO_FAIL);
2028 	}
2029 	pkp->pkp_assoc[res_class] = (pool_knl_resource_t *)resource;
2030 	return (PO_SUCCESS);
2031 }
2032 
2033 /*
2034  * pool_knl_pool_dissociate() dissociates the supplied resource from
2035  * the supplied pool.
2036  *
2037  * Returns: PO_SUCCESS/PO_FAIL
2038  */
2039 int
2040 pool_knl_pool_dissociate(pool_t *pool, const pool_resource_t *resource)
2041 {
2042 	pool_knl_connection_t *prov;
2043 	pool_dissoc_undo_t *dissoc;
2044 	pool_knl_pool_t *pkp = (pool_knl_pool_t *)pool;
2045 	pool_resource_t *default_res = (pool_resource_t *)get_default_resource(
2046 	    resource);
2047 	pool_resource_elem_class_t res_class =
2048 	    pool_resource_elem_class(TO_ELEM(resource));
2049 
2050 	prov = (pool_knl_connection_t *)(TO_CONF(TO_ELEM(pool)))->pc_prov;
2051 
2052 	if (prov->pkc_log->l_state != LS_DO) {
2053 		pkp->pkp_assoc[res_class] = (pool_knl_resource_t *)default_res;
2054 		return (PO_SUCCESS);
2055 	}
2056 	/*
2057 	 * The remaining logic is setting up the arguments for the
2058 	 * POOL_DISSOC ioctl and appending the details into the log.
2059 	 */
2060 	if ((dissoc = malloc(sizeof (pool_dissoc_undo_t))) == NULL) {
2061 		pool_seterror(POE_SYSTEM);
2062 		return (PO_FAIL);
2063 	}
2064 	dissoc->pdu_dissoc = TO_ELEM(pool);
2065 	dissoc->pdu_oldres = TO_ELEM(resource);
2066 	dissoc->pdu_newres = TO_ELEM(default_res);
2067 
2068 	dissoc->pdu_ioctl.pd_o_id_type = res_class;
2069 
2070 	if (log_append(prov->pkc_log, POOL_DISSOC, (void *)dissoc) !=
2071 	    PO_SUCCESS) {
2072 		free(dissoc);
2073 		pkp->pkp_assoc[res_class] = (pool_knl_resource_t *)resource;
2074 		return (PO_FAIL);
2075 	}
2076 
2077 	/*
2078 	 * Update our local copy
2079 	 */
2080 	pkp->pkp_assoc[res_class] = (pool_knl_resource_t *)default_res;
2081 	return (PO_SUCCESS);
2082 }
2083 
2084 /*
2085  * Allocate a data provider for the supplied configuration and optionally
2086  * discover resources.
2087  * The data provider is the cross over point from the "abstract" configuration
2088  * functions into the data representation specific manipulation routines.
2089  * This function sets up all the required pointers to create a kernel aware
2090  * data provider.
2091  * Returns PO_SUCCESS/PO_FAIL
2092  */
2093 int
2094 pool_knl_connection_alloc(pool_conf_t *conf, int oflags)
2095 {
2096 	pool_knl_connection_t *prov;
2097 
2098 	if ((prov = malloc(sizeof (pool_knl_connection_t))) == NULL) {
2099 		pool_seterror(POE_SYSTEM);
2100 		return (PO_FAIL);
2101 	}
2102 	(void) memset(prov, 0, sizeof (pool_knl_connection_t));
2103 	/*
2104 	 * Initialise data members
2105 	 */
2106 	prov->pc_name = strdup("kernel");
2107 	prov->pc_store_type = KERNEL_DATA_STORE;
2108 	prov->pc_oflags = oflags;
2109 	/*
2110 	 * Initialise function pointers
2111 	 */
2112 	prov->pc_close = pool_knl_close;
2113 	prov->pc_validate = pool_knl_validate;
2114 	prov->pc_commit = pool_knl_commit;
2115 	prov->pc_export = pool_knl_export;
2116 	prov->pc_rollback = pool_knl_rollback;
2117 	prov->pc_exec_query = pool_knl_exec_query;
2118 	prov->pc_elem_create = pool_knl_elem_create;
2119 	prov->pc_remove = pool_knl_remove;
2120 	prov->pc_res_xfer = pool_knl_res_transfer;
2121 	prov->pc_res_xxfer = pool_knl_res_xtransfer;
2122 	prov->pc_get_binding = pool_knl_get_binding;
2123 	prov->pc_set_binding = pool_knl_set_binding;
2124 	prov->pc_get_resource_binding = pool_knl_get_resource_binding;
2125 	/*
2126 	 * Associate the provider to it's configuration
2127 	 */
2128 	conf->pc_prov = (pool_connection_t *)prov;
2129 	/*
2130 	 * End of common initialisation
2131 	 */
2132 	/*
2133 	 * Attempt to open the pseudo device, if the configuration is opened
2134 	 * readonly then try to open an info device, otherwise try to open
2135 	 * the writeable device.
2136 	 */
2137 	if (oflags & PO_RDWR) {
2138 		if ((prov->pkc_fd = blocking_open(pool_dynamic_location(),
2139 		    O_RDWR)) < 0) {
2140 			free(prov);
2141 			conf->pc_prov = NULL;
2142 			pool_seterror(POE_SYSTEM);
2143 			return (PO_FAIL);
2144 		}
2145 	} else {
2146 		if ((prov->pkc_fd = open(pool_info_location, O_RDWR)) < 0) {
2147 			free(prov);
2148 			conf->pc_prov = NULL;
2149 			pool_seterror(POE_SYSTEM);
2150 			return (PO_FAIL);
2151 		}
2152 	}
2153 	/*
2154 	 * Allocate the element dictionary
2155 	 */
2156 	if ((prov->pkc_elements = dict_new((int (*)(const void *, const void *))
2157 	    pool_elem_compare, (uint64_t (*)(const void *))hash_id)) == NULL) {
2158 		(void) close(prov->pkc_fd);
2159 		free(prov);
2160 		conf->pc_prov = NULL;
2161 		pool_seterror(POE_SYSTEM);
2162 		return (PO_FAIL);
2163 	}
2164 #if DEBUG
2165 	if ((prov->pkc_leaks = dict_new(NULL, NULL)) == NULL) {
2166 		dict_free(&prov->pkc_elements);
2167 		(void) close(prov->pkc_fd);
2168 		free(prov);
2169 		conf->pc_prov = NULL;
2170 		pool_seterror(POE_SYSTEM);
2171 		return (PO_FAIL);
2172 	}
2173 #endif	/* DEBUG */
2174 	/*
2175 	 * Allocate the transaction log
2176 	 */
2177 	if ((prov->pkc_log = log_alloc(conf)) == NULL) {
2178 #if DEBUG
2179 		dict_free(&prov->pkc_leaks);
2180 #endif	/* DEBUG */
2181 		dict_free(&prov->pkc_elements);
2182 		(void) close(prov->pkc_fd);
2183 		free(prov);
2184 		conf->pc_prov = NULL;
2185 		return (PO_FAIL);
2186 	}
2187 	/*
2188 	 * At this point the configuration provider has been initialized,
2189 	 * mark the configuration as valid so that the various routines
2190 	 * which rely on a valid configuration will work correctly.
2191 	 */
2192 	conf->pc_state = POF_VALID;
2193 	/*
2194 	 * Update the library snapshot from the kernel
2195 	 */
2196 	if (pool_knl_update(conf, NULL) != PO_SUCCESS) {
2197 #if DEBUG
2198 		dict_free(&prov->pkc_leaks);
2199 #endif	/* DEBUG */
2200 		dict_free(&prov->pkc_elements);
2201 		(void) close(prov->pkc_fd);
2202 		free(prov);
2203 		conf->pc_prov = NULL;
2204 		conf->pc_state = POF_INVALID;
2205 		return (PO_FAIL);
2206 	}
2207 	return (PO_SUCCESS);
2208 }
2209 
#if DEBUG
/*
 * pool_knl_elem_printf_cb() is a dict_map() callback used in DEBUG
 * builds to report an element which is still present in the leak
 * dictionary. The element's address and properties (if any) are
 * printed, and it is asserted that the element is not present in the
 * dictionary passed via cl.
 */
static void
pool_knl_elem_printf_cb(const void *key, void **value, void *cl)
{
	dict_hdl_t *elements = (dict_hdl_t *)cl;
	pool_knl_elem_t *leaked = (pool_knl_elem_t *)key;

	dprintf("leak elem:%p\n", leaked);
	if (leaked->pke_properties == NULL)
		dprintf("no properties\n");
	else
		nvlist_print(stdout, leaked->pke_properties);
	assert(dict_get(elements, leaked) == NULL);
}
#endif	/* DEBUG */
2225 /*
2226  * pool_knl_elem_free() releases the resources associated with the
2227  * supplied element.
2228  */
2229 static void
2230 pool_knl_elem_free(pool_knl_elem_t *pke, int freeprop)
2231 {
2232 #if DEBUG
2233 	pool_conf_t *conf = TO_CONF(TO_ELEM(pke));
2234 	if (dict_remove(((pool_knl_connection_t *)conf->pc_prov)->pkc_leaks,
2235 	    pke) == NULL)
2236 		dprintf("%p, wasn't in the leak map\n", pke);
2237 	if (freeprop == PO_TRUE) {
2238 		pool_elem_dprintf(TO_ELEM(pke));
2239 	}
2240 	dprintf("released %p\n", pke);
2241 #endif	/* DEBUG */
2242 	if (freeprop == PO_TRUE) {
2243 		nvlist_free(pke->pke_properties);
2244 	}
2245 	free(pke);
2246 }
2247 
2248 /*
2249  * pool_knl_elem_free_cb() is designed to be used with
2250  * dict_map(). When a connection is freed, this function is used to
2251  * free all element resources.
2252  */
2253 /* ARGSUSED1 */
2254 static void
2255 pool_knl_elem_free_cb(const void *key, void **value, void *cl)
2256 {
2257 	pool_knl_elem_t *pke = (pool_knl_elem_t *)key;
2258 
2259 #ifdef DEBUG
2260 	dprintf("pool_knl_elem_free_cb:\n");
2261 	dprintf("about to release %p ", pke);
2262 	pool_elem_dprintf(TO_ELEM(pke));
2263 #endif	/* DEBUG */
2264 	pool_knl_elem_free(pke, PO_TRUE);
2265 }
2266 
2267 /*
2268  * Free the resources for a kernel data provider.
2269  */
2270 void
2271 pool_knl_connection_free(pool_knl_connection_t *prov)
2272 {
2273 	if (prov->pkc_log != NULL) {
2274 		(void) log_walk(prov->pkc_log, log_item_release);
2275 		log_free(prov->pkc_log);
2276 	}
2277 	if (prov->pkc_elements != NULL) {
2278 		dict_map(prov->pkc_elements, pool_knl_elem_free_cb, NULL);
2279 #if DEBUG
2280 		dprintf("dict length is %llu\n", dict_length(prov->pkc_leaks));
2281 		dict_map(prov->pkc_leaks, pool_knl_elem_printf_cb,
2282 		    prov->pkc_elements);
2283 		assert(dict_length(prov->pkc_leaks) == 0);
2284 		dict_free(&prov->pkc_leaks);
2285 #endif	/* DEBUG */
2286 		dict_free(&prov->pkc_elements);
2287 	}
2288 	free((void *)prov->pc_name);
2289 	free(prov);
2290 }
2291 
2292 /*
2293  * Return the specified property value.
2294  *
2295  * POC_INVAL is returned if an error is detected and the error code is updated
2296  * to indicate the cause of the error.
2297  */
2298 pool_value_class_t
2299 pool_knl_get_property(const pool_elem_t *pe, const char *name,
2300     pool_value_t *val)
2301 {
2302 	pool_knl_elem_t *pke = (pool_knl_elem_t *)pe;
2303 	nvpair_t *pair;
2304 	const pool_prop_t *prop;
2305 
2306 	if ((prop = provider_get_prop(pe, name)) != NULL)
2307 		if (prop_is_stored(prop) == PO_FALSE)
2308 			return (pool_knl_get_dynamic_property(pe, name, val));
2309 
2310 	if ((pair = pool_knl_find_nvpair(pke->pke_properties, name)) == NULL) {
2311 		pool_seterror(POE_BADPARAM);
2312 		return (POC_INVAL);
2313 	}
2314 
2315 	if (pool_value_from_nvpair(val, pair) == PO_FAIL) {
2316 		return (POC_INVAL);
2317 	}
2318 
2319 	return (pool_value_get_type(val));
2320 }
2321 
2322 /*
2323  * Return the specified property value.
2324  *
2325  * If a property is designated as dynamic, then this function will
2326  * always try to return the latest value of the property from the
2327  * kernel.
2328  *
2329  * POC_INVAL is returned if an error is detected and the error code is updated
2330  * to indicate the cause of the error.
2331  */
2332 pool_value_class_t
2333 pool_knl_get_dynamic_property(const pool_elem_t *pe, const char *name,
2334     pool_value_t *val)
2335 {
2336 	pool_knl_connection_t *prov;
2337 	pool_propget_t propget = { 0 };
2338 	nvlist_t *proplist;
2339 	nvpair_t *pair;
2340 
2341 	propget.pp_o_id_type = pool_elem_class(pe);
2342 	if (pool_elem_class(pe) == PEC_RES_COMP ||
2343 	    pool_elem_class(pe) == PEC_RES_AGG)
2344 		propget.pp_o_id_subtype = pool_resource_elem_class(pe);
2345 	if (pool_elem_class(pe) == PEC_COMP)
2346 		propget.pp_o_id_subtype =
2347 		    (pool_resource_elem_class_t)pool_component_elem_class(pe);
2348 
2349 	propget.pp_o_id = elem_get_sysid(pe);
2350 	propget.pp_o_prop_name_size = strlen(name);
2351 	propget.pp_o_prop_name = (char *)name;
2352 	propget.pp_i_bufsize = KERNEL_SNAPSHOT_BUF_SZ;
2353 	propget.pp_i_buf = malloc(KERNEL_SNAPSHOT_BUF_SZ);
2354 	bzero(propget.pp_i_buf, KERNEL_SNAPSHOT_BUF_SZ);
2355 
2356 	prov = (pool_knl_connection_t *)(TO_CONF(pe))->pc_prov;
2357 	if (ioctl(prov->pkc_fd, POOL_PROPGET, &propget) < 0) {
2358 		free(propget.pp_i_buf);
2359 		pool_seterror(POE_SYSTEM);
2360 		return (POC_INVAL);
2361 	}
2362 	if (nvlist_unpack(propget.pp_i_buf, propget.pp_i_bufsize,
2363 	    &proplist, 0) != 0) {
2364 		free(propget.pp_i_buf);
2365 		pool_seterror(POE_SYSTEM);
2366 		return (POC_INVAL);
2367 	}
2368 	free(propget.pp_i_buf);
2369 
2370 	if ((pair = nvlist_next_nvpair(proplist, NULL)) == NULL) {
2371 		nvlist_free(proplist);
2372 		pool_seterror(POE_SYSTEM);
2373 		return (POC_INVAL);
2374 	}
2375 
2376 	if (pool_value_from_nvpair(val, pair) == PO_FAIL) {
2377 		nvlist_free(proplist);
2378 		return (POC_INVAL);
2379 	}
2380 	nvlist_free(proplist);
2381 	return (pool_value_get_type(val));
2382 }
2383 
2384 /*
2385  * Update the specified property value.
2386  *
2387  * PO_FAIL is returned if an error is detected and the error code is updated
2388  * to indicate the cause of the error.
2389  */
2390 int
2391 pool_knl_put_property(pool_elem_t *pe, const char *name,
2392     const pool_value_t *val)
2393 {
2394 	pool_knl_elem_t *pke = (pool_knl_elem_t *)pe;
2395 	pool_knl_connection_t *prov =
2396 	    (pool_knl_connection_t *)(TO_CONF(pe))->pc_prov;
2397 	nvpair_t *bp, *ap;
2398 	pool_propput_undo_t *propput;
2399 	nvlist_t *bl = NULL;
2400 	const pool_prop_t *prop;
2401 
2402 	if ((bp = pool_knl_find_nvpair(pke->pke_properties, name)) != NULL) {
2403 		if (nvlist_alloc(&bl, NV_UNIQUE_NAME_TYPE, 0) != 0) {
2404 			pool_seterror(POE_SYSTEM);
2405 			return (PO_FAIL);
2406 		}
2407 		if (nvlist_add_nvpair(bl, bp) != 0) {
2408 			nvlist_free(bl);
2409 			pool_seterror(POE_SYSTEM);
2410 			return (PO_FAIL);
2411 		}
2412 	}
2413 	if (pool_knl_nvlist_add_value(pke->pke_properties, name, val) !=
2414 	    PO_SUCCESS)
2415 		return (PO_FAIL);
2416 
2417 	if (prov->pkc_log->l_state != LS_DO) {
2418 		if (bl)
2419 			nvlist_free(bl);
2420 		return (PO_SUCCESS);
2421 	}
2422 	/*
2423 	 * The remaining logic is setting up the arguments for the
2424 	 * POOL_PROPPUT ioctl and appending the details into the log.
2425 	 */
2426 	if ((propput = malloc(sizeof (pool_propput_undo_t))) == NULL) {
2427 		pool_seterror(POE_SYSTEM);
2428 		return (PO_FAIL);
2429 	}
2430 	(void) memset(propput, 0, sizeof (pool_propput_undo_t));
2431 	propput->ppu_blist = bl;
2432 
2433 	ap = pool_knl_find_nvpair(pke->pke_properties, name);
2434 
2435 	if (nvlist_alloc(&propput->ppu_alist, NV_UNIQUE_NAME_TYPE, 0) != 0) {
2436 		nvlist_free(propput->ppu_blist);
2437 		free(propput);
2438 		pool_seterror(POE_SYSTEM);
2439 		return (PO_FAIL);
2440 	}
2441 	if (nvlist_add_nvpair(propput->ppu_alist, ap) != 0) {
2442 		nvlist_free(propput->ppu_blist);
2443 		nvlist_free(propput->ppu_alist);
2444 		free(propput);
2445 		pool_seterror(POE_SYSTEM);
2446 		return (PO_FAIL);
2447 	}
2448 
2449 	if (nvlist_pack(propput->ppu_alist,
2450 	    (char **)&propput->ppu_ioctl.pp_o_buf,
2451 	    &propput->ppu_ioctl.pp_o_bufsize, NV_ENCODE_NATIVE, 0) != 0) {
2452 		pool_seterror(POE_SYSTEM);
2453 		return (PO_FAIL);
2454 	}
2455 	nvlist_free(propput->ppu_alist);
2456 	propput->ppu_ioctl.pp_o_id_type = pool_elem_class(pe);
2457 	if (pool_elem_class(pe) == PEC_RES_COMP ||
2458 	    pool_elem_class(pe) == PEC_RES_AGG)
2459 		propput->ppu_ioctl.pp_o_id_sub_type =
2460 		    pool_resource_elem_class(pe);
2461 	if (pool_elem_class(pe) == PEC_COMP)
2462 		propput->ppu_ioctl.pp_o_id_sub_type =
2463 		    (pool_resource_elem_class_t)pool_component_elem_class(pe);
2464 
2465 	propput->ppu_elem = pe;
2466 	if ((prop = provider_get_prop(propput->ppu_elem, name)) != NULL) {
2467 		if (prop_is_readonly(prop) == PO_TRUE)
2468 			propput->ppu_doioctl |= KERNEL_PROP_RDONLY;
2469 	}
2470 
2471 	if (log_append(prov->pkc_log, POOL_PROPPUT, (void *)propput) !=
2472 	    PO_SUCCESS) {
2473 		nvlist_free(propput->ppu_blist);
2474 		free(propput);
2475 		return (PO_FAIL);
2476 	}
2477 	return (PO_SUCCESS);
2478 }
2479 
2480 /*
2481  * Remove the specified property value.
2482  *
2483  * PO_FAIL is returned if an error is detected and the error code is
2484  * updated to indicate the cause of the error.
2485  */
2486 int
2487 pool_knl_rm_property(pool_elem_t *pe, const char *name)
2488 {
2489 	pool_knl_elem_t *pke = (pool_knl_elem_t *)pe;
2490 	pool_knl_connection_t *prov =
2491 	    (pool_knl_connection_t *)(TO_CONF(pe))->pc_prov;
2492 	pool_proprm_undo_t *proprm;
2493 
2494 	if (pool_knl_find_nvpair(pke->pke_properties, name) == NULL) {
2495 		pool_seterror(POE_BADPARAM);
2496 		return (PO_FAIL);
2497 	}
2498 
2499 	if ((proprm = malloc(sizeof (pool_proprm_undo_t))) == NULL) {
2500 		pool_seterror(POE_SYSTEM);
2501 		return (PO_FAIL);
2502 	}
2503 	(void) memset(proprm, 0, sizeof (pool_proprm_undo_t));
2504 	proprm->pru_oldval.pv_class = POC_INVAL;
2505 	(void) pool_get_property(TO_CONF(pe), pe, name, &proprm->pru_oldval);
2506 
2507 	if (prov->pkc_log->l_state != LS_DO) {
2508 		free(proprm);
2509 		(void) nvlist_remove_all(pke->pke_properties, (char *)name);
2510 		return (PO_SUCCESS);
2511 	}
2512 	/*
2513 	 * The remaining logic is setting up the arguments for the
2514 	 * POOL_PROPRM ioctl and appending the details into the log.
2515 	 */
2516 
2517 	proprm->pru_ioctl.pp_o_id_type = pool_elem_class(pe);
2518 	if (pool_elem_class(pe) == PEC_RES_COMP ||
2519 	    pool_elem_class(pe) == PEC_RES_AGG)
2520 		proprm->pru_ioctl.pp_o_id_sub_type =
2521 		    pool_resource_elem_class(pe);
2522 
2523 	if (pool_elem_class(pe) == PEC_COMP)
2524 		proprm->pru_ioctl.pp_o_id_sub_type =
2525 		    (pool_resource_elem_class_t)pool_component_elem_class(pe);
2526 
2527 	proprm->pru_ioctl.pp_o_prop_name_size = strlen(name);
2528 	proprm->pru_ioctl.pp_o_prop_name =
2529 	    (char *)pool_value_get_name(&proprm->pru_oldval);
2530 	proprm->pru_elem = pe;
2531 
2532 	if (log_append(prov->pkc_log, POOL_PROPRM, (void *)proprm) !=
2533 	    PO_SUCCESS) {
2534 		free(proprm);
2535 		return (PO_FAIL);
2536 	}
2537 
2538 	(void) nvlist_remove_all(pke->pke_properties, (char *)name);
2539 	return (PO_SUCCESS);
2540 }
2541 
2542 /*
2543  * Return a NULL terminated array of pool_value_t which represents all
2544  * of the properties stored for an element
2545  *
2546  * Return NULL on failure. It is the caller's responsibility to free
2547  * the returned array of values.
2548  */
2549 pool_value_t **
2550 pool_knl_get_properties(const pool_elem_t *pe, uint_t *nprops)
2551 {
2552 	nvpair_t *pair;
2553 	pool_value_t **result;
2554 	pool_knl_elem_t *pke = (pool_knl_elem_t *)pe;
2555 	int i = 0;
2556 
2557 	*nprops = 0;
2558 
2559 	for (pair = nvlist_next_nvpair(pke->pke_properties, NULL); pair != NULL;
2560 	    pair = nvlist_next_nvpair(pke->pke_properties, pair))
2561 		(*nprops)++;
2562 	if ((result = calloc(*nprops + 1, sizeof (pool_value_t *))) == NULL) {
2563 		pool_seterror(POE_SYSTEM);
2564 		return (NULL);
2565 	}
2566 	for (pair = nvlist_next_nvpair(pke->pke_properties, NULL); pair != NULL;
2567 	    pair = nvlist_next_nvpair(pke->pke_properties, pair), i++) {
2568 		result[i] = pool_value_alloc();
2569 		if (pool_value_from_nvpair(result[i], pair) == PO_FAIL) {
2570 			while (i-- >= 0)
2571 				pool_value_free(result[i]);
2572 			free(result);
2573 			return (NULL);
2574 		}
2575 	}
2576 	return (result);
2577 }
2578 
2579 /*
2580  * Append an entry to a result set. Reallocate the array used to store
2581  * results if it's full.
2582  * Returns PO_SUCCESS/PO_FAIL
2583  */
2584 int
2585 pool_knl_result_set_append(pool_knl_result_set_t *rs, pool_knl_elem_t *pke)
2586 {
2587 	if (rs->pkr_count == rs->pkr_size)
2588 		if (pool_knl_result_set_realloc(rs) != PO_SUCCESS)
2589 			return (PO_FAIL);
2590 
2591 	rs->pkr_list[rs->pkr_count++] = pke;
2592 
2593 	return (PO_SUCCESS);
2594 }
2595 
2596 /*
2597  * Resize the array used to store results. A simple doubling strategy
2598  * is used.
2599  * Returns PO_SUCCESS/PO_FAIL
2600  */
2601 int
2602 pool_knl_result_set_realloc(pool_knl_result_set_t *rs)
2603 {
2604 	pool_knl_elem_t **old_list = rs->pkr_list;
2605 	int new_size = rs->pkr_size * 2;
2606 
2607 	if ((rs->pkr_list = realloc(rs->pkr_list,
2608 	    new_size * sizeof (pool_knl_elem_t *))) == NULL) {
2609 		rs->pkr_list = old_list;
2610 		pool_seterror(POE_SYSTEM);
2611 		return (PO_FAIL);
2612 	}
2613 	rs->pkr_size = new_size;
2614 
2615 	return (PO_SUCCESS);
2616 }
2617 
2618 /*
2619  * Allocate a result set. The Result Set stores the result of a query.
2620  * Returns pool_knl_result_set_t pointer/NULL
2621  */
2622 pool_knl_result_set_t *
2623 pool_knl_result_set_alloc(const pool_conf_t *conf)
2624 {
2625 	pool_knl_result_set_t *rs;
2626 
2627 	if ((rs = malloc(sizeof (pool_knl_result_set_t))) == NULL) {
2628 		pool_seterror(POE_SYSTEM);
2629 		return (NULL);
2630 	}
2631 	(void) memset(rs, 0, sizeof (pool_knl_result_set_t));
2632 	rs->pkr_size = KERNEL_RS_INITIAL_SZ;
2633 	if (pool_knl_result_set_realloc(rs) == PO_FAIL) {
2634 		free(rs);
2635 		pool_seterror(POE_SYSTEM);
2636 		return (NULL);
2637 	}
2638 	rs->prs_conf = conf;
2639 	rs->prs_index = -1;
2640 	rs->prs_active = PO_TRUE;
2641 	/* Fix up the result set accessor functions to the knl specfic ones */
2642 	rs->prs_next = pool_knl_rs_next;
2643 	rs->prs_prev = pool_knl_rs_prev;
2644 	rs->prs_first = pool_knl_rs_first;
2645 	rs->prs_last = pool_knl_rs_last;
2646 	rs->prs_get_index = pool_knl_rs_get_index;
2647 	rs->prs_set_index = pool_knl_rs_set_index;
2648 	rs->prs_close = pool_knl_rs_close;
2649 	rs->prs_count = pool_knl_rs_count;
2650 	return (rs);
2651 }
2652 
2653 /*
2654  * Free a result set. Ensure that the resources are all released at
2655  * this point.
2656  */
2657 void
2658 pool_knl_result_set_free(pool_knl_result_set_t *rs)
2659 {
2660 	free(rs->pkr_list);
2661 	free(rs);
2662 }
2663 /*
2664  * Return the next element in a result set.
2665  * Returns pool_elem_t pointer/NULL
2666  */
2667 pool_elem_t *
2668 pool_knl_rs_next(pool_result_set_t *set)
2669 {
2670 	pool_knl_result_set_t *kset = (pool_knl_result_set_t *)set;
2671 
2672 	if (kset->prs_index == kset->pkr_count - 1)
2673 		return (NULL);
2674 	return ((pool_elem_t *)kset->pkr_list[++kset->prs_index]);
2675 }
2676 
2677 /*
2678  * Return the previous element in a result set.
2679  * Returns pool_elem_t pointer/NULL
2680  */
2681 pool_elem_t *
2682 pool_knl_rs_prev(pool_result_set_t *set)
2683 {
2684 	pool_knl_result_set_t *kset = (pool_knl_result_set_t *)set;
2685 
2686 	if (kset->prs_index < 0)
2687 		return (NULL);
2688 	return ((pool_elem_t *)kset->pkr_list[kset->prs_index--]);
2689 }
2690 
2691 /*
2692  * Sets the current index in a result set.
2693  * Returns PO_SUCCESS/PO_FAIL
2694  */
2695 int
2696 pool_knl_rs_set_index(pool_result_set_t *set, int index)
2697 {
2698 	pool_knl_result_set_t *kset = (pool_knl_result_set_t *)set;
2699 
2700 	if (index < 0 || index >= kset->pkr_count) {
2701 		pool_seterror(POE_BADPARAM);
2702 		return (PO_FAIL);
2703 	}
2704 	kset->prs_index = index;
2705 	return (PO_SUCCESS);
2706 }
2707 
2708 /*
2709  * Return the current index in a result set.
2710  * Returns current index
2711  */
2712 int
2713 pool_knl_rs_get_index(pool_result_set_t *set)
2714 {
2715 	pool_knl_result_set_t *kset = (pool_knl_result_set_t *)set;
2716 
2717 	return (kset->prs_index);
2718 }
2719 
2720 /*
2721  * Return the first element in a result set.
2722  * Returns pool_elem_t pointer/NULL
2723  */
2724 pool_elem_t *
2725 pool_knl_rs_first(pool_result_set_t *set)
2726 {
2727 	pool_knl_result_set_t *kset = (pool_knl_result_set_t *)set;
2728 
2729 	return ((pool_elem_t *)kset->pkr_list[0]);
2730 }
2731 
2732 /*
2733  * Return the last element in a result set.
2734  * Returns pool_elem_t pointer/NULL
2735  */
2736 pool_elem_t *
2737 pool_knl_rs_last(pool_result_set_t *set)
2738 {
2739 	pool_knl_result_set_t *kset = (pool_knl_result_set_t *)set;
2740 
2741 	return ((pool_elem_t *)kset->pkr_list[kset->pkr_count - 1]);
2742 }
2743 
2744 /*
2745  * Return the number of results in a result set.
2746  * Returns result count
2747  */
2748 int
2749 pool_knl_rs_count(pool_result_set_t *set)
2750 {
2751 	pool_knl_result_set_t *kset = (pool_knl_result_set_t *)set;
2752 
2753 	return (kset->pkr_count);
2754 }
2755 
2756 
2757 /*
2758  * Close a result set. Free the resources
2759  * Returns PO_SUCCESS/PO_FAIL
2760  */
2761 int
2762 pool_knl_rs_close(pool_result_set_t *set)
2763 {
2764 	pool_knl_result_set_t *kset = (pool_knl_result_set_t *)set;
2765 
2766 	pool_knl_result_set_free(kset);
2767 	return (PO_SUCCESS);
2768 }
2769 
2770 /*
2771  * Commit an individual transaction log item(). This processing is
2772  * essential to the pool_conf_commit() logic. When pool_conf_commit()
2773  * is invoked, the pending transaction log for the configuration is
2774  * walked and all pending changes to the kernel are invoked. If a
2775  * change succeeds it is marked in the log as successful and
2776  * processing continues, if it fails then failure is returned and the
2777  * log will be "rolled back" to undo changes to the library snapshot
2778  * and the kernel.
2779  */
2780 int
2781 log_item_commit(log_item_t *li)
2782 {
2783 	pool_knl_connection_t *prov =
2784 	    (pool_knl_connection_t *)li->li_log->l_conf->pc_prov;
2785 	pool_create_undo_t *create;
2786 	pool_destroy_undo_t *destroy;
2787 	pool_assoc_undo_t *assoc;
2788 	pool_dissoc_undo_t *dissoc;
2789 	pool_propput_undo_t *propput;
2790 	pool_proprm_undo_t *proprm;
2791 	pool_xtransfer_undo_t *xtransfer;
2792 	char_buf_t *cb;
2793 	size_t size;
2794 	pool_elem_t *pair;
2795 	pool_value_t val = POOL_VALUE_INITIALIZER;
2796 	int ret;
2797 
2798 	switch (li->li_op) {
2799 	case POOL_CREATE:
2800 		create = (pool_create_undo_t *)li->li_details;
2801 		if ((cb = alloc_char_buf(CB_DEFAULT_LEN)) == NULL)
2802 			return (PO_FAIL);
2803 		if (set_char_buf(cb, "%s.sys_id",
2804 		    pool_elem_class_string(create->pcu_elem)) != PO_SUCCESS) {
2805 			free_char_buf(cb);
2806 			return (PO_FAIL);
2807 		}
2808 #ifdef DEBUG
2809 		dprintf("log_item_commit: POOL_CREATE, remove from dict\n");
2810 		pool_elem_dprintf(create->pcu_elem);
2811 #endif	/* DEBUG */
2812 		/*
2813 		 * May not need to remove the element if it was
2814 		 * already destroyed before commit. Just cast the
2815 		 * return to void.
2816 		 */
2817 		(void) dict_remove(prov->pkc_elements,
2818 		    (pool_knl_elem_t *)create->pcu_elem);
2819 
2820 		if (ioctl(prov->pkc_fd, POOL_CREATE, &create->pcu_ioctl) < 0) {
2821 			pool_seterror(POE_SYSTEM);
2822 			return (PO_FAIL);
2823 		}
2824 		/*
2825 		 * Now that we have created our element in the kernel,
2826 		 * it has a valid allocated system id. Remove the
2827 		 * element from the element dictionary, using the
2828 		 * current key, and then re-insert under the new key.
2829 		 */
2830 #ifdef DEBUG
2831 		pool_elem_dprintf(create->pcu_elem);
2832 #endif	/* DEBUG */
2833 		assert(nvlist_add_int64(
2834 		    ((pool_knl_elem_t *)create->pcu_elem)->pke_properties,
2835 		    cb->cb_buf, create->pcu_ioctl.pc_i_id) == 0);
2836 		free_char_buf(cb);
2837 		assert(dict_put(prov->pkc_elements, create->pcu_elem,
2838 		    create->pcu_elem) == NULL);
2839 		/*
2840 		 * If the element has a pair in the static
2841 		 * configuration, update it with the sys_id
2842 		 */
2843 		if ((pair = pool_get_pair(create->pcu_elem)) != NULL) {
2844 			pool_value_set_int64(&val, create->pcu_ioctl.pc_i_id);
2845 			assert(pool_put_any_ns_property(pair, c_sys_prop, &val)
2846 			    == PO_SUCCESS);
2847 		}
2848 		li->li_state = LS_UNDO;
2849 		break;
2850 	case POOL_DESTROY:
2851 		destroy = (pool_destroy_undo_t *)li->li_details;
2852 
2853 		destroy->pdu_ioctl.pd_o_id = elem_get_sysid(destroy->pdu_elem);
2854 
2855 		/*
2856 		 * It may be that this element was created in the last
2857 		 * transaction. In which case POOL_CREATE, above, will
2858 		 * have re-inserted the element in the dictionary. Try
2859 		 * to remove it just in case this has occurred.
2860 		 */
2861 		(void) dict_remove(prov->pkc_elements,
2862 		    (pool_knl_elem_t *)destroy->pdu_elem);
2863 		while ((ret = ioctl(prov->pkc_fd, POOL_DESTROY,
2864 		    &destroy->pdu_ioctl)) < 0 && errno == EAGAIN)
2865 			;
2866 		if (ret < 0) {
2867 			pool_seterror(POE_SYSTEM);
2868 			return (PO_FAIL);
2869 		}
2870 #ifdef DEBUG
2871 		dprintf("log_item_commit: POOL_DESTROY\n");
2872 		pool_elem_dprintf(destroy->pdu_elem);
2873 #endif	/* DEBUG */
2874 		li->li_state = LS_UNDO;
2875 		break;
2876 	case POOL_ASSOC:
2877 		assoc = (pool_assoc_undo_t *)li->li_details;
2878 
2879 		assoc->pau_ioctl.pa_o_pool_id =
2880 		    elem_get_sysid(assoc->pau_assoc);
2881 		assoc->pau_ioctl.pa_o_res_id =
2882 		    elem_get_sysid(assoc->pau_newres);
2883 		while ((ret = ioctl(prov->pkc_fd, POOL_ASSOC,
2884 		    &assoc->pau_ioctl)) < 0 && errno == EAGAIN)
2885 			;
2886 		if (ret < 0) {
2887 			pool_seterror(POE_SYSTEM);
2888 			return (PO_FAIL);
2889 		}
2890 		li->li_state = LS_UNDO;
2891 		break;
2892 	case POOL_DISSOC:
2893 		dissoc = (pool_dissoc_undo_t *)li->li_details;
2894 
2895 		dissoc->pdu_ioctl.pd_o_pool_id =
2896 		    elem_get_sysid(dissoc->pdu_dissoc);
2897 
2898 		while ((ret = ioctl(prov->pkc_fd, POOL_DISSOC,
2899 		    &dissoc->pdu_ioctl)) < 0 && errno == EAGAIN)
2900 			;
2901 		if (ret < 0) {
2902 			pool_seterror(POE_SYSTEM);
2903 			return (PO_FAIL);
2904 		}
2905 		li->li_state = LS_UNDO;
2906 		break;
2907 	case POOL_TRANSFER:
2908 		li->li_state = LS_UNDO;
2909 		pool_seterror(POE_BADPARAM);
2910 		return (PO_FAIL);
2911 	case POOL_XTRANSFER:
2912 		xtransfer = (pool_xtransfer_undo_t *)li->li_details;
2913 
2914 		xtransfer->pxu_ioctl.px_o_src_id =
2915 		    elem_get_sysid(xtransfer->pxu_src);
2916 		xtransfer->pxu_ioctl.px_o_tgt_id =
2917 		    elem_get_sysid(xtransfer->pxu_tgt);
2918 		for (size = 0; xtransfer->pxu_rl[size] != NULL; size ++) {
2919 			xtransfer->pxu_ioctl.px_o_comp_list[size] =
2920 			    elem_get_sysid(TO_ELEM(xtransfer->pxu_rl[size]));
2921 #ifdef DEBUG
2922 			dprintf("log_item_commit: POOL_XTRANSFER\n");
2923 			pool_elem_dprintf(TO_ELEM(xtransfer->pxu_rl[size]));
2924 #endif	/* DEBUG */
2925 		}
2926 
2927 		/*
2928 		 * Don't actually transfer resources if the configuration
2929 		 * is in POF_DESTROY state. This is to prevent problems
2930 		 * relating to transferring off-line CPUs. Instead rely
2931 		 * on the POOL_DESTROY ioctl to transfer the CPUS.
2932 		 */
2933 		if (li->li_log->l_conf->pc_state != POF_DESTROY &&
2934 		    ioctl(prov->pkc_fd, POOL_XTRANSFER,
2935 		    &xtransfer->pxu_ioctl) < 0) {
2936 #ifdef DEBUG
2937 			dprintf("log_item_commit: POOL_XTRANSFER, ioctl "
2938 			    "failed\n");
2939 #endif	/* DEBUG */
2940 			pool_seterror(POE_SYSTEM);
2941 			return (PO_FAIL);
2942 		}
2943 		li->li_state = LS_UNDO;
2944 		break;
2945 	case POOL_PROPPUT:
2946 		propput = (pool_propput_undo_t *)li->li_details;
2947 
2948 		if (pool_elem_class(propput->ppu_elem) != PEC_SYSTEM) {
2949 			propput->ppu_ioctl.pp_o_id =
2950 			    elem_get_sysid(propput->ppu_elem);
2951 		}
2952 		/*
2953 		 * Some properties, e.g. pset.size, are read-only in the
2954 		 * kernel and attempting to change them will fail and cause
2955 		 * problems. Although this property is read-only through the
2956 		 * public interface, the library needs to modify it's value.
2957 		 */
2958 		if ((propput->ppu_doioctl & KERNEL_PROP_RDONLY) == 0) {
2959 			if (ioctl(prov->pkc_fd, POOL_PROPPUT,
2960 			    &propput->ppu_ioctl) < 0) {
2961 				pool_seterror(POE_SYSTEM);
2962 				return (PO_FAIL);
2963 			}
2964 		}
2965 		li->li_state = LS_UNDO;
2966 		break;
2967 	case POOL_PROPRM:
2968 		proprm = (pool_proprm_undo_t *)li->li_details;
2969 
2970 		if (pool_elem_class(proprm->pru_elem) != PEC_SYSTEM) {
2971 			proprm->pru_ioctl.pp_o_id =
2972 			    elem_get_sysid(proprm->pru_elem);
2973 		}
2974 		if (ioctl(prov->pkc_fd, POOL_PROPRM, &proprm->pru_ioctl) < 0) {
2975 			pool_seterror(POE_SYSTEM);
2976 			return (PO_FAIL);
2977 		}
2978 		li->li_state = LS_UNDO;
2979 		break;
2980 	default:
2981 		return (PO_FAIL);
2982 	}
2983 	return (PO_SUCCESS);
2984 }
2985 
2986 /*
2987  * Undo an individual transaction log item(). This processing is
2988  * essential to the pool_conf_commit() and pool_conf_rollback()
2989  * logic. Changes to the libpool snapshot and the kernel are carried
2990  * out separately. The library snapshot is updated synchronously,
2991  * however the kernel update is delayed until the user calls
2992  * pool_conf_commit().
2993  *
2994  * When undoing transactions, library changes will be undone unless
2995  * this invocation is as a result of a commit failure, in which case
2996  * the log state will be LS_RECOVER. Kernel changes will only be
2997  * undone if they are marked as having been done, in which case the
2998  * log item state will be LS_UNDO.
2999  */
3000 int
3001 log_item_undo(log_item_t *li)
3002 {
3003 	pool_knl_connection_t *prov =
3004 	    (pool_knl_connection_t *)li->li_log->l_conf->pc_prov;
3005 	pool_create_undo_t *create;
3006 	pool_destroy_undo_t *destroy;
3007 	pool_assoc_undo_t *assoc;
3008 	pool_dissoc_undo_t *dissoc;
3009 	pool_propput_undo_t *propput;
3010 	pool_proprm_undo_t *proprm;
3011 	pool_xtransfer_undo_t *xtransfer;
3012 	char_buf_t *cb;
3013 	size_t size;
3014 	pool_destroy_t u_destroy;
3015 	pool_create_t u_create;
3016 	pool_assoc_t u_assoc;
3017 	pool_xtransfer_t u_xtransfer;
3018 	pool_propput_t u_propput;
3019 	pool_proprm_t u_proprm;
3020 	pool_conf_t *conf = li->li_log->l_conf;
3021 	nvpair_t *pair;
3022 	nvlist_t *tmplist;
3023 	int ret;
3024 
3025 	if (li->li_log->l_state != LS_RECOVER) {
3026 	switch (li->li_op) {
3027 	case POOL_CREATE:
3028 		create = (pool_create_undo_t *)li->li_details;
3029 
3030 		(void) dict_remove(prov->pkc_elements, create->pcu_elem);
3031 #ifdef DEBUG
3032 		dprintf("log_item_undo: POOL_CREATE\n");
3033 		assert(create->pcu_elem != NULL);
3034 		dprintf("log_item_undo: POOL_CREATE %p\n", create->pcu_elem);
3035 		pool_elem_dprintf(create->pcu_elem);
3036 #endif	/* DEBUG */
3037 		pool_knl_elem_free((pool_knl_elem_t *)create->pcu_elem,
3038 		    PO_TRUE);
3039 		break;
3040 	case POOL_DESTROY:
3041 		destroy = (pool_destroy_undo_t *)li->li_details;
3042 
3043 		assert(dict_put(prov->pkc_elements, destroy->pdu_elem,
3044 		    destroy->pdu_elem) == NULL);
3045 		break;
3046 	case POOL_ASSOC:
3047 		assoc = (pool_assoc_undo_t *)li->li_details;
3048 
3049 		if (assoc->pau_oldres != NULL)
3050 			((pool_knl_pool_t *)assoc->pau_assoc)->pkp_assoc
3051 			    [pool_resource_elem_class(assoc->pau_oldres)] =
3052 			    (pool_knl_resource_t *)assoc->pau_oldres;
3053 		break;
3054 	case POOL_DISSOC:
3055 		dissoc = (pool_dissoc_undo_t *)li->li_details;
3056 
3057 		if (dissoc->pdu_oldres != NULL)
3058 			((pool_knl_pool_t *)dissoc->pdu_dissoc)->pkp_assoc
3059 			    [pool_resource_elem_class(dissoc->pdu_oldres)] =
3060 			    (pool_knl_resource_t *)dissoc->pdu_oldres;
3061 		break;
3062 	case POOL_TRANSFER:
3063 		pool_seterror(POE_BADPARAM);
3064 		return (PO_FAIL);
3065 	case POOL_XTRANSFER:
3066 		xtransfer = (pool_xtransfer_undo_t *)li->li_details;
3067 
3068 		for (size = 0; xtransfer->pxu_rl[size] != NULL; size++) {
3069 			pool_value_t val = POOL_VALUE_INITIALIZER;
3070 			uint64_t src_size;
3071 			uint64_t tgt_size;
3072 
3073 			if (pool_set_container(xtransfer->pxu_src,
3074 			    TO_ELEM(xtransfer->pxu_rl[size])) == PO_FAIL) {
3075 				return (PO_FAIL);
3076 			}
3077 			/*
3078 			 * Maintain the library view of the size
3079 			 */
3080 			if (resource_get_size(pool_elem_res(xtransfer->pxu_src),
3081 			    &src_size) != PO_SUCCESS ||
3082 			    resource_get_size(pool_elem_res(xtransfer->pxu_tgt),
3083 			    &tgt_size) != PO_SUCCESS) {
3084 				pool_seterror(POE_BADPARAM);
3085 				return (PO_FAIL);
3086 			}
3087 			src_size++;
3088 			tgt_size--;
3089 			pool_value_set_uint64(&val, src_size);
3090 			(void) pool_put_any_ns_property(xtransfer->pxu_src,
3091 			    c_size_prop, &val);
3092 			pool_value_set_uint64(&val, tgt_size);
3093 			(void) pool_put_any_ns_property(xtransfer->pxu_tgt,
3094 			    c_size_prop, &val);
3095 		}
3096 		break;
3097 	case POOL_PROPPUT:
3098 		propput = (pool_propput_undo_t *)li->li_details;
3099 
3100 		if ((propput->ppu_doioctl & KERNEL_PROP_RDONLY) == 0) {
3101 			if (propput->ppu_blist != NULL) {
3102 				if (nvlist_merge(
3103 				    ((pool_knl_elem_t *)propput->ppu_elem)->
3104 				    pke_properties, propput->ppu_blist, 0)
3105 				    != 0) {
3106 					pool_seterror(POE_SYSTEM);
3107 					return (PO_FAIL);
3108 				}
3109 			} else {
3110 				if (nvlist_unpack(propput->ppu_ioctl.pp_o_buf,
3111 				    propput->ppu_ioctl.pp_o_bufsize,
3112 				    &propput->ppu_alist, 0) != 0) {
3113 					pool_seterror(POE_SYSTEM);
3114 					return (PO_FAIL);
3115 				}
3116 				pair = nvlist_next_nvpair(propput->ppu_alist,
3117 				    NULL);
3118 				(void) nvlist_remove_all(((pool_knl_elem_t *)
3119 				    propput->ppu_elem)->pke_properties,
3120 				    nvpair_name(pair));
3121 				nvlist_free(propput->ppu_alist);
3122 			}
3123 		}
3124 		break;
3125 	case POOL_PROPRM:
3126 		proprm = (pool_proprm_undo_t *)li->li_details;
3127 
3128 		if (pool_value_get_type(&proprm->pru_oldval) != POC_INVAL) {
3129 			if (pool_put_property(conf, proprm->pru_elem,
3130 			    proprm->pru_ioctl.pp_o_prop_name,
3131 			    &proprm->pru_oldval) != PO_SUCCESS) {
3132 				return (PO_FAIL);
3133 			}
3134 		}
3135 		break;
3136 	default:
3137 		return (PO_FAIL);
3138 	}
3139 	}
3140 	/*
3141 	 * Only try to undo the state of the kernel if we modified it.
3142 	 */
3143 	if (li->li_state == LS_DO) {
3144 		return (PO_SUCCESS);
3145 	}
3146 
3147 	switch (li->li_op) {
3148 	case POOL_CREATE:
3149 		create = (pool_create_undo_t *)li->li_details;
3150 
3151 		u_destroy.pd_o_type = create->pcu_ioctl.pc_o_type;
3152 		u_destroy.pd_o_sub_type = create->pcu_ioctl.pc_o_sub_type;
3153 		u_destroy.pd_o_id = create->pcu_ioctl.pc_i_id;
3154 
3155 		while ((ret = ioctl(prov->pkc_fd, POOL_DESTROY,
3156 		    &u_destroy)) < 0 && errno == EAGAIN)
3157 			;
3158 		if (ret < 0) {
3159 			pool_seterror(POE_SYSTEM);
3160 			return (PO_FAIL);
3161 		}
3162 		li->li_state = LS_DO;
3163 		break;
3164 	case POOL_DESTROY:
3165 		destroy = (pool_destroy_undo_t *)li->li_details;
3166 
3167 		u_create.pc_o_type = destroy->pdu_ioctl.pd_o_type;
3168 		u_create.pc_o_sub_type = destroy->pdu_ioctl.pd_o_sub_type;
3169 
3170 		if (ioctl(prov->pkc_fd, POOL_CREATE, &u_create) < 0) {
3171 			pool_seterror(POE_SYSTEM);
3172 			return (PO_FAIL);
3173 		}
3174 
3175 		if ((cb = alloc_char_buf(CB_DEFAULT_LEN)) == NULL) {
3176 			return (PO_FAIL);
3177 		}
3178 		if (set_char_buf(cb, "%s.sys_id",
3179 		    pool_elem_class_string(destroy->pdu_elem)) != PO_SUCCESS) {
3180 			free_char_buf(cb);
3181 			return (PO_FAIL);
3182 		}
3183 		(void) nvlist_add_int64(
3184 		    ((pool_knl_elem_t *)destroy->pdu_elem)->pke_properties,
3185 		    cb->cb_buf, u_create.pc_i_id);
3186 		free_char_buf(cb);
3187 		if (dict_put(prov->pkc_elements, destroy->pdu_elem,
3188 		    destroy->pdu_elem) != NULL) {
3189 			pool_seterror(POE_SYSTEM);
3190 			return (PO_FAIL);
3191 		}
3192 		/*
3193 		 * Now we need to reset all the properties and
3194 		 * associations in the kernel for this newly created
3195 		 * replacement.
3196 		 */
3197 		u_propput.pp_o_id_type = destroy->pdu_ioctl.pd_o_type;
3198 		u_propput.pp_o_id_sub_type = destroy->pdu_ioctl.pd_o_sub_type;
3199 		u_propput.pp_o_id = u_create.pc_i_id;
3200 		u_propput.pp_o_buf = NULL;
3201 		/*
3202 		 * Remove the read-only properties before attempting
3203 		 * to restore the state of the newly created property
3204 		 */
3205 		(void) nvlist_dup(((pool_knl_elem_t *)destroy->pdu_elem)->
3206 		    pke_properties, &tmplist, 0);
3207 		for (pair = nvlist_next_nvpair(tmplist, NULL); pair != NULL;
3208 		    pair = nvlist_next_nvpair(tmplist, pair)) {
3209 			const pool_prop_t *prop;
3210 			char *name = nvpair_name(pair);
3211 			if ((prop = provider_get_prop(destroy->pdu_elem,
3212 			    name)) != NULL)
3213 				if (prop_is_readonly(prop) == PO_TRUE)
3214 					(void) nvlist_remove_all(tmplist, name);
3215 		}
3216 		if (nvlist_pack(tmplist, (char **)&u_propput.pp_o_buf,
3217 		    &u_propput.pp_o_bufsize, NV_ENCODE_NATIVE, 0) != 0) {
3218 			pool_seterror(POE_SYSTEM);
3219 			return (PO_FAIL);
3220 		}
3221 		nvlist_free(tmplist);
3222 		if (ioctl(prov->pkc_fd, POOL_PROPPUT, &u_propput) < 0) {
3223 			free(u_propput.pp_o_buf);
3224 			pool_seterror(POE_SYSTEM);
3225 			return (PO_FAIL);
3226 		}
3227 		free(u_propput.pp_o_buf);
3228 		/*
3229 		 * Now reset the associations for all the resource
3230 		 * types if the thing which we are recreating is a
3231 		 * pool
3232 		 *
3233 		 * TODO: This is resource specific and must be
3234 		 * extended for additional resource types.
3235 		 */
3236 		if (destroy->pdu_ioctl.pd_o_type == PEC_POOL) {
3237 			u_assoc.pa_o_pool_id = u_create.pc_i_id;
3238 			u_assoc.pa_o_res_id =
3239 			    elem_get_sysid(
3240 			    TO_ELEM(((pool_knl_pool_t *)destroy->pdu_elem)->
3241 			    pkp_assoc[PREC_PSET]));
3242 			u_assoc.pa_o_id_type = PREC_PSET;
3243 
3244 			if (ioctl(prov->pkc_fd, POOL_ASSOC, &u_assoc) < 0) {
3245 				pool_seterror(POE_SYSTEM);
3246 				return (PO_FAIL);
3247 			}
3248 		}
3249 		li->li_state = LS_DO;
3250 		break;
3251 	case POOL_ASSOC:
3252 		assoc = (pool_assoc_undo_t *)li->li_details;
3253 
3254 		u_assoc.pa_o_pool_id = elem_get_sysid(assoc->pau_assoc);
3255 		u_assoc.pa_o_res_id = elem_get_sysid(assoc->pau_oldres);
3256 		u_assoc.pa_o_id_type = assoc->pau_ioctl.pa_o_id_type;
3257 
3258 		while ((ret = ioctl(prov->pkc_fd, POOL_ASSOC, &u_assoc)) < 0 &&
3259 		    errno == EAGAIN)
3260 			;
3261 		if (ret < 0) {
3262 			pool_seterror(POE_SYSTEM);
3263 			return (PO_FAIL);
3264 		}
3265 		li->li_state = LS_DO;
3266 		break;
3267 	case POOL_DISSOC:
3268 		dissoc = (pool_dissoc_undo_t *)li->li_details;
3269 
3270 		u_assoc.pa_o_pool_id = elem_get_sysid(dissoc->pdu_dissoc);
3271 		u_assoc.pa_o_res_id = elem_get_sysid(dissoc->pdu_oldres);
3272 		u_assoc.pa_o_id_type = dissoc->pdu_ioctl.pd_o_id_type;
3273 
3274 		while ((ret = ioctl(prov->pkc_fd, POOL_ASSOC, &u_assoc)) < 0 &&
3275 		    errno == EAGAIN)
3276 			;
3277 		if (ret < 0) {
3278 			pool_seterror(POE_SYSTEM);
3279 			return (PO_FAIL);
3280 		}
3281 		li->li_state = LS_DO;
3282 		break;
3283 	case POOL_TRANSFER:
3284 		li->li_state = LS_DO;
3285 		pool_seterror(POE_BADPARAM);
3286 		return (PO_FAIL);
3287 	case POOL_XTRANSFER:
3288 		xtransfer = (pool_xtransfer_undo_t *)li->li_details;
3289 
3290 		(void) memcpy(&u_xtransfer, &xtransfer->pxu_ioctl,
3291 		    sizeof (pool_xtransfer_t));
3292 		u_xtransfer.px_o_src_id = elem_get_sysid(xtransfer->pxu_tgt);
3293 		u_xtransfer.px_o_tgt_id = elem_get_sysid(xtransfer->pxu_src);
3294 
3295 		if (ioctl(prov->pkc_fd, POOL_XTRANSFER, &u_xtransfer) < 0) {
3296 			pool_seterror(POE_SYSTEM);
3297 			return (PO_FAIL);
3298 		}
3299 		li->li_state = LS_DO;
3300 		break;
3301 	case POOL_PROPPUT:
3302 		propput = (pool_propput_undo_t *)li->li_details;
3303 
3304 		if ((propput->ppu_doioctl & KERNEL_PROP_RDONLY) == 0) {
3305 			if (propput->ppu_blist) {
3306 				(void) memcpy(&u_propput, &propput->ppu_ioctl,
3307 				    sizeof (pool_propput_t));
3308 				u_propput.pp_o_id =
3309 				    elem_get_sysid(propput->ppu_elem);
3310 				u_propput.pp_o_buf = NULL;
3311 				if (nvlist_pack(propput->ppu_blist,
3312 				    (char **)&u_propput.pp_o_buf,
3313 				    &u_propput.pp_o_bufsize,
3314 				    NV_ENCODE_NATIVE, 0) != 0) {
3315 					pool_seterror(POE_SYSTEM);
3316 					return (PO_FAIL);
3317 				}
3318 				if (ioctl(prov->pkc_fd, POOL_PROPPUT,
3319 				    &u_propput) < 0) {
3320 					free(u_propput.pp_o_buf);
3321 					pool_seterror(POE_SYSTEM);
3322 					return (PO_FAIL);
3323 				}
3324 				free(u_propput.pp_o_buf);
3325 			} else {
3326 				if (nvlist_unpack(propput->
3327 				    ppu_ioctl.pp_o_buf,
3328 				    propput->ppu_ioctl.pp_o_bufsize,
3329 				    &propput->ppu_alist, 0) != 0) {
3330 					pool_seterror(POE_SYSTEM);
3331 					return (PO_FAIL);
3332 				}
3333 				u_proprm.pp_o_id_type =
3334 				    propput->ppu_ioctl.pp_o_id_type;
3335 				u_proprm.pp_o_id_sub_type =
3336 				    propput->ppu_ioctl.pp_o_id_sub_type;
3337 				u_proprm.pp_o_id =
3338 				    elem_get_sysid(propput->ppu_elem);
3339 				pair = nvlist_next_nvpair(propput->ppu_alist,
3340 				    NULL);
3341 				u_proprm.pp_o_prop_name = nvpair_name(pair);
3342 				u_proprm.pp_o_prop_name_size =
3343 				    strlen(u_proprm.pp_o_prop_name);
3344 
3345 				if (provider_get_prop(propput->ppu_elem,
3346 				    u_proprm.pp_o_prop_name) == NULL) {
3347 					if (ioctl(prov->pkc_fd, POOL_PROPRM,
3348 					    &u_proprm) < 0) {
3349 						nvlist_free(propput->ppu_alist);
3350 						pool_seterror(POE_SYSTEM);
3351 						return (PO_FAIL);
3352 					}
3353 				}
3354 				nvlist_free(propput->ppu_alist);
3355 			}
3356 		}
3357 		li->li_state = LS_DO;
3358 		break;
3359 	case POOL_PROPRM:
3360 		proprm = (pool_proprm_undo_t *)li->li_details;
3361 
3362 		u_propput.pp_o_id_type = proprm->pru_ioctl.pp_o_id_type;
3363 		u_propput.pp_o_id_sub_type =
3364 		    proprm->pru_ioctl.pp_o_id_sub_type;
3365 		u_propput.pp_o_id = elem_get_sysid(proprm->pru_elem);
3366 		u_propput.pp_o_buf = NULL;
3367 		/*
3368 		 * Only try to remove the appropriate property
3369 		 */
3370 		if (nvlist_alloc(&tmplist, NV_UNIQUE_NAME_TYPE, 0) !=
3371 		    0) {
3372 			pool_seterror(POE_SYSTEM);
3373 			return (PO_FAIL);
3374 		}
3375 		if (pool_knl_nvlist_add_value(tmplist,
3376 		    pool_value_get_name(&proprm->pru_oldval),
3377 		    &proprm->pru_oldval) != PO_SUCCESS)
3378 			return (PO_FAIL);
3379 
3380 		if (nvlist_pack(tmplist,
3381 		    (char **)&u_propput.pp_o_buf, &u_propput.pp_o_bufsize,
3382 		    NV_ENCODE_NATIVE, 0) != 0) {
3383 			nvlist_free(tmplist);
3384 			pool_seterror(POE_SYSTEM);
3385 			return (PO_FAIL);
3386 		}
3387 		nvlist_free(tmplist);
3388 		if (ioctl(prov->pkc_fd, POOL_PROPPUT, &u_propput) < 0) {
3389 			free(u_propput.pp_o_buf);
3390 			pool_seterror(POE_SYSTEM);
3391 			return (PO_FAIL);
3392 		}
3393 		free(u_propput.pp_o_buf);
3394 		li->li_state = LS_DO;
3395 		break;
3396 	default:
3397 		return (PO_FAIL);
3398 	}
3399 		return (PO_SUCCESS);
3400 }
3401 
3402 /*
3403  * A log item stores state about the transaction it represents. This
3404  * function releases the resources associated with the transaction and
3405  * used to store the transaction state.
3406  */
3407 int
3408 log_item_release(log_item_t *li)
3409 {
3410 	pool_create_undo_t *create;
3411 	pool_destroy_undo_t *destroy;
3412 	pool_assoc_undo_t *assoc;
3413 	pool_dissoc_undo_t *dissoc;
3414 	pool_propput_undo_t *propput;
3415 	pool_proprm_undo_t *proprm;
3416 	pool_xtransfer_undo_t *xtransfer;
3417 
3418 	switch (li->li_op) {
3419 	case POOL_CREATE:
3420 		create = (pool_create_undo_t *)li->li_details;
3421 
3422 		free(create);
3423 		break;
3424 	case POOL_DESTROY:
3425 		destroy = (pool_destroy_undo_t *)li->li_details;
3426 
3427 #ifdef DEBUG
3428 		dprintf("log_item_release: POOL_DESTROY\n");
3429 #endif	/* DEBUG */
3430 
3431 		if (li->li_state == LS_UNDO) {
3432 #ifdef DEBUG
3433 			pool_elem_dprintf(destroy->pdu_elem);
3434 #endif	/* DEBUG */
3435 			pool_knl_elem_free((pool_knl_elem_t *)destroy->
3436 			    pdu_elem, PO_TRUE);
3437 		}
3438 		free(destroy);
3439 		break;
3440 	case POOL_ASSOC:
3441 		assoc = (pool_assoc_undo_t *)li->li_details;
3442 
3443 		free(assoc);
3444 		break;
3445 	case POOL_DISSOC:
3446 		dissoc = (pool_dissoc_undo_t *)li->li_details;
3447 
3448 		free(dissoc);
3449 		break;
3450 	case POOL_TRANSFER:
3451 		pool_seterror(POE_BADPARAM);
3452 		return (PO_FAIL);
3453 	case POOL_XTRANSFER:
3454 		xtransfer = (pool_xtransfer_undo_t *)li->li_details;
3455 
3456 		free(xtransfer->pxu_rl);
3457 		free(xtransfer->pxu_ioctl.px_o_comp_list);
3458 		free(xtransfer);
3459 		break;
3460 	case POOL_PROPPUT:
3461 		propput = (pool_propput_undo_t *)li->li_details;
3462 
3463 		if (propput->ppu_blist)
3464 			nvlist_free(propput->ppu_blist);
3465 		free(propput->ppu_ioctl.pp_o_buf);
3466 		free(propput);
3467 		break;
3468 	case POOL_PROPRM:
3469 		proprm = (pool_proprm_undo_t *)li->li_details;
3470 
3471 		free(proprm);
3472 		break;
3473 	default:
3474 		return (PO_FAIL);
3475 	}
3476 	return (PO_SUCCESS);
3477 }
3478 
3479 /*
3480  * pool_knl_nvlist_add_value() adds a pool_value_t to an nvlist.
3481  */
3482 int
3483 pool_knl_nvlist_add_value(nvlist_t *list, const char *name,
3484     const pool_value_t *pv)
3485 {
3486 	uint64_t uval;
3487 	int64_t ival;
3488 	double dval;
3489 	uchar_t dval_b[sizeof (double)];
3490 	uchar_t bval;
3491 	const char *sval;
3492 	pool_value_class_t type;
3493 	char *nv_name;
3494 
3495 	if ((type = pool_value_get_type(pv)) == POC_INVAL) {
3496 		pool_seterror(POE_BADPARAM);
3497 		return (PO_FAIL);
3498 	}
3499 	nv_name = (char *)name;
3500 
3501 	switch (type) {
3502 	case POC_UINT:
3503 		if (pool_value_get_uint64(pv, &uval) == POC_INVAL) {
3504 			return (PO_FAIL);
3505 		}
3506 		if (nvlist_add_uint64(list, nv_name, uval) != 0) {
3507 			pool_seterror(POE_SYSTEM);
3508 			return (PO_FAIL);
3509 		}
3510 		break;
3511 	case POC_INT:
3512 		if (pool_value_get_int64(pv, &ival) == POC_INVAL) {
3513 			return (PO_FAIL);
3514 		}
3515 		if (nvlist_add_int64(list, nv_name, ival) != 0) {
3516 			pool_seterror(POE_SYSTEM);
3517 			return (PO_FAIL);
3518 		}
3519 		break;
3520 	case POC_DOUBLE:
3521 		if (pool_value_get_double(pv, &dval) == POC_INVAL) {
3522 			return (PO_FAIL);
3523 		}
3524 		/*
3525 		 * Since there is no support for doubles in the
3526 		 * kernel, store the double value in a byte array.
3527 		 */
3528 		(void) memcpy(dval_b, &dval, sizeof (double));
3529 		if (nvlist_add_byte_array(list, nv_name, dval_b,
3530 		    sizeof (double)) != 0) {
3531 			pool_seterror(POE_SYSTEM);
3532 			return (PO_FAIL);
3533 		}
3534 		break;
3535 	case POC_BOOL:
3536 		if (pool_value_get_bool(pv, &bval) == POC_INVAL) {
3537 			return (PO_FAIL);
3538 		}
3539 		if (nvlist_add_byte(list, nv_name, bval) != 0) {
3540 			pool_seterror(POE_SYSTEM);
3541 			return (PO_FAIL);
3542 		}
3543 		break;
3544 	case POC_STRING:
3545 		if (pool_value_get_string(pv, &sval) == POC_INVAL) {
3546 			return (PO_FAIL);
3547 		}
3548 		if (nvlist_add_string(list, nv_name, (char *)sval) != 0) {
3549 			pool_seterror(POE_SYSTEM);
3550 			return (PO_FAIL);
3551 		}
3552 		break;
3553 	default:
3554 		pool_seterror(POE_BADPARAM);
3555 		return (PO_FAIL);
3556 	}
3557 	return (PO_SUCCESS);
3558 }
3559 
3560 /*
3561  * hash_id() hashes all elements in a pool configuration using the
3562  * "sys_id" property. Not all elements have a "sys_id" property,
3563  * however elem_get_sysid() caters for this by always returning a
3564  * constant value for those elements. This isn't anticipated to lead
3565  * to a performance degradation in the hash, since those elements
3566  * which are likely to be most prevalent in a configuration do have
3567  * "sys_id" as a property.
3568  */
3569 uint64_t
3570 hash_id(const pool_elem_t *pe)
3571 {
3572 	id_t id;
3573 
3574 	id = elem_get_sysid(pe);
3575 	return (hash_buf(&id, sizeof (id)));
3576 }
3577 
3578 /*
3579  *  blocking_open() guarantees access to the pool device, if open()
3580  * is failing with EBUSY.
3581  */
3582 int
3583 blocking_open(const char *path, int oflag)
3584 {
3585 	int fd;
3586 
3587 	while ((fd = open(path, oflag)) == -1 && errno == EBUSY)
3588 		(void) poll(NULL, 0, 1 * MILLISEC);
3589 
3590 	return (fd);
3591 }
3592