xref: /titanic_50/usr/src/lib/libpool/common/pool_kernel.c (revision 42cac157f878fbb7ae190eb0339c6932f3192b87)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #include <assert.h>
28 #include <errno.h>
29 #include <exacct.h>
30 #include <fcntl.h>
31 #include <libnvpair.h>
32 #include <limits.h>
33 #include <poll.h>
34 #include <pool.h>
35 #include <stdlib.h>
36 #include <stdio.h>
37 #include <string.h>
38 #include <strings.h>
39 #include <stropts.h>
40 #include <thread.h>
41 #include <time.h>
42 #include <unistd.h>
43 
44 #include <libxml/tree.h>
45 
46 #include <sys/mman.h>
47 #include <sys/pool.h>
48 #include <sys/pool_impl.h>
49 #include <sys/priocntl.h>
50 #include <sys/stat.h>
51 #include <sys/time.h>
52 #include <sys/types.h>
53 
54 #include "dict.h"
55 
56 #include "pool_internal.h"
57 #include "pool_impl.h"
58 #include "pool_kernel_impl.h"
59 
/*
 * libpool kernel Manipulation Routines
 *
 * pool_kernel.c implements the kernel manipulation routines used by the
 * libpool kernel datastore. The functions are grouped into the following
 * logical areas:
 *
 *	- Utilities
 *	- Connections
 *	- Configuration
 *	- Result Sets
 *	- Elements (and sub-types)
 *	- Properties
 *	- Logging
 */
68 
/*
 * Device snapshot transfer buffer size. pool_knl_update() allocates
 * twice the size reported by the kernel whenever that size is smaller
 * than this value.
 */
#define	KERNEL_SNAPSHOT_BUF_SZ	65535

/*
 * Kernel result set's initial size. 8 is probably large enough for
 * most queries. Queries requiring more space are accommodated using
 * realloc on a per result set basis.
 */
#define	KERNEL_RS_INITIAL_SZ	8

/*
 * Property manipulation macros
 */
#define	KERNEL_PROP_RDONLY	0x1
85 
/*
 * Information required to evaluate qualifying elements for a query.
 *
 * The first five members have the same types, in the same order, as
 * the arguments of pool_knl_exec_query(); presumably they carry those
 * arguments while a query is evaluated (confirm against the query
 * code). rs has the kernel result set type.
 */
struct query_obj {
	const pool_conf_t *conf;
	const pool_elem_t *src;
	const char *src_attr;
	pool_elem_class_t classes;
	pool_value_t **props;
	pool_knl_result_set_t *rs;
};
97 
/*
 * Identifies a pool element with a processor set id.
 *
 * Entries form a singly linked list headed by psl_xref in
 * pool_snap_load_t. An entry is pushed by pool_knl_snap_load_push(),
 * given its pset id by pool_knl_snap_load_update() and unlinked and
 * freed by pool_knl_snap_load_remove() once the pool has a pset
 * associated with it.
 */
typedef struct pool_set_xref {
	pool_knl_pool_t	*psx_pool;	/* pool waiting for its pset */
	uint_t		psx_pset_id;	/* id of the pset it refers to */
	struct pool_set_xref *psx_next;	/* next entry, NULL at the tail */
} pool_set_xref_t;
106 
/*
 * Controls exacct snapshot load into libpool data structure.
 *
 * One instance is created by pool_knl_update() and threaded through
 * every (recursive) call of load_group().
 */
typedef struct pool_snap_load {
	/* Optional (may be NULL); load_group() ORs POU_* flags into it */
	int *psl_changed;
	/* Pools pushed by load_group() that still await their pset */
	pool_set_xref_t *psl_xref;
	/* System element, set when the system group is loaded */
	pool_elem_t *psl_system;
	/* Pset most recently identified by an EXD_PSET_PSETID item */
	pool_knl_resource_t *psl_pset;
} pool_snap_load_t;
116 
/*
 * Information about an XML document which is being constructed.
 *
 * Passed as the user argument to prop_build_cb(), which adds each
 * property it is given to ktx_node.
 */
struct knl_to_xml {
	xmlDocPtr ktx_doc;	/* document under construction */
	xmlNodePtr ktx_node;	/* node currently receiving properties */
};
124 
/*
 * Undo structure processing. The following structures are all used to
 * allow changes to the libpool snapshot and kernel to be undone
 * following an unsuccessful commit.
 *
 * Each structure pairs an ioctl argument block (the *_ioctl member)
 * with the library element(s) the logged operation refers to.
 * NOTE(review): how the members are consumed is decided by the log
 * item handlers, which are not visible here; confirm there.
 */
typedef struct pool_create_undo {
	pool_create_t pcu_ioctl;
	pool_elem_t *pcu_elem;
} pool_create_undo_t;
134 
/*
 * Undo record for a destroy operation.
 * NOTE: the pdu_ member prefix is also used by pool_dissoc_undo_t.
 */
typedef struct pool_destroy_undo {
	pool_destroy_t pdu_ioctl;
	pool_elem_t *pdu_elem;
} pool_destroy_undo_t;
139 
/*
 * Undo record for an associate operation. Judging by the member names
 * it keeps both the old and the new resource so that the association
 * can be reversed (confirm against the log item handlers).
 */
typedef struct pool_assoc_undo {
	pool_assoc_t pau_ioctl;
	pool_elem_t *pau_assoc;
	pool_elem_t *pau_oldres;
	pool_elem_t *pau_newres;
} pool_assoc_undo_t;
146 
/*
 * Undo record for a dissociate operation; same layout as
 * pool_assoc_undo_t but with a pool_dissoc_t ioctl block.
 * NOTE: the pdu_ member prefix is also used by pool_destroy_undo_t.
 */
typedef struct pool_dissoc_undo {
	pool_dissoc_t pdu_ioctl;
	pool_elem_t *pdu_dissoc;
	pool_elem_t *pdu_oldres;
	pool_elem_t *pdu_newres;
} pool_dissoc_undo_t;
153 
/*
 * Undo record for an explicit transfer of components between
 * resources. pxu_rl has the same type as the component list argument
 * of pool_knl_res_xtransfer(); presumably it is that list (confirm).
 */
typedef struct pool_xtransfer_undo {
	pool_xtransfer_t pxu_ioctl;
	pool_elem_t *pxu_src;
	pool_elem_t *pxu_tgt;
	pool_component_t **pxu_rl;
} pool_xtransfer_undo_t;
160 
/*
 * Undo record for a property put.
 * NOTE(review): ppu_alist/ppu_blist look like "after" and "before"
 * property lists and ppu_doioctl like a flag saying whether the kernel
 * must be told about the change; none of this is visible here, so
 * confirm against pool_knl_put_property() and the log item handlers.
 */
typedef struct pool_propput_undo {
	pool_propput_t ppu_ioctl;
	pool_elem_t *ppu_elem;
	nvlist_t *ppu_alist;
	nvlist_t *ppu_blist;
	uchar_t ppu_doioctl;
} pool_propput_undo_t;
168 
/*
 * Undo record for a property removal. pru_oldval is held by value;
 * presumably it preserves the removed property's value so that it can
 * be restored (confirm against the log item handlers).
 */
typedef struct pool_proprm_undo {
	pool_proprm_t pru_ioctl;
	pool_elem_t *pru_elem;
	pool_value_t pru_oldval;
} pool_proprm_undo_t;
174 
/*
 * Passed to xmlCreateIntSubset() as the system identifier of the DTD
 * when pool_knl_export() builds a document.
 */
extern const char *dtd_location;

/*
 * XML tag names; pool_knl_export() indexes this table with the value
 * returned by pool_elem_class().
 */
extern const char *element_class_tags[];
extern const char pool_info_location[];

/*
 * These functions are defined in pool_xml.c and represent the minimum
 * XML support required to allow a pool kernel configuration to be
 * exported as an XML document.
 */
extern int pool_xml_set_attr(xmlNodePtr, xmlChar *, const pool_value_t *);
extern int pool_xml_set_prop(xmlNodePtr, xmlChar *, const pool_value_t *);
extern void xml_init(void);
extern xmlNodePtr node_create(xmlNodePtr, const xmlChar *);
extern void pool_error_func(void *, const char *, ...);
/*
 * Utilities
 *
 * NOTE: every routine in the sections below is declared static here.
 * Definitions later in the file that omit the keyword (load_group(),
 * pool_knl_close(), pool_knl_commit(), ...) still have internal
 * linkage because of these prior declarations.
 */
static int load_group(pool_conf_t *, pool_knl_elem_t *, ea_object_t *,
    pool_snap_load_t *);
static void pool_knl_elem_free(pool_knl_elem_t *, int);
static int pool_knl_put_xml_property(pool_elem_t *, xmlNodePtr, const char *,
    const pool_value_t *);
static int pool_knl_snap_load_push(pool_snap_load_t *, pool_knl_pool_t *);
static int pool_knl_snap_load_update(pool_snap_load_t *, int, uint_t);
static int pool_knl_snap_load_remove(pool_snap_load_t *, int, uint_t);
static nvpair_t *pool_knl_find_nvpair(nvlist_t *, const char *);
static int pool_knl_nvlist_add_value(nvlist_t *, const char *,
    const pool_value_t *);
static int pool_knl_recover(pool_conf_t *);
static uint64_t hash_id(const pool_elem_t *);
static int blocking_open(const char *, int);

/*
 * Connections
 */
static void pool_knl_connection_free(pool_knl_connection_t *);

/*
 * Configuration
 */
static int pool_knl_close(pool_conf_t *);
static int pool_knl_validate(const pool_conf_t *, pool_valid_level_t);
static int pool_knl_commit(pool_conf_t *);
static int pool_knl_export(const pool_conf_t *, const char *,
    pool_export_format_t);
static int pool_knl_rollback(pool_conf_t *);
static pool_result_set_t *pool_knl_exec_query(const pool_conf_t *,
    const pool_elem_t *, const char *, pool_elem_class_t, pool_value_t **);
static int pool_knl_remove(pool_conf_t *);
static char *pool_knl_get_binding(pool_conf_t *, pid_t);
static int pool_knl_set_binding(pool_conf_t *, const char *, idtype_t, id_t);
static char *pool_knl_get_resource_binding(pool_conf_t *,
    pool_resource_elem_class_t, pid_t);
static int pool_knl_res_transfer(pool_resource_t *, pool_resource_t *,
    uint64_t);
static int pool_knl_res_xtransfer(pool_resource_t *, pool_resource_t *,
    pool_component_t **);

/*
 * Result Sets
 */
static pool_knl_result_set_t *pool_knl_result_set_alloc(const pool_conf_t *);
static int pool_knl_result_set_append(pool_knl_result_set_t *,
    pool_knl_elem_t *);
static int pool_knl_result_set_realloc(pool_knl_result_set_t *);
static void pool_knl_result_set_free(pool_knl_result_set_t *);
static pool_elem_t *pool_knl_rs_next(pool_result_set_t *);
static pool_elem_t *pool_knl_rs_prev(pool_result_set_t *);
static pool_elem_t *pool_knl_rs_first(pool_result_set_t *);
static pool_elem_t *pool_knl_rs_last(pool_result_set_t *);
static int pool_knl_rs_set_index(pool_result_set_t *, int);
static int pool_knl_rs_get_index(pool_result_set_t *);
static int pool_knl_rs_count(pool_result_set_t *);
static int pool_knl_rs_close(pool_result_set_t *);

/*
 * Element (and sub-type)
 */
static pool_knl_elem_t *pool_knl_elem_wrap(pool_conf_t *, pool_elem_class_t,
    pool_resource_elem_class_t, pool_component_elem_class_t);
static pool_elem_t *pool_knl_elem_create(pool_conf_t *, pool_elem_class_t,
    pool_resource_elem_class_t, pool_component_elem_class_t);
static int pool_knl_elem_remove(pool_elem_t *);
static int pool_knl_set_container(pool_elem_t *, pool_elem_t *);
static pool_elem_t *pool_knl_get_container(const pool_elem_t *);
/*
 * Pool element specific
 */
static int pool_knl_pool_associate(pool_t *, const pool_resource_t *);
static int pool_knl_pool_dissociate(pool_t *, const pool_resource_t *);

/*
 * Resource elements specific
 */
static int pool_knl_resource_is_system(const pool_resource_t *);
static int pool_knl_resource_can_associate(const pool_resource_t *);

/* Properties */
static pool_value_class_t pool_knl_get_property(const pool_elem_t *,
    const char *, pool_value_t *);
static pool_value_class_t pool_knl_get_dynamic_property(const pool_elem_t *,
    const char *, pool_value_t *);
static int pool_knl_put_property(pool_elem_t *, const char *,
    const pool_value_t *);
static int pool_knl_rm_property(pool_elem_t *, const char *);
static pool_value_t **pool_knl_get_properties(const pool_elem_t *, uint_t *);

/*
 * Logging
 */
static int log_item_commit(log_item_t *);
static int log_item_undo(log_item_t *);
static int log_item_release(log_item_t *);
289 
290 /*
291  * Utilities
292  */
293 
294 /*
295  * load_group() updates the library configuration with the kernel
296  * snapshot supplied in ep. The function is designed to be called
297  * recursively. This function depends implicitly on the ordering of
298  * the data provided in ep. Changes to the ordering of data in ep must
299  * be matched by changes to this function.
300  */
301 int
302 load_group(pool_conf_t *conf, pool_knl_elem_t *elem, ea_object_t *ep,
303     pool_snap_load_t *psl)
304 {
305 	ea_object_t *eo;
306 	pool_knl_elem_t *old_elem;
307 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
308 	int ret = PO_SUCCESS;
309 
310 	if ((ep->eo_catalog & EXD_DATA_MASK) == EXD_GROUP_SYSTEM) {
311 		if ((elem = pool_knl_elem_wrap(conf, PEC_SYSTEM, PREC_INVALID,
312 		    PCEC_INVALID)) == NULL)
313 			return (PO_FAIL);
314 		if (nvlist_alloc(&elem->pke_properties, NV_UNIQUE_NAME_TYPE,
315 		    0) != 0) {
316 			pool_knl_elem_free(elem, PO_FALSE);
317 			pool_seterror(POE_SYSTEM);
318 			return (PO_FAIL);
319 		}
320 		/*
321 		 * Check to see if we already have an element
322 		 * for this data. If we have, free the newly
323 		 * created elem and continue with the old one
324 		 */
325 		if ((old_elem = dict_get(prov->pkc_elements, elem)) != NULL) {
326 			nvlist_free(old_elem->pke_properties);
327 			old_elem->pke_properties = elem->pke_properties;
328 			pool_knl_elem_free(elem, PO_FALSE);
329 			elem = old_elem;
330 		} else {
331 			if (dict_put(prov->pkc_elements, elem, elem) != NULL) {
332 				pool_knl_elem_free(elem, PO_TRUE);
333 				pool_seterror(POE_SYSTEM);
334 				return (PO_FAIL);
335 			}
336 		}
337 		psl->psl_system = (pool_elem_t *)elem;
338 	}
339 
340 	for (eo = ep->eo_group.eg_objs; eo != NULL; eo = eo->eo_next) {
341 		int data;
342 		pool_knl_elem_t *prop_elem = NULL;
343 
344 		data = (eo->eo_catalog & EXD_DATA_MASK);
345 
346 		switch (data) {
347 		case EXD_SYSTEM_TSTAMP:
348 		case EXD_POOL_TSTAMP:
349 		case EXD_PSET_TSTAMP:
350 		case EXD_CPU_TSTAMP:
351 			if (eo->eo_item.ei_uint64 > prov->pkc_lotime) {
352 				if (eo->eo_item.ei_uint64 > prov->pkc_ltime)
353 					prov->pkc_ltime = eo->eo_item.ei_uint64;
354 				if (psl->psl_changed) {
355 					switch (data) {
356 					case EXD_SYSTEM_TSTAMP:
357 						*psl->psl_changed |= POU_SYSTEM;
358 						break;
359 					case EXD_POOL_TSTAMP:
360 						*psl->psl_changed |= POU_POOL;
361 						break;
362 					case EXD_PSET_TSTAMP:
363 						*psl->psl_changed |= POU_PSET;
364 						break;
365 					case EXD_CPU_TSTAMP:
366 						*psl->psl_changed |= POU_CPU;
367 						break;
368 					}
369 				}
370 			}
371 			break;
372 		case EXD_SYSTEM_PROP:
373 		case EXD_POOL_PROP:
374 		case EXD_PSET_PROP:
375 		case EXD_CPU_PROP:
376 			if (data == EXD_PSET_PROP) {
377 				prop_elem = elem;
378 				elem = (pool_knl_elem_t *)psl->psl_pset;
379 			}
380 			nvlist_free(elem->pke_properties);
381 			if (nvlist_unpack(eo->eo_item.ei_raw,
382 			    eo->eo_item.ei_size, &elem->pke_properties, 0) !=
383 			    0) {
384 				pool_seterror(POE_SYSTEM);
385 				return (PO_FAIL);
386 			}
387 			elem->pke_ltime = prov->pkc_ltime;
388 			if (data == EXD_PSET_PROP) {
389 				elem = prop_elem;
390 			}
391 			break;
392 		case EXD_POOL_POOLID:
393 			if (nvlist_alloc(&elem->pke_properties,
394 			    NV_UNIQUE_NAME_TYPE, 0) != 0) {
395 				pool_seterror(POE_SYSTEM);
396 				return (PO_FAIL);
397 			}
398 			if (nvlist_add_int64(elem->pke_properties,
399 			    "pool.sys_id",
400 			    (int64_t)eo->eo_item.ei_uint32) != 0) {
401 				pool_seterror(POE_SYSTEM);
402 				return (PO_FAIL);
403 			}
404 			if ((old_elem = dict_get(prov->pkc_elements, elem)) !=
405 			    NULL) {
406 				nvlist_free(old_elem->pke_properties);
407 				old_elem->pke_properties = elem->pke_properties;
408 				pool_knl_elem_free(elem, PO_FALSE);
409 				elem = old_elem;
410 			} else {
411 				if (dict_put(prov->pkc_elements, elem, elem) !=
412 				    NULL) {
413 					pool_knl_elem_free(elem, PO_TRUE);
414 					pool_seterror(POE_SYSTEM);
415 					return (PO_FAIL);
416 				}
417 			}
418 			if (pool_knl_snap_load_push(psl,
419 			    (pool_knl_pool_t *)elem) != PO_SUCCESS) {
420 				pool_seterror(POE_SYSTEM);
421 				return (PO_FAIL);
422 			}
423 			((pool_knl_pool_t *)elem)->pkp_assoc[PREC_PSET] = NULL;
424 			break;
425 		case EXD_POOL_PSETID:
426 			if (pool_knl_snap_load_update(psl, EXD_POOL_PSETID,
427 			    eo->eo_item.ei_uint32) != PO_SUCCESS) {
428 				pool_seterror(POE_SYSTEM);
429 				return (PO_FAIL);
430 			}
431 			break;
432 		case EXD_PSET_PSETID:
433 			if (nvlist_alloc(&elem->pke_properties,
434 			    NV_UNIQUE_NAME_TYPE, 0) != 0) {
435 				pool_seterror(POE_SYSTEM);
436 				return (PO_FAIL);
437 			}
438 			if (nvlist_add_int64(elem->pke_properties,
439 			    "pset.sys_id",
440 			    (int64_t)eo->eo_item.ei_uint32) != 0) {
441 				pool_seterror(POE_SYSTEM);
442 				return (PO_FAIL);
443 			}
444 			if ((old_elem = dict_get(prov->pkc_elements, elem)) !=
445 			    NULL) {
446 				nvlist_free(old_elem->pke_properties);
447 				old_elem->pke_properties = elem->pke_properties;
448 				pool_knl_elem_free(elem, PO_FALSE);
449 				elem = old_elem;
450 			} else {
451 				if (dict_put(prov->pkc_elements, elem, elem) !=
452 				    NULL) {
453 					pool_knl_elem_free(elem, PO_TRUE);
454 					pool_seterror(POE_SYSTEM);
455 					return (PO_FAIL);
456 				}
457 			}
458 			psl->psl_pset = (pool_knl_resource_t *)elem;
459 			if (pool_knl_snap_load_remove(psl, data,
460 			    eo->eo_item.ei_uint32) != PO_SUCCESS) {
461 				pool_seterror(POE_SYSTEM);
462 				return (PO_FAIL);
463 			}
464 			break;
465 		case EXD_CPU_CPUID:
466 			if (nvlist_alloc(&elem->pke_properties,
467 			    NV_UNIQUE_NAME_TYPE, 0) != 0) {
468 				pool_seterror(POE_SYSTEM);
469 				return (PO_FAIL);
470 			}
471 			if (nvlist_add_int64(elem->pke_properties,
472 			    "cpu.sys_id",
473 			    (int64_t)eo->eo_item.ei_uint32) != 0) {
474 				pool_seterror(POE_SYSTEM);
475 				return (PO_FAIL);
476 			}
477 			if ((old_elem = dict_get(prov->pkc_elements, elem)) !=
478 			    NULL) {
479 				nvlist_free(old_elem->pke_properties);
480 				old_elem->pke_properties = elem->pke_properties;
481 				old_elem->pke_parent = elem->pke_parent;
482 				pool_knl_elem_free(elem, PO_FALSE);
483 				elem = old_elem;
484 			} else {
485 				if (dict_put(prov->pkc_elements, elem, elem) !=
486 				    NULL) {
487 					pool_knl_elem_free(elem, PO_TRUE);
488 					pool_seterror(POE_SYSTEM);
489 					return (PO_FAIL);
490 				}
491 			}
492 			break;
493 		case EXD_GROUP_POOL:
494 			if ((elem = pool_knl_elem_wrap(conf, PEC_POOL,
495 			    PREC_INVALID, PCEC_INVALID)) == NULL)
496 				return (PO_FAIL);
497 			if (pool_set_container(psl->psl_system,
498 			    (pool_elem_t *)elem) != PO_SUCCESS) {
499 				pool_seterror(POE_SYSTEM);
500 				return (PO_FAIL);
501 			}
502 			break;
503 		case EXD_GROUP_PSET:
504 			if ((elem = pool_knl_elem_wrap(conf, PEC_RES_COMP,
505 			    PREC_PSET, PCEC_INVALID)) == NULL)
506 				return (PO_FAIL);
507 			if (pool_set_container(psl->psl_system,
508 			    (pool_elem_t *)elem) != PO_SUCCESS) {
509 				pool_seterror(POE_SYSTEM);
510 				return (PO_FAIL);
511 			}
512 			break;
513 		case EXD_GROUP_CPU:
514 			if ((elem = pool_knl_elem_wrap(conf, PEC_COMP,
515 			    PREC_INVALID, PCEC_CPU)) == NULL)
516 				return (PO_FAIL);
517 			if (pool_set_container((pool_elem_t *)psl->psl_pset,
518 			    (pool_elem_t *)elem) != PO_SUCCESS) {
519 				pool_seterror(POE_SYSTEM);
520 				return (PO_FAIL);
521 			}
522 			break;
523 		default:
524 			break;
525 		}
526 
527 
528 		if (eo->eo_type == EO_GROUP) {
529 			if ((ret = load_group(conf, elem, eo, psl)) == PO_FAIL)
530 				break;
531 		}
532 	}
533 	return (ret);
534 }
535 
536 /*
537  * Push a snapshot entry onto the list of pools in the snapshot.
538  */
539 int
540 pool_knl_snap_load_push(pool_snap_load_t *psl, pool_knl_pool_t *pkp)
541 {
542 	pool_set_xref_t *psx;
543 
544 	if ((psx = malloc(sizeof (pool_set_xref_t))) == NULL) {
545 		pool_seterror(POE_SYSTEM);
546 		return (PO_FAIL);
547 	}
548 	(void) memset(psx, 0, sizeof (pool_set_xref_t));
549 	psx->psx_pool = pkp;
550 	/*
551 	 * Push onto the list of pools
552 	 */
553 	psx->psx_next = psl->psl_xref;
554 	psl->psl_xref = psx;
555 
556 	return (PO_SUCCESS);
557 }
558 
559 /*
560  * Update the current cross-reference for the supplied type of
561  * resource.
562  */
563 int
564 pool_knl_snap_load_update(pool_snap_load_t *psl, int type, uint_t id)
565 {
566 	switch (type) {
567 	case EXD_POOL_PSETID:
568 		psl->psl_xref->psx_pset_id = id;
569 		break;
570 	default:
571 		return (PO_FAIL);
572 	}
573 
574 	return (PO_SUCCESS);
575 }
576 
577 /*
578  * Remove a resource entry with the supplied type and id from the
579  * snapshot list when it is no longer required.
580  */
581 int
582 pool_knl_snap_load_remove(pool_snap_load_t *psl, int type, uint_t id)
583 {
584 	pool_set_xref_t *current, *prev, *next;
585 
586 	for (prev = NULL, current = psl->psl_xref; current != NULL;
587 	    current = next) {
588 		switch (type) {
589 		case EXD_PSET_PSETID:
590 			if (current->psx_pset_id == id)
591 				current->psx_pool->pkp_assoc[PREC_PSET] =
592 				    psl->psl_pset;
593 			break;
594 		default:
595 			return (PO_FAIL);
596 		}
597 		next = current->psx_next;
598 		if (current->psx_pool->pkp_assoc[PREC_PSET] != NULL) {
599 			if (prev != NULL) {
600 				prev->psx_next = current->psx_next;
601 			} else {
602 				psl->psl_xref = current->psx_next;
603 			}
604 			free(current);
605 		} else
606 			prev = current;
607 	}
608 
609 	return (PO_SUCCESS);
610 }
611 
612 /*
613  * Return the nvpair with the supplied name from the supplied list.
614  *
615  * NULL is returned if the name cannot be found in the list.
616  */
617 nvpair_t *
618 pool_knl_find_nvpair(nvlist_t *l, const char *name)
619 {
620 	nvpair_t *pair;
621 
622 	for (pair = nvlist_next_nvpair(l, NULL); pair != NULL;
623 	    pair = nvlist_next_nvpair(l, pair)) {
624 		if (strcmp(nvpair_name(pair), name) == 0)
625 			break;
626 	}
627 	return (pair);
628 }
629 
630 /*
631  * Close the configuration. There are a few steps to closing a configuration:
632  * - Close the pseudo device
633  * - Free the data provider
634  * Returns PO_SUCCESS/PO_FAIL
635  */
636 int
637 pool_knl_close(pool_conf_t *conf)
638 {
639 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
640 
641 	if (close(prov->pkc_fd) < 0) {
642 		pool_seterror(POE_SYSTEM);
643 		return (PO_FAIL);
644 	}
645 	/*
646 	 * Rollback any pending changes before freeing the prov. This
647 	 * ensures there are no memory leaks from pending transactions.
648 	 * However, don't rollback when we've done a temporary pool since the
649 	 * pool/resources haven't really been committed in this case.
650 	 * They will all be freed in pool_knl_connection_free and we don't
651 	 * want to double free them.
652 	 */
653 	if (!(conf->pc_prov->pc_oflags & PO_TEMP))
654 		(void) pool_knl_rollback(conf);
655 	pool_knl_connection_free(prov);
656 	return (PO_SUCCESS);
657 }
658 
659 /*
660  * Remove elements in this map (previously identified as "dead") from
661  * the configuration map (prov->pkc_elements).
662  */
663 
664 /* ARGSUSED1 */
665 static void
666 remove_dead_elems(const void *key, void **value, void *cl)
667 {
668 	pool_knl_elem_t *pke = (pool_knl_elem_t *)key;
669 	pool_conf_t *conf = TO_CONF(TO_ELEM(pke));
670 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
671 
672 	assert(dict_remove(prov->pkc_elements, pke) != NULL);
673 #ifdef DEBUG
674 	dprintf("remove_dead_elems:\n");
675 	pool_elem_dprintf(TO_ELEM(pke));
676 #endif	/* DEBUG */
677 	pool_knl_elem_free(pke, PO_TRUE);
678 }
679 
680 /*
681  * Find elements which were not updated the last time that
682  * load_group() was called. Add those elements into a separate map
683  * (passed in cl) which will be later used to remove these elements
684  * from the configuration map.
685  */
686 /* ARGSUSED1 */
687 static void
688 find_dead_elems(const void *key, void **value, void *cl)
689 {
690 	pool_knl_elem_t *pke = (pool_knl_elem_t *)key;
691 	pool_conf_t *conf = TO_CONF(TO_ELEM(pke));
692 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
693 	dict_hdl_t *dead_map = (dict_hdl_t *)cl;
694 
695 	if (pke->pke_ltime < prov->pkc_ltime)
696 		(void) dict_put(dead_map, pke, pke);
697 }
698 
699 /*
700  * Update the snapshot held by the library. This function acts as the
701  * controller for the snapshot update procedure. Then snapshot is
702  * actually updated in multiple phases by the load_group() function
703  * (which updates existing elements and creates new elements as
704  * required) and then by find_dead_elems and remove_dead_elems
705  * (respectively responsible for identifying elements which are to be
706  * removed and then removing them).
707  *
708  * Returns PO_SUCCESS
709  */
710 int
711 pool_knl_update(pool_conf_t *conf, int *changed)
712 {
713 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
714 	pool_query_t query = {0};
715 	ea_object_t *ep;
716 	dict_hdl_t *dead_map;
717 	pool_snap_load_t psl = { NULL };
718 
719 	/*
720 	 * Ensure the library snapshot is consistent, if there are any
721 	 * outstanding transactions return failure.
722 	 */
723 	if (log_size(prov->pkc_log) != 0) {
724 		pool_seterror(POE_INVALID_CONF);
725 		return (PO_FAIL);
726 	}
727 	/*
728 	 * Query the kernel for a snapshot of the configuration state. Use
729 	 * load_group to allocate the user-land representation of the
730 	 * data returned in the snapshot.
731 	 */
732 	/* LINTED E_CONSTANT_CONDITION */
733 	while (1) {
734 		if (ioctl(prov->pkc_fd, POOL_QUERY, &query) < 0) {
735 			pool_seterror(POE_SYSTEM);
736 			return (PO_FAIL);
737 		}
738 		if ((query.pq_io_buf = calloc(1,
739 		    (query.pq_io_bufsize < KERNEL_SNAPSHOT_BUF_SZ) ?
740 		    query.pq_io_bufsize * 2 : query.pq_io_bufsize)) == NULL) {
741 			pool_seterror(POE_SYSTEM);
742 			return (PO_FAIL);
743 		}
744 		if (ioctl(prov->pkc_fd, POOL_QUERY, &query) < 0) {
745 			free(query.pq_io_buf);
746 			if (errno != ENOMEM) {
747 				pool_seterror(POE_SYSTEM);
748 				return (PO_FAIL);
749 			}
750 			query.pq_io_bufsize = 0;
751 			query.pq_io_buf = NULL;
752 		} else
753 			break;
754 	}
755 	if (ea_unpack_object(&ep, EUP_NOALLOC, query.pq_io_buf,
756 	    query.pq_io_bufsize) != EO_GROUP) {
757 		free(query.pq_io_buf);
758 		pool_seterror(POE_DATASTORE);
759 		return (PO_FAIL);
760 	}
761 	/*
762 	 * Update the library snapshot
763 	 */
764 	psl.psl_changed = changed;
765 	prov->pkc_lotime = prov->pkc_ltime;
766 	if (load_group(conf, NULL, ep, &psl) != PO_SUCCESS) {
767 		free(query.pq_io_buf);
768 		ea_free_object(ep, EUP_NOALLOC);
769 		return (PO_FAIL);
770 	}
771 
772 	free(query.pq_io_buf);
773 	ea_free_object(ep, EUP_NOALLOC);
774 	/*
775 	 * Now search the dictionary for items that must be removed because
776 	 * they were neither created nor updated.
777 	 */
778 	if ((dead_map = dict_new((int (*)(const void *, const void *))
779 	    pool_elem_compare, (uint64_t (*)(const void *))hash_id)) == NULL) {
780 		pool_seterror(POE_SYSTEM);
781 		return (PO_FAIL);
782 	}
783 	dict_map(prov->pkc_elements, find_dead_elems, dead_map);
784 
785 	if (dict_length(dead_map) > 0) {
786 		dict_map(dead_map, remove_dead_elems, NULL);
787 	}
788 	dict_free(&dead_map);
789 
790 	return (PO_SUCCESS);
791 }
792 
793 /*
794  * Rely on the kernel to always keep a kernel configuration valid.
795  * Returns PO_SUCCESS
796  */
797 /* ARGSUSED */
798 int
799 pool_knl_validate(const pool_conf_t *conf, pool_valid_level_t level)
800 {
801 	return ((conf->pc_state == POF_INVALID) ? PO_FAIL : PO_SUCCESS);
802 }
803 
804 /*
805  * Process all the outstanding transactions in the log. If the processing
806  * fails, then attempt to rollback and "undo" the changes.
807  */
808 int
809 pool_knl_commit(pool_conf_t *conf)
810 {
811 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
812 	int lock = 1;
813 
814 	/*
815 	 * Lock the kernel state for the commit
816 	 */
817 	if (ioctl(prov->pkc_fd, POOL_COMMIT, lock) < 0) {
818 		pool_seterror(POE_SYSTEM);
819 		return (PO_FAIL);
820 	}
821 	lock = 0;
822 	/*
823 	 * If the state is LS_FAIL, then try to recover before
824 	 * performing the commit.
825 	 */
826 	if (prov->pkc_log->l_state == LS_FAIL) {
827 		if (pool_knl_recover(conf) == PO_FAIL) {
828 			/*
829 			 * Unlock the kernel state for the
830 			 * commit. Assert that this * can't fail,
831 			 * since if it ever does fail the library is
832 			 * unusable.
833 			 */
834 			assert(ioctl(prov->pkc_fd, POOL_COMMIT, lock) >= 0);
835 		}
836 	}
837 	/*
838 	 * Commit the log
839 	 */
840 	if (log_walk(prov->pkc_log, log_item_commit) != PO_SUCCESS) {
841 		(void) pool_knl_recover(conf);
842 		/*
843 		 * Unlock the kernel state for the commit. Assert that
844 		 * this can't fail, since if it ever does fail the
845 		 * library is unusable.
846 		 */
847 		assert(ioctl(prov->pkc_fd, POOL_COMMIT, lock) >= 0);
848 		pool_seterror(POE_SYSTEM);
849 		return (PO_FAIL);
850 	}
851 	/*
852 	 * Unlock the kernel state for the commit. Assert that this
853 	 * can't fail, since if it ever does fail the library is
854 	 * unusable.
855 	 */
856 	assert(ioctl(prov->pkc_fd, POOL_COMMIT, lock) >= 0);
857 	/*
858 	 * Release the log resources
859 	 */
860 	(void) log_walk(prov->pkc_log, log_item_release);
861 	log_empty(prov->pkc_log);
862 	return (PO_SUCCESS);
863 }
864 
865 /*
866  * prop_build_cb() is designed to be called from
867  * pool_walk_properties(). The property value is used to put an XML
868  * property on the supplied ktx_node. This is an essential part of the
869  * mechanism used to export a kernel configuration in libpool XML
870  * form.
871  */
872 /* ARGSUSED */
873 static int
874 prop_build_cb(pool_conf_t *UNUSED, pool_elem_t *pe, const char *name,
875     pool_value_t *pval, void *user)
876 {
877 	struct knl_to_xml *info = (struct knl_to_xml *)user;
878 
879 	return (pool_knl_put_xml_property((pool_elem_t *)pe, info->ktx_node,
880 	    name, pval));
881 }
882 
883 /*
884  * Duplicate some of the functionality from pool_xml_put_property()
885  * (see pool_xml.c) to allow a kernel configuration to add XML nodes
886  * to an XML tree which represents the kernel configuration. This is
887  * an essential part of the mechanism used to export a kernel
888  * configuration in libpool XML form.
889  */
890 int
891 pool_knl_put_xml_property(pool_elem_t *pe, xmlNodePtr node, const char *name,
892     const pool_value_t *val)
893 {
894 
895 	/*
896 	 * "type" is a special attribute which is not visible ever outside of
897 	 * libpool. Use the specific type accessor function.
898 	 */
899 	if (strcmp(name, c_type) == 0) {
900 		return (pool_xml_set_attr(node, BAD_CAST name,
901 		    val));
902 	}
903 	if (is_ns_property(pe, name) != NULL) {	/* in ns */
904 		if (pool_xml_set_attr(node,
905 		    BAD_CAST property_name_minus_ns(pe, name), val) == PO_FAIL)
906 			return (pool_xml_set_prop(node, BAD_CAST name,
907 			    val));
908 	} else
909 		return (pool_xml_set_prop(node, BAD_CAST name, val));
910 	return (PO_SUCCESS);
911 }
912 
913 /*
914  * Export the kernel configuration as an XML file. The configuration
915  * is used to build an XML document in memory. This document is then
916  * saved to the supplied location.
917  */
918 int
919 pool_knl_export(const pool_conf_t *conf, const char *location,
920     pool_export_format_t fmt)
921 {
922 	xmlNodePtr node_comment;
923 	xmlNodePtr system;
924 	int ret;
925 	pool_t **ps;
926 	pool_resource_t **rs;
927 	uint_t nelem;
928 	int i;
929 	struct knl_to_xml info;
930 	char_buf_t *cb = NULL;
931 	xmlValidCtxtPtr cvp;
932 
933 	xml_init();
934 
935 
936 	switch (fmt) {
937 	case POX_NATIVE:
938 		info.ktx_doc = xmlNewDoc(BAD_CAST "1.0");
939 		(void) xmlCreateIntSubset(info.ktx_doc, BAD_CAST "system",
940 		    BAD_CAST "-//Sun Microsystems Inc//DTD Resource "
941 		    "Management All//EN",
942 		    BAD_CAST dtd_location);
943 
944 		if ((cvp = xmlNewValidCtxt()) == NULL) {
945 			xmlFreeDoc(info.ktx_doc);
946 			pool_seterror(POE_DATASTORE);
947 			return (PO_FAIL);
948 		}
949 		/*
950 		 * Call xmlValidateDocument() to force the parsing of
951 		 * the DTD. Ignore errors and warning messages as we
952 		 * know the document isn't valid.
953 		 */
954 		(void) xmlValidateDocument(cvp, info.ktx_doc);
955 		xmlFreeValidCtxt(cvp);
956 		if ((info.ktx_node = node_create(NULL, BAD_CAST "system")) ==
957 		    NULL) {
958 			xmlFreeDoc(info.ktx_doc);
959 			pool_seterror(POE_DATASTORE);
960 			return (PO_FAIL);
961 		}
962 
963 		system = info.ktx_node;
964 		info.ktx_doc->_private = (void *)conf;
965 
966 		(void) xmlDocSetRootElement(info.ktx_doc, info.ktx_node);
967 		(void) xmlSetProp(info.ktx_node, BAD_CAST c_ref_id,
968 		    BAD_CAST "dummy");
969 		if ((node_comment = xmlNewDocComment(info.ktx_doc,
970 		    BAD_CAST "\nConfiguration for pools facility. Do NOT"
971 		    " edit this file by hand - use poolcfg(1)"
972 		    " or libpool(3POOL) instead.\n")) == NULL) {
973 			xmlFreeDoc(info.ktx_doc);
974 			pool_seterror(POE_DATASTORE);
975 			return (PO_FAIL);
976 		}
977 		if (xmlAddPrevSibling(info.ktx_node, node_comment) == NULL) {
978 			xmlFree(node_comment);
979 			xmlFreeDoc(info.ktx_doc);
980 			pool_seterror(POE_DATASTORE);
981 			return (PO_FAIL);
982 		}
983 		if (pool_walk_any_properties((pool_conf_t *)conf,
984 		    pool_conf_to_elem(conf), &info, prop_build_cb, 1) ==
985 		    PO_FAIL) {
986 			xmlFreeDoc(info.ktx_doc);
987 			return (PO_FAIL);
988 		}
989 		if ((cb = alloc_char_buf(CB_DEFAULT_LEN)) == NULL) {
990 			xmlFreeDoc(info.ktx_doc);
991 			return (PO_FAIL);
992 		}
993 		/*
994 		 * Now add pool details
995 		 */
996 		if ((ps = pool_query_pools(conf, &nelem, NULL)) != NULL) {
997 			for (i = 0; i < nelem; i++) {
998 				pool_elem_t *elem = TO_ELEM(ps[i]);
999 				uint_t nreselem;
1000 				const char *sep = "";
1001 				int j;
1002 
1003 				if (elem_is_tmp(elem))
1004 					continue;
1005 
1006 				if ((info.ktx_node = node_create(system,
1007 				    BAD_CAST element_class_tags
1008 				    [pool_elem_class(elem)])) == NULL) {
1009 					free(ps);
1010 					free_char_buf(cb);
1011 					xmlFreeDoc(info.ktx_doc);
1012 					pool_seterror(POE_DATASTORE);
1013 					return (PO_FAIL);
1014 				}
1015 				if (pool_walk_any_properties(
1016 				    (pool_conf_t *)conf,
1017 				    elem, &info, prop_build_cb, 1) == PO_FAIL) {
1018 					free(ps);
1019 					free_char_buf(cb);
1020 					xmlFreeDoc(info.ktx_doc);
1021 					return (PO_FAIL);
1022 				}
1023 				/*
1024 				 * TODO: pset specific res manipulation
1025 				 */
1026 				if ((rs = pool_query_pool_resources(conf, ps[i],
1027 				    &nreselem, NULL)) == NULL) {
1028 					free(ps);
1029 					free_char_buf(cb);
1030 					xmlFreeDoc(info.ktx_doc);
1031 					pool_seterror(POE_INVALID_CONF);
1032 					return (PO_FAIL);
1033 				}
1034 				if (set_char_buf(cb, "") == PO_FAIL) {
1035 					free(rs);
1036 					free(ps);
1037 					free_char_buf(cb);
1038 					xmlFreeDoc(info.ktx_doc);
1039 					return (PO_FAIL);
1040 				}
1041 				for (j = 0; j < nreselem; j++) {
1042 					pool_elem_t *reselem = TO_ELEM(rs[j]);
1043 					if (append_char_buf(cb, "%s%s_%d", sep,
1044 					    pool_elem_class_string(reselem),
1045 					    (int)elem_get_sysid(reselem)) ==
1046 					    PO_FAIL) {
1047 						free(rs);
1048 						free(ps);
1049 						free_char_buf(cb);
1050 						xmlFreeDoc(info.ktx_doc);
1051 						return (PO_FAIL);
1052 					}
1053 					sep = " ";
1054 				}
1055 				free(rs);
1056 				(void) xmlSetProp(info.ktx_node, BAD_CAST "res",
1057 				    BAD_CAST cb->cb_buf);
1058 				if (set_char_buf(cb, "%s_%d",
1059 				    pool_elem_class_string(elem),
1060 				    (int)elem_get_sysid(elem)) == PO_FAIL) {
1061 					free(ps);
1062 					free_char_buf(cb);
1063 					xmlFreeDoc(info.ktx_doc);
1064 					return (PO_FAIL);
1065 				}
1066 				(void) xmlSetProp(info.ktx_node,
1067 				    BAD_CAST c_ref_id,
1068 				    BAD_CAST  cb->cb_buf);
1069 			}
1070 			free(ps);
1071 		}
1072 		/*
1073 		 * Now add resource details (including components)
1074 		 */
1075 		if ((rs = pool_query_resources(conf, &nelem, NULL)) != NULL) {
1076 			for (i = 0; i < nelem; i++) {
1077 				pool_elem_t *elem = TO_ELEM(rs[i]);
1078 				pool_component_t **cs = NULL;
1079 				uint_t ncompelem;
1080 				int j;
1081 
1082 				if (elem_is_tmp(elem))
1083 					continue;
1084 
1085 				if ((info.ktx_node = node_create(system,
1086 				    BAD_CAST element_class_tags
1087 				    [pool_elem_class(elem)])) == NULL) {
1088 					free(rs);
1089 					free_char_buf(cb);
1090 					xmlFreeDoc(info.ktx_doc);
1091 					pool_seterror(POE_DATASTORE);
1092 					return (PO_FAIL);
1093 				}
1094 				if (pool_walk_any_properties(
1095 				    (pool_conf_t *)conf,
1096 				    elem, &info, prop_build_cb, 1) == PO_FAIL) {
1097 					free(rs);
1098 					free_char_buf(cb);
1099 					xmlFreeDoc(info.ktx_doc);
1100 					return (PO_FAIL);
1101 				}
1102 				if (set_char_buf(cb, "%s_%d",
1103 				    pool_elem_class_string(elem),
1104 				    (int)elem_get_sysid(elem)) == PO_FAIL) {
1105 					free(rs);
1106 					free_char_buf(cb);
1107 					xmlFreeDoc(info.ktx_doc);
1108 					return (PO_FAIL);
1109 				}
1110 				(void) xmlSetProp(info.ktx_node,
1111 				    BAD_CAST c_ref_id,
1112 				    BAD_CAST  cb->cb_buf);
1113 				if ((cs = pool_query_resource_components(conf,
1114 				    rs[i], &ncompelem, NULL)) != NULL) {
1115 					xmlNodePtr resource = info.ktx_node;
1116 
1117 					for (j = 0; j < ncompelem; j++) {
1118 						pool_elem_t *compelem =
1119 						    TO_ELEM(cs[j]);
1120 						if ((info.ktx_node =
1121 						    node_create(resource,
1122 						    BAD_CAST element_class_tags
1123 						    [pool_elem_class(
1124 						    compelem)])) == NULL) {
1125 							pool_seterror(
1126 							    POE_DATASTORE);
1127 							free(rs);
1128 							free(cs);
1129 							free_char_buf(cb);
1130 							xmlFreeDoc(info.
1131 							    ktx_doc);
1132 							return (PO_FAIL);
1133 						}
1134 						if (pool_walk_any_properties(
1135 						    (pool_conf_t *)conf,
1136 						    compelem, &info,
1137 						    prop_build_cb, 1) ==
1138 						    PO_FAIL) {
1139 							free(rs);
1140 							free(cs);
1141 							free_char_buf(cb);
1142 							xmlFreeDoc(info.
1143 							    ktx_doc);
1144 							return (PO_FAIL);
1145 						}
1146 						if (set_char_buf(cb, "%s_%d",
1147 						    pool_elem_class_string(
1148 						    compelem),
1149 						    (int)elem_get_sysid(
1150 						    compelem)) == PO_FAIL) {
1151 							free(rs);
1152 							free(cs);
1153 							free_char_buf(cb);
1154 							xmlFreeDoc(info.
1155 							    ktx_doc);
1156 							return (PO_FAIL);
1157 						}
1158 						(void) xmlSetProp(info.ktx_node,
1159 						    BAD_CAST c_ref_id,
1160 						    BAD_CAST  cb->cb_buf);
1161 					}
1162 					free(cs);
1163 				}
1164 			}
1165 			free(rs);
1166 		}
1167 		free_char_buf(cb);
1168 		/*
1169 		 * Set up the message handlers prior to calling
1170 		 * xmlValidateDocument()
1171 		 */
1172 		if ((cvp = xmlNewValidCtxt()) == NULL) {
1173 			xmlFreeDoc(info.ktx_doc);
1174 			pool_seterror(POE_DATASTORE);
1175 			return (PO_FAIL);
1176 		}
1177 		cvp->error    = pool_error_func;
1178 		cvp->warning  = pool_error_func;
1179 		if (xmlValidateDocument(cvp, info.ktx_doc) == 0) {
1180 			xmlFreeValidCtxt(cvp);
1181 			xmlFreeDoc(info.ktx_doc);
1182 			pool_seterror(POE_INVALID_CONF);
1183 			return (PO_FAIL);
1184 		}
1185 		xmlFreeValidCtxt(cvp);
1186 		ret = xmlSaveFormatFile(location, info.ktx_doc, 1);
1187 		xmlFreeDoc(info.ktx_doc);
1188 		if (ret == -1) {
1189 			pool_seterror(POE_SYSTEM);
1190 			return (PO_FAIL);
1191 		}
1192 		return (PO_SUCCESS);
1193 	default:
1194 		pool_seterror(POE_BADPARAM);
1195 		return (PO_FAIL);
1196 	}
1197 }
1198 
1199 /*
1200  * Rollback the changes to the kernel
1201  */
1202 int
1203 pool_knl_recover(pool_conf_t *conf)
1204 {
1205 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
1206 
1207 	prov->pkc_log->l_state = LS_RECOVER;
1208 	if (log_reverse_walk(prov->pkc_log, log_item_undo) != PO_SUCCESS) {
1209 		dprintf("Library configuration consistency error\n");
1210 		prov->pkc_log->l_state = LS_FAIL;
1211 		pool_seterror(POE_INVALID_CONF);
1212 		return (PO_FAIL);
1213 	}
1214 	prov->pkc_log->l_state = LS_DO;
1215 	return (PO_SUCCESS);
1216 }
1217 
1218 /*
1219  * Rollback the changes to the configuration
1220  */
1221 int
1222 pool_knl_rollback(pool_conf_t *conf)
1223 {
1224 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
1225 
1226 	prov->pkc_log->l_state = LS_UNDO;
1227 	if (log_reverse_walk(prov->pkc_log, log_item_undo) != PO_SUCCESS) {
1228 		dprintf("Kernel configuration consistency error\n");
1229 		(void) log_walk(prov->pkc_log, log_item_release);
1230 		log_empty(prov->pkc_log);
1231 		prov->pkc_log->l_state = LS_FAIL;
1232 		pool_seterror(POE_INVALID_CONF);
1233 		return (PO_FAIL);
1234 	}
1235 	(void) log_walk(prov->pkc_log, log_item_release);
1236 	log_empty(prov->pkc_log);
1237 	prov->pkc_log->l_state = LS_DO;
1238 	return (PO_SUCCESS);
1239 }
1240 
1241 /*
1242  * Callback used to build the result set for a query. Each invocation will
1243  * supply a candidate element for inclusion. The element is filtered by:
1244  * - class
1245  * - properties
1246  * If the element "matches" the target, then it is added to the result
1247  * set, otherwise it is ignored.
1248  */
1249 /* ARGSUSED1 */
1250 static void
1251 build_result_set(const void *key, void **value, void *cl)
1252 {
1253 	struct query_obj *qo = (struct query_obj *)cl;
1254 	pool_knl_elem_t *pke = (pool_knl_elem_t *)key;
1255 
1256 	/*
1257 	 * Check to see if it's the right class of element
1258 	 */
1259 	if (qo->classes & (1 << pool_elem_class((pool_elem_t *)key))) {
1260 		int i;
1261 		/*
1262 		 * Now check to see if the src element is correct. If no src
1263 		 * element is supplied, ignore this check
1264 		 */
1265 		if (qo->src) {
1266 			pool_knl_elem_t *parent;
1267 
1268 			for (parent = pke; parent != NULL;
1269 			    parent = parent->pke_parent) {
1270 				if (parent == (pool_knl_elem_t *)qo->src)
1271 					break;
1272 			}
1273 			if (parent == NULL)
1274 				return;
1275 		}
1276 		/*
1277 		 * Now check for property matches (if there are any specified)
1278 		 */
1279 		if (qo->props) {
1280 			int matched = PO_TRUE;
1281 			for (i = 0; qo->props[i] != NULL; i++) {
1282 				pool_value_t val = POOL_VALUE_INITIALIZER;
1283 
1284 				if (pool_get_property(TO_CONF(TO_ELEM(pke)),
1285 				    (pool_elem_t *)pke,
1286 				    pool_value_get_name(qo->props[i]), &val) ==
1287 				    POC_INVAL) {
1288 					matched = PO_FALSE;
1289 					break;
1290 				} else {
1291 					if (pool_value_equal(qo->props[i],
1292 					    &val) != PO_TRUE) {
1293 						matched = PO_FALSE;
1294 						break;
1295 					}
1296 				}
1297 			}
1298 			if (matched == PO_TRUE)
1299 				(void) pool_knl_result_set_append(qo->rs,
1300 				    (pool_knl_elem_t *)key);
1301 		} else {
1302 			(void) pool_knl_result_set_append(qo->rs,
1303 			    (pool_knl_elem_t *)key);
1304 		}
1305 	}
1306 }
1307 
1308 /*
1309  * Execute the supplied query and return a result set which contains
1310  * all qualifying elements.
1311  */
1312 pool_result_set_t *
1313 pool_knl_exec_query(const pool_conf_t *conf, const pool_elem_t *src,
1314     const char *src_attr, pool_elem_class_t classes, pool_value_t **props)
1315 {
1316 	pool_knl_result_set_t *rs;
1317 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
1318 	struct query_obj qo;
1319 	int matched = PO_TRUE;
1320 
1321 	/*
1322 	 * Have a buffer at this point, that we can use
1323 	 */
1324 	if ((rs = pool_knl_result_set_alloc(conf)) == NULL) {
1325 		return (NULL);
1326 	}
1327 	qo.conf = conf;
1328 	qo.src = src;
1329 	qo.src_attr = src_attr;
1330 	qo.classes = classes;
1331 	qo.props = props;
1332 	qo.rs = rs;
1333 	if (src_attr != NULL) {
1334 		pool_knl_pool_t *pkp = (pool_knl_pool_t *)src;
1335 
1336 		/*
1337 		 * Note: This logic is resource specific and must be
1338 		 * extended for additional resource types.
1339 		 */
1340 		/*
1341 		 * Check for property matches (if there are any specified)
1342 		 */
1343 		if (props) {
1344 			int i;
1345 
1346 			for (i = 0; props[i] != NULL; i++) {
1347 				pool_value_t val = POOL_VALUE_INITIALIZER;
1348 
1349 				if (pool_get_property(conf,
1350 				    (pool_elem_t *)pkp->pkp_assoc[PREC_PSET],
1351 				    pool_value_get_name(props[i]), &val) ==
1352 				    POC_INVAL) {
1353 					matched = PO_FALSE;
1354 					break;
1355 				} else {
1356 					if (pool_value_equal(props[i],
1357 					    &val) != PO_TRUE) {
1358 						matched = PO_FALSE;
1359 						break;
1360 					}
1361 				}
1362 			}
1363 		}
1364 
1365 		if (matched == PO_TRUE)
1366 			(void) pool_knl_result_set_append(rs,
1367 			    (pool_knl_elem_t *)pkp->pkp_assoc[PREC_PSET]);
1368 	} else
1369 		dict_map(prov->pkc_elements, build_result_set, &qo);
1370 
1371 	if (rs->pkr_count == 0)
1372 		pool_seterror(POE_INVALID_SEARCH);
1373 	return ((pool_result_set_t *)rs);
1374 }
1375 
1376 /*
1377  * Callback function intended to be used from pool_walk_pools(). If
1378  * the supplied pool is not the default pool attempt to destroy it.
1379  */
1380 /*ARGSUSED*/
1381 static int
1382 destroy_pool_cb(pool_conf_t *conf, pool_t *pool, void *unused)
1383 {
1384 	if (elem_is_default(TO_ELEM(pool)) != PO_TRUE)
1385 		return (pool_destroy(conf, pool));
1386 	/*
1387 	 * Return PO_SUCCESS even though we don't delete the default
1388 	 * pool so that the walk continues
1389 	 */
1390 	return (PO_SUCCESS);
1391 }
1392 
1393 /*
1394  * Remove the configuration details. This means remove all elements
1395  * apart from the system elements.
1396  */
1397 int
1398 pool_knl_remove(pool_conf_t *conf)
1399 {
1400 	uint_t i, nelem;
1401 	pool_resource_t **resources;
1402 
1403 	conf->pc_state = POF_DESTROY;
1404 	if ((resources = pool_query_resources(conf, &nelem, NULL)) != NULL) {
1405 		for (i = 0; i < nelem; i++) {
1406 			if (resource_is_system(resources[i]) == PO_FALSE)
1407 				if (pool_resource_destroy(conf, resources[i]) !=
1408 				    PO_SUCCESS) {
1409 					pool_seterror(POE_INVALID_CONF);
1410 					return (PO_FAIL);
1411 				}
1412 		}
1413 		free(resources);
1414 	}
1415 	(void) pool_walk_pools(conf, conf, destroy_pool_cb);
1416 	if (pool_conf_commit(conf, PO_FALSE) != PO_SUCCESS)
1417 		return (PO_FAIL);
1418 
1419 	if (pool_conf_close(conf) != PO_SUCCESS)
1420 		return (PO_FAIL);
1421 
1422 	return (PO_SUCCESS);
1423 }
1424 
1425 /*
1426  * Determine the name of the pool to which the supplied pid is
1427  * bound. If it cannot be determined return NULL.
1428  */
1429 char *
1430 pool_knl_get_binding(pool_conf_t *conf, pid_t pid)
1431 {
1432 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
1433 	const char *sval;
1434 	char *name = NULL;
1435 	pool_bindq_t bindq;
1436 	pool_value_t *props[] = { NULL, NULL };
1437 	uint_t nelem = 0;
1438 	pool_t **pools;
1439 	pool_value_t val = POOL_VALUE_INITIALIZER;
1440 
1441 	props[0] = &val;
1442 
1443 	bindq.pb_o_id_type = P_PID;
1444 	bindq.pb_o_id = pid;
1445 	if (ioctl(prov->pkc_fd, POOL_BINDQ, &bindq) < 0) {
1446 		pool_seterror(POE_SYSTEM);
1447 		return (NULL);
1448 	}
1449 
1450 	if (pool_value_set_name(props[0], "pool.sys_id") != PO_SUCCESS) {
1451 		return (NULL);
1452 	}
1453 	pool_value_set_int64(props[0], bindq.pb_i_id);
1454 	if ((pools = pool_query_pools(conf, &nelem, props)) == NULL) {
1455 		pool_seterror(POE_BADPARAM);
1456 		return (NULL);
1457 	}
1458 
1459 	if (nelem != 1) {
1460 		free(pools);
1461 		pool_seterror(POE_INVALID_CONF);
1462 		return (NULL);
1463 	}
1464 	if (pool_get_ns_property(TO_ELEM(pools[0]), c_name, props[0])
1465 	    == POC_INVAL) {
1466 		free(pools);
1467 		return (NULL);
1468 	}
1469 	if (pool_value_get_string(props[0], &sval) != PO_SUCCESS) {
1470 		free(pools);
1471 		return (NULL);
1472 	}
1473 	if ((name = strdup(sval)) == NULL) {
1474 		free(pools);
1475 		pool_seterror(POE_SYSTEM);
1476 		return (NULL);
1477 	}
1478 	return (name);
1479 }
1480 
1481 /*
1482  * Bind idtype id to the pool name.
1483  */
1484 int
1485 pool_knl_set_binding(pool_conf_t *conf, const char *pool_name, idtype_t idtype,
1486     id_t id)
1487 {
1488 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
1489 	pool_bind_t bind;
1490 	pool_t *pool;
1491 	int ret;
1492 
1493 	if ((pool = pool_get_pool(conf, pool_name)) == NULL)
1494 		return (PO_FAIL);
1495 
1496 	bind.pb_o_id_type = idtype;
1497 	bind.pb_o_id = id;
1498 	bind.pb_o_pool_id = elem_get_sysid(TO_ELEM(pool));
1499 
1500 	while ((ret = ioctl(prov->pkc_fd, POOL_BIND, &bind)) < 0 &&
1501 	    errno == EAGAIN)
1502 		;
1503 	if (ret < 0) {
1504 		pool_seterror(POE_SYSTEM);
1505 		return (PO_FAIL);
1506 	}
1507 	return (PO_SUCCESS);
1508 }
1509 
1510 /*
1511  * pool_knl_get_resource_binding() returns the binding for a pid to
1512  * the supplied type of resource. If a binding cannot be determined,
1513  * NULL is returned.
1514  */
1515 char *
1516 pool_knl_get_resource_binding(pool_conf_t *conf,
1517     pool_resource_elem_class_t type, pid_t pid)
1518 {
1519 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
1520 	const char *sval;
1521 	char *name = NULL;
1522 	pool_bindq_t bindq;
1523 	pool_value_t *props[] = { NULL, NULL };
1524 	uint_t nelem = 0;
1525 	pool_t **pools;
1526 	pool_resource_t **resources;
1527 	pool_value_t val = POOL_VALUE_INITIALIZER;
1528 
1529 	props[0] = &val;
1530 	bindq.pb_o_id_type = P_PID;
1531 	bindq.pb_o_id = pid;
1532 	if (ioctl(prov->pkc_fd, POOL_BINDQ, &bindq) < 0) {
1533 		pool_seterror(POE_SYSTEM);
1534 		return (NULL);
1535 	}
1536 
1537 	if (pool_value_set_name(props[0], "pool.sys_id") != PO_SUCCESS) {
1538 		return (NULL);
1539 	}
1540 	pool_value_set_int64(props[0], bindq.pb_i_id);
1541 	if ((pools = pool_query_pools(conf, &nelem, props)) == NULL) {
1542 		pool_seterror(POE_BADPARAM);
1543 		return (NULL);
1544 	}
1545 
1546 	if (nelem != 1) {
1547 		free(pools);
1548 		pool_seterror(POE_INVALID_CONF);
1549 		return (NULL);
1550 	}
1551 
1552 	if (pool_value_set_string(props[0], pool_resource_type_string(type)) !=
1553 	    PO_SUCCESS ||
1554 	    pool_value_set_name(props[0], c_type) != PO_SUCCESS) {
1555 		free(pools);
1556 		return (NULL);
1557 	}
1558 
1559 	if ((resources = pool_query_pool_resources(conf, pools[0], &nelem,
1560 	    NULL)) == NULL) {
1561 		free(pools);
1562 		pool_seterror(POE_INVALID_CONF);
1563 		return (NULL);
1564 	}
1565 	free(pools);
1566 	if (nelem != 1) {
1567 		free(resources);
1568 		pool_seterror(POE_INVALID_CONF);
1569 		return (NULL);
1570 	}
1571 	if (pool_get_ns_property(TO_ELEM(resources[0]), c_name, props[0]) ==
1572 	    POC_INVAL) {
1573 		free(resources);
1574 		return (NULL);
1575 	}
1576 	free(resources);
1577 	if (pool_value_get_string(props[0], &sval) != PO_SUCCESS) {
1578 		return (NULL);
1579 	}
1580 	if ((name = strdup(sval)) == NULL) {
1581 		pool_seterror(POE_SYSTEM);
1582 		return (NULL);
1583 	}
1584 	return (name);
1585 }
1586 
1587 /*
1588  * Allocate the required library data structure and initialise it.
1589  */
1590 pool_knl_elem_t *
1591 pool_knl_elem_wrap(pool_conf_t *conf, pool_elem_class_t class,
1592     pool_resource_elem_class_t res_class,
1593     pool_component_elem_class_t comp_class)
1594 {
1595 	pool_knl_elem_t *elem;
1596 	pool_elem_t *pe;
1597 
1598 	switch (class) {
1599 	case PEC_SYSTEM:
1600 		if ((elem = malloc(sizeof (pool_knl_system_t))) == NULL) {
1601 			pool_seterror(POE_SYSTEM);
1602 			return (NULL);
1603 		}
1604 		(void) memset(elem, 0, sizeof (pool_knl_system_t));
1605 		break;
1606 	case PEC_POOL:
1607 		if ((elem = malloc(sizeof (pool_knl_pool_t))) == NULL) {
1608 			pool_seterror(POE_SYSTEM);
1609 			return (NULL);
1610 		}
1611 		(void) memset(elem, 0, sizeof (pool_knl_pool_t));
1612 		break;
1613 	case PEC_RES_COMP:
1614 	case PEC_RES_AGG:
1615 		if ((elem = malloc(sizeof (pool_knl_resource_t))) == NULL) {
1616 			pool_seterror(POE_SYSTEM);
1617 			return (NULL);
1618 		}
1619 		(void) memset(elem, 0, sizeof (pool_knl_resource_t));
1620 		break;
1621 	case PEC_COMP:
1622 		if ((elem = malloc(sizeof (pool_knl_component_t))) == NULL) {
1623 			pool_seterror(POE_SYSTEM);
1624 			return (NULL);
1625 		}
1626 		(void) memset(elem, 0, sizeof (pool_knl_component_t));
1627 		break;
1628 	default:
1629 		pool_seterror(POE_BADPARAM);
1630 		return (NULL);
1631 	}
1632 	pe = TO_ELEM(elem);
1633 	pe->pe_conf = conf;
1634 	pe->pe_class = class;
1635 	pe->pe_resource_class = res_class;
1636 	pe->pe_component_class = comp_class;
1637 	/* Set up the function pointers for element manipulation */
1638 	pe->pe_get_prop = pool_knl_get_property;
1639 	pe->pe_put_prop = pool_knl_put_property;
1640 	pe->pe_rm_prop = pool_knl_rm_property;
1641 	pe->pe_get_props = pool_knl_get_properties;
1642 	pe->pe_remove = pool_knl_elem_remove;
1643 	pe->pe_get_container = pool_knl_get_container;
1644 	pe->pe_set_container = pool_knl_set_container;
1645 	/*
1646 	 * Specific initialisation for different types of element
1647 	 */
1648 	if (class == PEC_POOL) {
1649 		pool_knl_pool_t *pp = (pool_knl_pool_t *)elem;
1650 		pp->pp_associate = pool_knl_pool_associate;
1651 		pp->pp_dissociate = pool_knl_pool_dissociate;
1652 		pp->pkp_assoc[PREC_PSET] = (pool_knl_resource_t *)
1653 		    resource_by_sysid(conf, PS_NONE, "pset");
1654 	}
1655 	if (class == PEC_RES_COMP || class == PEC_RES_AGG) {
1656 		pool_knl_resource_t *pr = (pool_knl_resource_t *)elem;
1657 		pr->pr_is_system = pool_knl_resource_is_system;
1658 		pr->pr_can_associate = pool_knl_resource_can_associate;
1659 	}
1660 #if DEBUG
1661 	if (dict_put(((pool_knl_connection_t *)conf->pc_prov)->pkc_leaks,
1662 	    elem, elem) != NULL)
1663 		assert(!"leak map put failed");
1664 	dprintf("allocated %p\n", elem);
1665 #endif	/* DEBUG */
1666 	return (elem);
1667 }
1668 
1669 /*
1670  * Allocate a new pool_knl_elem_t in the supplied configuration of the
1671  * specified class.
1672  * Returns element pointer/NULL
1673  */
1674 pool_elem_t *
1675 pool_knl_elem_create(pool_conf_t *conf, pool_elem_class_t class,
1676     pool_resource_elem_class_t res_class,
1677     pool_component_elem_class_t comp_class)
1678 {
1679 	pool_knl_elem_t *elem;
1680 	pool_create_undo_t *create;
1681 	pool_knl_connection_t *prov = (pool_knl_connection_t *)conf->pc_prov;
1682 	static int id = -3;
1683 	char_buf_t *cb;
1684 
1685 	if ((elem = pool_knl_elem_wrap(conf, class, res_class, comp_class)) ==
1686 	    NULL)
1687 		return (NULL);
1688 
1689 	/*
1690 	 * Allocate an nvlist to hold properties
1691 	 */
1692 	if (nvlist_alloc(&elem->pke_properties, NV_UNIQUE_NAME_TYPE, 0) != 0) {
1693 		pool_knl_elem_free(elem, PO_FALSE);
1694 		pool_seterror(POE_SYSTEM);
1695 		return (NULL);
1696 	}
1697 	/*
1698 	 * Allocate a temporary ID and name until the element is
1699 	 * created for real
1700 	 */
1701 	if ((cb = alloc_char_buf(CB_DEFAULT_LEN)) == NULL) {
1702 		pool_knl_elem_free(elem, PO_TRUE);
1703 		return (NULL);
1704 	}
1705 	if (set_char_buf(cb, "%s.sys_id",
1706 	    pool_elem_class_string((pool_elem_t *)elem)) != PO_SUCCESS) {
1707 		pool_knl_elem_free(elem, PO_TRUE);
1708 		free_char_buf(cb);
1709 		return (NULL);
1710 	}
1711 	(void) nvlist_add_int64(elem->pke_properties, cb->cb_buf, id--);
1712 	if (set_char_buf(cb, "%s.name",
1713 	    pool_elem_class_string((pool_elem_t *)elem)) != PO_SUCCESS) {
1714 		pool_knl_elem_free(elem, PO_TRUE);
1715 		free_char_buf(cb);
1716 		return (NULL);
1717 	}
1718 	(void) nvlist_add_string(elem->pke_properties, cb->cb_buf, "");
1719 	/*
1720 	 * If it's a resource class, it will need an initial size
1721 	 */
1722 	if (class == PEC_RES_COMP || class == PEC_RES_AGG) {
1723 		if (set_char_buf(cb, "%s.size",
1724 		    pool_elem_class_string((pool_elem_t *)elem)) !=
1725 		    PO_SUCCESS) {
1726 			pool_knl_elem_free(elem, PO_TRUE);
1727 			free_char_buf(cb);
1728 			return (NULL);
1729 		}
1730 		(void) nvlist_add_uint64(elem->pke_properties, cb->cb_buf, 0);
1731 	}
1732 	free_char_buf(cb);
1733 
1734 	/*
1735 	 * Register the newly created element
1736 	 */
1737 	if (dict_put(prov->pkc_elements, elem, elem) != NULL) {
1738 		pool_knl_elem_free(elem, PO_TRUE);
1739 		pool_seterror(POE_SYSTEM);
1740 		return (NULL);
1741 	}
1742 
1743 	if (prov->pkc_log->l_state != LS_DO)
1744 		return ((pool_elem_t *)elem);
1745 
1746 	/*
1747 	 * The remaining logic is setting up the arguments for the
1748 	 * POOL_CREATE ioctl and appending the details into the log.
1749 	 */
1750 	if ((create = malloc(sizeof (pool_create_undo_t))) == NULL) {
1751 		pool_seterror(POE_SYSTEM);
1752 		return (NULL);
1753 	}
1754 	create->pcu_ioctl.pc_o_type = class;
1755 	switch (class) {
1756 	case PEC_SYSTEM:
1757 		pool_seterror(POE_BADPARAM);
1758 		free(create);
1759 		return (NULL);
1760 	case PEC_POOL: /* NO-OP */
1761 		break;
1762 	case PEC_RES_COMP:
1763 	case PEC_RES_AGG:
1764 		create->pcu_ioctl.pc_o_sub_type = res_class;
1765 		break;
1766 	case PEC_COMP:
1767 		create->pcu_ioctl.pc_o_sub_type = comp_class;
1768 		break;
1769 	default:
1770 		pool_seterror(POE_BADPARAM);
1771 		free(create);
1772 		return (NULL);
1773 	}
1774 
1775 	create->pcu_elem = (pool_elem_t *)elem;
1776 
1777 	if (log_append(prov->pkc_log, POOL_CREATE, (void *)create) !=
1778 	    PO_SUCCESS) {
1779 		free(create);
1780 		return (NULL);
1781 	}
1782 	return ((pool_elem_t *)elem);
1783 }
1784 
1785 /*
1786  * Remove the details of the element from our userland copy and destroy
1787  * the element (if appropriate) in the kernel.
1788  */
1789 int
1790 pool_knl_elem_remove(pool_elem_t *pe)
1791 {
1792 	pool_knl_connection_t *prov;
1793 	pool_destroy_undo_t *destroy;
1794 
1795 	prov = (pool_knl_connection_t *)(TO_CONF(pe))->pc_prov;
1796 
1797 	if (dict_remove(prov->pkc_elements, pe) == NULL) {
1798 		pool_seterror(POE_SYSTEM);
1799 		return (PO_FAIL);
1800 	}
1801 	if (prov->pkc_log->l_state != LS_DO) {
1802 		return (PO_SUCCESS);
1803 	}
1804 
1805 	/*
1806 	 * The remaining logic is setting up the arguments for the
1807 	 * POOL_DESTROY ioctl and appending the details into the log.
1808 	 */
1809 	if ((destroy = malloc(sizeof (pool_destroy_undo_t))) == NULL) {
1810 		pool_seterror(POE_SYSTEM);
1811 		return (PO_FAIL);
1812 	}
1813 	destroy->pdu_ioctl.pd_o_type = pool_elem_class(pe);
1814 
1815 	if (destroy->pdu_ioctl.pd_o_type == PEC_RES_COMP ||
1816 	    destroy->pdu_ioctl.pd_o_type == PEC_RES_AGG)
1817 		destroy->pdu_ioctl.pd_o_sub_type = pool_resource_elem_class(pe);
1818 
1819 	if (destroy->pdu_ioctl.pd_o_type == PEC_COMP)
1820 		destroy->pdu_ioctl.pd_o_sub_type =
1821 		    pool_component_elem_class(pe);
1822 
1823 	destroy->pdu_elem = pe;
1824 
1825 	if (log_append(prov->pkc_log, POOL_DESTROY, (void *)destroy) !=
1826 	    PO_SUCCESS) {
1827 		free(destroy);
1828 		return (PO_FAIL);
1829 	}
1830 	return (PO_SUCCESS);
1831 }
1832 
1833 /*
1834  * Set the parent of the supplied child to the supplied parent
1835  */
1836 int
1837 pool_knl_set_container(pool_elem_t *pp, pool_elem_t *pc)
1838 {
1839 	pool_knl_elem_t *pkp = (pool_knl_elem_t *)pp;
1840 	pool_knl_elem_t *pkc = (pool_knl_elem_t *)pc;
1841 
1842 	pkc->pke_parent = pkp;
1843 	return (PO_SUCCESS);
1844 }
1845 
1846 /*
1847  * TODO: Needed for msets and ssets.
1848  */
1849 /* ARGSUSED */
1850 int
1851 pool_knl_res_transfer(pool_resource_t *src, pool_resource_t *tgt,
1852     uint64_t size) {
1853 	return (PO_FAIL);
1854 }
1855 
1856 /*
1857  * Transfer resource components from one resource set to another.
1858  */
1859 int
1860 pool_knl_res_xtransfer(pool_resource_t *src, pool_resource_t *tgt,
1861     pool_component_t **rl) {
1862 	pool_elem_t *src_e = TO_ELEM(src);
1863 	pool_elem_t *tgt_e = TO_ELEM(tgt);
1864 	pool_xtransfer_undo_t *xtransfer;
1865 	size_t size;
1866 	pool_knl_connection_t *prov =
1867 	    (pool_knl_connection_t *)TO_CONF(src_e)->pc_prov;
1868 
1869 	if (prov->pkc_log->l_state != LS_DO) {
1870 		/*
1871 		 * Walk the Result Set and move the resource components
1872 		 */
1873 		for (size = 0; rl[size] != NULL; size++) {
1874 			if (pool_set_container(TO_ELEM(tgt),
1875 			    TO_ELEM(rl[size])) == PO_FAIL) {
1876 				return (PO_FAIL);
1877 			}
1878 		}
1879 		return (PO_SUCCESS);
1880 	}
1881 
1882 	/*
1883 	 * The remaining logic is setting up the arguments for the
1884 	 * POOL_XTRANSFER ioctl and appending the details into the log.
1885 	 */
1886 	if ((xtransfer = malloc(sizeof (pool_xtransfer_undo_t))) == NULL) {
1887 		pool_seterror(POE_SYSTEM);
1888 		return (PO_FAIL);
1889 	}
1890 
1891 	if (pool_elem_class(src_e) == PEC_RES_COMP) {
1892 		xtransfer->pxu_ioctl.px_o_id_type =
1893 		    pool_resource_elem_class(src_e);
1894 	} else {
1895 		pool_seterror(POE_BADPARAM);
1896 		return (PO_FAIL);
1897 	}
1898 
1899 
1900 	for (xtransfer->pxu_ioctl.px_o_complist_size = 0;
1901 	    rl[xtransfer->pxu_ioctl.px_o_complist_size] != NULL;
1902 	    xtransfer->pxu_ioctl.px_o_complist_size++)
1903 		/* calculate the size using the terminating NULL */;
1904 	if ((xtransfer->pxu_ioctl.px_o_comp_list =
1905 		calloc(xtransfer->pxu_ioctl.px_o_complist_size,
1906 		sizeof (id_t))) == NULL) {
1907 		pool_seterror(POE_SYSTEM);
1908 		return (PO_FAIL);
1909 	}
1910 	if ((xtransfer->pxu_rl = calloc(
1911 	    xtransfer->pxu_ioctl.px_o_complist_size + 1,
1912 	    sizeof (pool_component_t *))) == NULL) {
1913 		pool_seterror(POE_SYSTEM);
1914 		return (PO_FAIL);
1915 	}
1916 	(void) memcpy(xtransfer->pxu_rl, rl,
1917 	    xtransfer->pxu_ioctl.px_o_complist_size *
1918 	    sizeof (pool_component_t *));
1919 	xtransfer->pxu_src = src_e;
1920 	xtransfer->pxu_tgt = tgt_e;
1921 
1922 	if (log_append(prov->pkc_log, POOL_XTRANSFER, (void *)xtransfer) !=
1923 	    PO_SUCCESS) {
1924 		free(xtransfer);
1925 		return (PO_FAIL);
1926 	}
1927 	for (size = 0; rl[size] != NULL; size++) {
1928 		if (pool_set_container(TO_ELEM(tgt), TO_ELEM(rl[size])) ==
1929 		    PO_FAIL) {
1930 			return (PO_FAIL);
1931 		}
1932 	}
1933 	return (PO_SUCCESS);
1934 }
1935 
1936 /*
1937  * Return the parent of an element.
1938  */
1939 pool_elem_t *
1940 pool_knl_get_container(const pool_elem_t *pe)
1941 {
1942 	pool_knl_elem_t *pke = (pool_knl_elem_t *)pe;
1943 
1944 	return ((pool_elem_t *)pke->pke_parent);
1945 }
1946 
1947 /*
1948  * Note: This function is resource specific, needs extending for other
1949  * resource types
1950  */
1951 int
1952 pool_knl_resource_is_system(const pool_resource_t *pr)
1953 {
1954 	switch (pool_resource_elem_class(TO_ELEM(pr))) {
1955 	case PREC_PSET:
1956 		return (PSID_IS_SYSSET(
1957 		    elem_get_sysid(TO_ELEM(pr))));
1958 	default:
1959 		return (PO_FALSE);
1960 	}
1961 }
1962 
1963 /*
1964  * Note: This function is resource specific, needs extending for other
1965  * resource types
1966  */
1967 int
1968 pool_knl_resource_can_associate(const pool_resource_t *pr)
1969 {
1970 	switch (pool_resource_elem_class(TO_ELEM(pr))) {
1971 	case PREC_PSET:
1972 		return (PO_TRUE);
1973 	default:
1974 		return (PO_FALSE);
1975 	}
1976 }
1977 
1978 /*
1979  * pool_knl_pool_associate() associates the supplied resource to the
1980  * supplied pool.
1981  *
1982  * Returns: PO_SUCCESS/PO_FAIL
1983  */
1984 int
1985 pool_knl_pool_associate(pool_t *pool, const pool_resource_t *resource)
1986 {
1987 	pool_knl_connection_t *prov;
1988 	pool_knl_pool_t *pkp = (pool_knl_pool_t *)pool;
1989 	pool_resource_elem_class_t res_class =
1990 	    pool_resource_elem_class(TO_ELEM(resource));
1991 	pool_assoc_undo_t *assoc;
1992 	pool_knl_resource_t *orig_res = pkp->pkp_assoc[res_class];
1993 
1994 	/*
1995 	 * Are we allowed to associate with this target?
1996 	 */
1997 	if (pool_knl_resource_can_associate(resource) == PO_FALSE) {
1998 		pool_seterror(POE_BADPARAM);
1999 		return (PO_FAIL);
2000 	}
2001 	prov = (pool_knl_connection_t *)(TO_CONF(TO_ELEM(pool)))->pc_prov;
2002 
2003 	if (prov->pkc_log->l_state != LS_DO) {
2004 		pkp->pkp_assoc[res_class] = (pool_knl_resource_t *)resource;
2005 		return (PO_SUCCESS);
2006 	}
2007 
2008 	/*
2009 	 * The remaining logic is setting up the arguments for the
2010 	 * POOL_ASSOC ioctl and appending the details into the log.
2011 	 */
2012 	if ((assoc = malloc(sizeof (pool_assoc_undo_t))) == NULL) {
2013 		pool_seterror(POE_SYSTEM);
2014 		return (PO_FAIL);
2015 	}
2016 	assoc->pau_assoc = TO_ELEM(pool);
2017 	assoc->pau_oldres = (pool_elem_t *)orig_res;
2018 	assoc->pau_newres = TO_ELEM(resource);
2019 
2020 	assoc->pau_ioctl.pa_o_id_type = res_class;
2021 
2022 	if (log_append(prov->pkc_log, POOL_ASSOC, (void *)assoc) !=
2023 	    PO_SUCCESS) {
2024 		free(assoc);
2025 		pkp->pkp_assoc[res_class] = orig_res;
2026 		return (PO_FAIL);
2027 	}
2028 	pkp->pkp_assoc[res_class] = (pool_knl_resource_t *)resource;
2029 	return (PO_SUCCESS);
2030 }
2031 
2032 /*
2033  * pool_knl_pool_dissociate() dissociates the supplied resource from
2034  * the supplied pool.
2035  *
2036  * Returns: PO_SUCCESS/PO_FAIL
2037  */
2038 int
2039 pool_knl_pool_dissociate(pool_t *pool, const pool_resource_t *resource)
2040 {
2041 	pool_knl_connection_t *prov;
2042 	pool_dissoc_undo_t *dissoc;
2043 	pool_knl_pool_t *pkp = (pool_knl_pool_t *)pool;
2044 	pool_resource_t *default_res = (pool_resource_t *)get_default_resource(
2045 	    resource);
2046 	pool_resource_elem_class_t res_class =
2047 	    pool_resource_elem_class(TO_ELEM(resource));
2048 
2049 	prov = (pool_knl_connection_t *)(TO_CONF(TO_ELEM(pool)))->pc_prov;
2050 
2051 	if (prov->pkc_log->l_state != LS_DO) {
2052 		pkp->pkp_assoc[res_class] = (pool_knl_resource_t *)default_res;
2053 		return (PO_SUCCESS);
2054 	}
2055 	/*
2056 	 * The remaining logic is setting up the arguments for the
2057 	 * POOL_DISSOC ioctl and appending the details into the log.
2058 	 */
2059 	if ((dissoc = malloc(sizeof (pool_dissoc_undo_t))) == NULL) {
2060 		pool_seterror(POE_SYSTEM);
2061 		return (PO_FAIL);
2062 	}
2063 	dissoc->pdu_dissoc = TO_ELEM(pool);
2064 	dissoc->pdu_oldres = TO_ELEM(resource);
2065 	dissoc->pdu_newres = TO_ELEM(default_res);
2066 
2067 	dissoc->pdu_ioctl.pd_o_id_type = res_class;
2068 
2069 	if (log_append(prov->pkc_log, POOL_DISSOC, (void *)dissoc) !=
2070 	    PO_SUCCESS) {
2071 		free(dissoc);
2072 		pkp->pkp_assoc[res_class] = (pool_knl_resource_t *)resource;
2073 		return (PO_FAIL);
2074 	}
2075 
2076 	/*
2077 	 * Update our local copy
2078 	 */
2079 	pkp->pkp_assoc[res_class] = (pool_knl_resource_t *)default_res;
2080 	return (PO_SUCCESS);
2081 }
2082 
2083 /*
2084  * Allocate a data provider for the supplied configuration and optionally
2085  * discover resources.
2086  * The data provider is the cross over point from the "abstract" configuration
2087  * functions into the data representation specific manipulation routines.
2088  * This function sets up all the required pointers to create a kernel aware
2089  * data provider.
2090  * Returns PO_SUCCESS/PO_FAIL
2091  */
2092 int
2093 pool_knl_connection_alloc(pool_conf_t *conf, int oflags)
2094 {
2095 	pool_knl_connection_t *prov;
2096 
2097 	if ((prov = malloc(sizeof (pool_knl_connection_t))) == NULL) {
2098 		pool_seterror(POE_SYSTEM);
2099 		return (PO_FAIL);
2100 	}
2101 	(void) memset(prov, 0, sizeof (pool_knl_connection_t));
2102 	/*
2103 	 * Initialise data members
2104 	 */
2105 	prov->pc_name = strdup("kernel");
2106 	prov->pc_store_type = KERNEL_DATA_STORE;
2107 	prov->pc_oflags = oflags;
2108 	/*
2109 	 * Initialise function pointers
2110 	 */
2111 	prov->pc_close = pool_knl_close;
2112 	prov->pc_validate = pool_knl_validate;
2113 	prov->pc_commit = pool_knl_commit;
2114 	prov->pc_export = pool_knl_export;
2115 	prov->pc_rollback = pool_knl_rollback;
2116 	prov->pc_exec_query = pool_knl_exec_query;
2117 	prov->pc_elem_create = pool_knl_elem_create;
2118 	prov->pc_remove = pool_knl_remove;
2119 	prov->pc_res_xfer = pool_knl_res_transfer;
2120 	prov->pc_res_xxfer = pool_knl_res_xtransfer;
2121 	prov->pc_get_binding = pool_knl_get_binding;
2122 	prov->pc_set_binding = pool_knl_set_binding;
2123 	prov->pc_get_resource_binding = pool_knl_get_resource_binding;
2124 	/*
2125 	 * Associate the provider to it's configuration
2126 	 */
2127 	conf->pc_prov = (pool_connection_t *)prov;
2128 	/*
2129 	 * End of common initialisation
2130 	 */
2131 	/*
2132 	 * Attempt to open the pseudo device, if the configuration is opened
2133 	 * readonly then try to open an info device, otherwise try to open
2134 	 * the writeable device.
2135 	 */
2136 	if (oflags & PO_RDWR) {
2137 		if ((prov->pkc_fd = blocking_open(pool_dynamic_location(),
2138 		    O_RDWR)) < 0) {
2139 			free(prov);
2140 			conf->pc_prov = NULL;
2141 			pool_seterror(POE_SYSTEM);
2142 			return (PO_FAIL);
2143 		}
2144 	} else {
2145 		if ((prov->pkc_fd = open(pool_info_location, O_RDWR)) < 0) {
2146 			free(prov);
2147 			conf->pc_prov = NULL;
2148 			pool_seterror(POE_SYSTEM);
2149 			return (PO_FAIL);
2150 		}
2151 	}
2152 	/*
2153 	 * Allocate the element dictionary
2154 	 */
2155 	if ((prov->pkc_elements = dict_new((int (*)(const void *, const void *))
2156 	    pool_elem_compare, (uint64_t (*)(const void *))hash_id)) == NULL) {
2157 		(void) close(prov->pkc_fd);
2158 		free(prov);
2159 		conf->pc_prov = NULL;
2160 		pool_seterror(POE_SYSTEM);
2161 		return (PO_FAIL);
2162 	}
2163 #if DEBUG
2164 	if ((prov->pkc_leaks = dict_new(NULL, NULL)) == NULL) {
2165 		dict_free(&prov->pkc_elements);
2166 		(void) close(prov->pkc_fd);
2167 		free(prov);
2168 		conf->pc_prov = NULL;
2169 		pool_seterror(POE_SYSTEM);
2170 		return (PO_FAIL);
2171 	}
2172 #endif	/* DEBUG */
2173 	/*
2174 	 * Allocate the transaction log
2175 	 */
2176 	if ((prov->pkc_log = log_alloc(conf)) == NULL) {
2177 #if DEBUG
2178 		dict_free(&prov->pkc_leaks);
2179 #endif	/* DEBUG */
2180 		dict_free(&prov->pkc_elements);
2181 		(void) close(prov->pkc_fd);
2182 		free(prov);
2183 		conf->pc_prov = NULL;
2184 		return (PO_FAIL);
2185 	}
2186 	/*
2187 	 * At this point the configuration provider has been initialized,
2188 	 * mark the configuration as valid so that the various routines
2189 	 * which rely on a valid configuration will work correctly.
2190 	 */
2191 	conf->pc_state = POF_VALID;
2192 	/*
2193 	 * Update the library snapshot from the kernel
2194 	 */
2195 	if (pool_knl_update(conf, NULL) != PO_SUCCESS) {
2196 #if DEBUG
2197 		dict_free(&prov->pkc_leaks);
2198 #endif	/* DEBUG */
2199 		dict_free(&prov->pkc_elements);
2200 		(void) close(prov->pkc_fd);
2201 		free(prov);
2202 		conf->pc_prov = NULL;
2203 		conf->pc_state = POF_INVALID;
2204 		return (PO_FAIL);
2205 	}
2206 	return (PO_SUCCESS);
2207 }
2208 
#if DEBUG
/*
 * pool_knl_elem_printf_cb() is a dict_map() callback used for leak
 * reporting. It prints the element held as the key, followed by its
 * property list if it has one, and then asserts that the element is not
 * present in the dictionary passed as the closure argument.
 */
static void
pool_knl_elem_printf_cb(const void *key, void **value, void *cl)
{
	pool_knl_elem_t *leaked = (pool_knl_elem_t *)key;
	dict_hdl_t *elements = (dict_hdl_t *)cl;

	dprintf("leak elem:%p\n", leaked);
	if (leaked->pke_properties == NULL)
		dprintf("no properties\n");
	else
		nvlist_print(stdout, leaked->pke_properties);
	assert(dict_get(elements, leaked) == NULL);
}
#endif	/* DEBUG */
2224 /*
2225  * pool_knl_elem_free() releases the resources associated with the
2226  * supplied element.
2227  */
2228 static void
2229 pool_knl_elem_free(pool_knl_elem_t *pke, int freeprop)
2230 {
2231 #if DEBUG
2232 	pool_conf_t *conf = TO_CONF(TO_ELEM(pke));
2233 	if (dict_remove(((pool_knl_connection_t *)conf->pc_prov)->pkc_leaks,
2234 	    pke) == NULL)
2235 		dprintf("%p, wasn't in the leak map\n", pke);
2236 	if (freeprop == PO_TRUE) {
2237 		pool_elem_dprintf(TO_ELEM(pke));
2238 	}
2239 	dprintf("released %p\n", pke);
2240 #endif	/* DEBUG */
2241 	if (freeprop == PO_TRUE) {
2242 		nvlist_free(pke->pke_properties);
2243 	}
2244 	free(pke);
2245 }
2246 
2247 /*
2248  * pool_knl_elem_free_cb() is designed to be used with
2249  * dict_map(). When a connection is freed, this function is used to
2250  * free all element resources.
2251  */
2252 /* ARGSUSED1 */
2253 static void
2254 pool_knl_elem_free_cb(const void *key, void **value, void *cl)
2255 {
2256 	pool_knl_elem_t *pke = (pool_knl_elem_t *)key;
2257 
2258 #ifdef DEBUG
2259 	dprintf("pool_knl_elem_free_cb:\n");
2260 	dprintf("about to release %p ", pke);
2261 	pool_elem_dprintf(TO_ELEM(pke));
2262 #endif	/* DEBUG */
2263 	pool_knl_elem_free(pke, PO_TRUE);
2264 }
2265 
2266 /*
2267  * Free the resources for a kernel data provider.
2268  */
2269 void
2270 pool_knl_connection_free(pool_knl_connection_t *prov)
2271 {
2272 	if (prov->pkc_log != NULL) {
2273 		(void) log_walk(prov->pkc_log, log_item_release);
2274 		log_free(prov->pkc_log);
2275 	}
2276 	if (prov->pkc_elements != NULL) {
2277 		dict_map(prov->pkc_elements, pool_knl_elem_free_cb, NULL);
2278 #if DEBUG
2279 		dprintf("dict length is %llu\n", dict_length(prov->pkc_leaks));
2280 		dict_map(prov->pkc_leaks, pool_knl_elem_printf_cb,
2281 		    prov->pkc_elements);
2282 		assert(dict_length(prov->pkc_leaks) == 0);
2283 		dict_free(&prov->pkc_leaks);
2284 #endif	/* DEBUG */
2285 		dict_free(&prov->pkc_elements);
2286 	}
2287 	free((void *)prov->pc_name);
2288 	free(prov);
2289 }
2290 
2291 /*
2292  * Return the specified property value.
2293  *
2294  * POC_INVAL is returned if an error is detected and the error code is updated
2295  * to indicate the cause of the error.
2296  */
2297 pool_value_class_t
2298 pool_knl_get_property(const pool_elem_t *pe, const char *name,
2299     pool_value_t *val)
2300 {
2301 	pool_knl_elem_t *pke = (pool_knl_elem_t *)pe;
2302 	nvpair_t *pair;
2303 	const pool_prop_t *prop;
2304 
2305 	if ((prop = provider_get_prop(pe, name)) != NULL)
2306 		if (prop_is_stored(prop) == PO_FALSE)
2307 			return (pool_knl_get_dynamic_property(pe, name, val));
2308 
2309 	if ((pair = pool_knl_find_nvpair(pke->pke_properties, name)) == NULL) {
2310 		pool_seterror(POE_BADPARAM);
2311 		return (POC_INVAL);
2312 	}
2313 
2314 	if (pool_value_from_nvpair(val, pair) == PO_FAIL) {
2315 		return (POC_INVAL);
2316 	}
2317 
2318 	return (pool_value_get_type(val));
2319 }
2320 
2321 /*
2322  * Return the specified property value.
2323  *
2324  * If a property is designated as dynamic, then this function will
2325  * always try to return the latest value of the property from the
2326  * kernel.
2327  *
2328  * POC_INVAL is returned if an error is detected and the error code is updated
2329  * to indicate the cause of the error.
2330  */
2331 pool_value_class_t
2332 pool_knl_get_dynamic_property(const pool_elem_t *pe, const char *name,
2333     pool_value_t *val)
2334 {
2335 	pool_knl_connection_t *prov;
2336 	pool_propget_t propget = { 0 };
2337 	nvlist_t *proplist;
2338 	nvpair_t *pair;
2339 
2340 	propget.pp_o_id_type = pool_elem_class(pe);
2341 	if (pool_elem_class(pe) == PEC_RES_COMP ||
2342 	    pool_elem_class(pe) == PEC_RES_AGG)
2343 		propget.pp_o_id_subtype = pool_resource_elem_class(pe);
2344 	if (pool_elem_class(pe) == PEC_COMP)
2345 		propget.pp_o_id_subtype =
2346 		    (pool_resource_elem_class_t)pool_component_elem_class(pe);
2347 
2348 	propget.pp_o_id = elem_get_sysid(pe);
2349 	propget.pp_o_prop_name_size = strlen(name);
2350 	propget.pp_o_prop_name = (char *)name;
2351 	propget.pp_i_bufsize = KERNEL_SNAPSHOT_BUF_SZ;
2352 	propget.pp_i_buf = malloc(KERNEL_SNAPSHOT_BUF_SZ);
2353 	bzero(propget.pp_i_buf, KERNEL_SNAPSHOT_BUF_SZ);
2354 
2355 	prov = (pool_knl_connection_t *)(TO_CONF(pe))->pc_prov;
2356 	if (ioctl(prov->pkc_fd, POOL_PROPGET, &propget) < 0) {
2357 		free(propget.pp_i_buf);
2358 		pool_seterror(POE_SYSTEM);
2359 		return (POC_INVAL);
2360 	}
2361 	if (nvlist_unpack(propget.pp_i_buf, propget.pp_i_bufsize,
2362 	    &proplist, 0) != 0) {
2363 		free(propget.pp_i_buf);
2364 		pool_seterror(POE_SYSTEM);
2365 		return (POC_INVAL);
2366 	}
2367 	free(propget.pp_i_buf);
2368 
2369 	if ((pair = nvlist_next_nvpair(proplist, NULL)) == NULL) {
2370 		nvlist_free(proplist);
2371 		pool_seterror(POE_SYSTEM);
2372 		return (POC_INVAL);
2373 	}
2374 
2375 	if (pool_value_from_nvpair(val, pair) == PO_FAIL) {
2376 		nvlist_free(proplist);
2377 		return (POC_INVAL);
2378 	}
2379 	nvlist_free(proplist);
2380 	return (pool_value_get_type(val));
2381 }
2382 
2383 /*
2384  * Update the specified property value.
2385  *
2386  * PO_FAIL is returned if an error is detected and the error code is updated
2387  * to indicate the cause of the error.
2388  */
2389 int
2390 pool_knl_put_property(pool_elem_t *pe, const char *name,
2391     const pool_value_t *val)
2392 {
2393 	pool_knl_elem_t *pke = (pool_knl_elem_t *)pe;
2394 	pool_knl_connection_t *prov =
2395 	    (pool_knl_connection_t *)(TO_CONF(pe))->pc_prov;
2396 	nvpair_t *bp, *ap;
2397 	pool_propput_undo_t *propput;
2398 	nvlist_t *bl = NULL;
2399 	const pool_prop_t *prop;
2400 
2401 	if ((bp = pool_knl_find_nvpair(pke->pke_properties, name)) != NULL) {
2402 		if (nvlist_alloc(&bl, NV_UNIQUE_NAME_TYPE, 0) != 0) {
2403 			pool_seterror(POE_SYSTEM);
2404 			return (PO_FAIL);
2405 		}
2406 		if (nvlist_add_nvpair(bl, bp) != 0) {
2407 			nvlist_free(bl);
2408 			pool_seterror(POE_SYSTEM);
2409 			return (PO_FAIL);
2410 		}
2411 	}
2412 	if (pool_knl_nvlist_add_value(pke->pke_properties, name, val) !=
2413 	    PO_SUCCESS)
2414 		return (PO_FAIL);
2415 
2416 	if (prov->pkc_log->l_state != LS_DO) {
2417 		if (bl)
2418 			nvlist_free(bl);
2419 		return (PO_SUCCESS);
2420 	}
2421 	/*
2422 	 * The remaining logic is setting up the arguments for the
2423 	 * POOL_PROPPUT ioctl and appending the details into the log.
2424 	 */
2425 	if ((propput = malloc(sizeof (pool_propput_undo_t))) == NULL) {
2426 		pool_seterror(POE_SYSTEM);
2427 		return (PO_FAIL);
2428 	}
2429 	(void) memset(propput, 0, sizeof (pool_propput_undo_t));
2430 	propput->ppu_blist = bl;
2431 
2432 	ap = pool_knl_find_nvpair(pke->pke_properties, name);
2433 
2434 	if (nvlist_alloc(&propput->ppu_alist, NV_UNIQUE_NAME_TYPE, 0) != 0) {
2435 		nvlist_free(propput->ppu_blist);
2436 		free(propput);
2437 		pool_seterror(POE_SYSTEM);
2438 		return (PO_FAIL);
2439 	}
2440 	if (nvlist_add_nvpair(propput->ppu_alist, ap) != 0) {
2441 		nvlist_free(propput->ppu_blist);
2442 		nvlist_free(propput->ppu_alist);
2443 		free(propput);
2444 		pool_seterror(POE_SYSTEM);
2445 		return (PO_FAIL);
2446 	}
2447 
2448 	if (nvlist_pack(propput->ppu_alist,
2449 	    (char **)&propput->ppu_ioctl.pp_o_buf,
2450 	    &propput->ppu_ioctl.pp_o_bufsize, NV_ENCODE_NATIVE, 0) != 0) {
2451 		pool_seterror(POE_SYSTEM);
2452 		return (PO_FAIL);
2453 	}
2454 	nvlist_free(propput->ppu_alist);
2455 	propput->ppu_ioctl.pp_o_id_type = pool_elem_class(pe);
2456 	if (pool_elem_class(pe) == PEC_RES_COMP ||
2457 	    pool_elem_class(pe) == PEC_RES_AGG)
2458 		propput->ppu_ioctl.pp_o_id_sub_type =
2459 		    pool_resource_elem_class(pe);
2460 	if (pool_elem_class(pe) == PEC_COMP)
2461 		propput->ppu_ioctl.pp_o_id_sub_type =
2462 		    (pool_resource_elem_class_t)pool_component_elem_class(pe);
2463 
2464 	propput->ppu_elem = pe;
2465 	if ((prop = provider_get_prop(propput->ppu_elem, name)) != NULL) {
2466 		if (prop_is_readonly(prop) == PO_TRUE)
2467 			propput->ppu_doioctl |= KERNEL_PROP_RDONLY;
2468 	}
2469 
2470 	if (log_append(prov->pkc_log, POOL_PROPPUT, (void *)propput) !=
2471 	    PO_SUCCESS) {
2472 		nvlist_free(propput->ppu_blist);
2473 		free(propput);
2474 		return (PO_FAIL);
2475 	}
2476 	return (PO_SUCCESS);
2477 }
2478 
2479 /*
2480  * Remove the specified property value.
2481  *
2482  * PO_FAIL is returned if an error is detected and the error code is
2483  * updated to indicate the cause of the error.
2484  */
2485 int
2486 pool_knl_rm_property(pool_elem_t *pe, const char *name)
2487 {
2488 	pool_knl_elem_t *pke = (pool_knl_elem_t *)pe;
2489 	pool_knl_connection_t *prov =
2490 	    (pool_knl_connection_t *)(TO_CONF(pe))->pc_prov;
2491 	pool_proprm_undo_t *proprm;
2492 
2493 	if (pool_knl_find_nvpair(pke->pke_properties, name) == NULL) {
2494 		pool_seterror(POE_BADPARAM);
2495 		return (PO_FAIL);
2496 	}
2497 
2498 	if ((proprm = malloc(sizeof (pool_proprm_undo_t))) == NULL) {
2499 		pool_seterror(POE_SYSTEM);
2500 		return (PO_FAIL);
2501 	}
2502 	(void) memset(proprm, 0, sizeof (pool_proprm_undo_t));
2503 	proprm->pru_oldval.pv_class = POC_INVAL;
2504 	(void) pool_get_property(TO_CONF(pe), pe, name, &proprm->pru_oldval);
2505 
2506 	if (prov->pkc_log->l_state != LS_DO) {
2507 		free(proprm);
2508 		(void) nvlist_remove_all(pke->pke_properties, (char *)name);
2509 		return (PO_SUCCESS);
2510 	}
2511 	/*
2512 	 * The remaining logic is setting up the arguments for the
2513 	 * POOL_PROPRM ioctl and appending the details into the log.
2514 	 */
2515 
2516 	proprm->pru_ioctl.pp_o_id_type = pool_elem_class(pe);
2517 	if (pool_elem_class(pe) == PEC_RES_COMP ||
2518 	    pool_elem_class(pe) == PEC_RES_AGG)
2519 		proprm->pru_ioctl.pp_o_id_sub_type =
2520 		    pool_resource_elem_class(pe);
2521 
2522 	if (pool_elem_class(pe) == PEC_COMP)
2523 		proprm->pru_ioctl.pp_o_id_sub_type =
2524 		    (pool_resource_elem_class_t)pool_component_elem_class(pe);
2525 
2526 	proprm->pru_ioctl.pp_o_prop_name_size = strlen(name);
2527 	proprm->pru_ioctl.pp_o_prop_name =
2528 	    (char *)pool_value_get_name(&proprm->pru_oldval);
2529 	proprm->pru_elem = pe;
2530 
2531 	if (log_append(prov->pkc_log, POOL_PROPRM, (void *)proprm) !=
2532 	    PO_SUCCESS) {
2533 		free(proprm);
2534 		return (PO_FAIL);
2535 	}
2536 
2537 	(void) nvlist_remove_all(pke->pke_properties, (char *)name);
2538 	return (PO_SUCCESS);
2539 }
2540 
2541 /*
2542  * Return a NULL terminated array of pool_value_t which represents all
2543  * of the properties stored for an element
2544  *
2545  * Return NULL on failure. It is the caller's responsibility to free
2546  * the returned array of values.
2547  */
2548 pool_value_t **
2549 pool_knl_get_properties(const pool_elem_t *pe, uint_t *nprops)
2550 {
2551 	nvpair_t *pair;
2552 	pool_value_t **result;
2553 	pool_knl_elem_t *pke = (pool_knl_elem_t *)pe;
2554 	int i = 0;
2555 
2556 	*nprops = 0;
2557 
2558 	for (pair = nvlist_next_nvpair(pke->pke_properties, NULL); pair != NULL;
2559 	    pair = nvlist_next_nvpair(pke->pke_properties, pair))
2560 		(*nprops)++;
2561 	if ((result = calloc(*nprops + 1, sizeof (pool_value_t *))) == NULL) {
2562 		pool_seterror(POE_SYSTEM);
2563 		return (NULL);
2564 	}
2565 	for (pair = nvlist_next_nvpair(pke->pke_properties, NULL); pair != NULL;
2566 	    pair = nvlist_next_nvpair(pke->pke_properties, pair), i++) {
2567 		result[i] = pool_value_alloc();
2568 		if (pool_value_from_nvpair(result[i], pair) == PO_FAIL) {
2569 			while (i-- >= 0)
2570 				pool_value_free(result[i]);
2571 			free(result);
2572 			return (NULL);
2573 		}
2574 	}
2575 	return (result);
2576 }
2577 
2578 /*
2579  * Append an entry to a result set. Reallocate the array used to store
2580  * results if it's full.
2581  * Returns PO_SUCCESS/PO_FAIL
2582  */
2583 int
2584 pool_knl_result_set_append(pool_knl_result_set_t *rs, pool_knl_elem_t *pke)
2585 {
2586 	if (rs->pkr_count == rs->pkr_size)
2587 		if (pool_knl_result_set_realloc(rs) != PO_SUCCESS)
2588 			return (PO_FAIL);
2589 
2590 	rs->pkr_list[rs->pkr_count++] = pke;
2591 
2592 	return (PO_SUCCESS);
2593 }
2594 
2595 /*
2596  * Resize the array used to store results. A simple doubling strategy
2597  * is used.
2598  * Returns PO_SUCCESS/PO_FAIL
2599  */
2600 int
2601 pool_knl_result_set_realloc(pool_knl_result_set_t *rs)
2602 {
2603 	pool_knl_elem_t **old_list = rs->pkr_list;
2604 	int new_size = rs->pkr_size * 2;
2605 
2606 	if ((rs->pkr_list = realloc(rs->pkr_list,
2607 	    new_size * sizeof (pool_knl_elem_t *))) == NULL) {
2608 		rs->pkr_list = old_list;
2609 		pool_seterror(POE_SYSTEM);
2610 		return (PO_FAIL);
2611 	}
2612 	rs->pkr_size = new_size;
2613 
2614 	return (PO_SUCCESS);
2615 }
2616 
2617 /*
2618  * Allocate a result set. The Result Set stores the result of a query.
2619  * Returns pool_knl_result_set_t pointer/NULL
2620  */
2621 pool_knl_result_set_t *
2622 pool_knl_result_set_alloc(const pool_conf_t *conf)
2623 {
2624 	pool_knl_result_set_t *rs;
2625 
2626 	if ((rs = malloc(sizeof (pool_knl_result_set_t))) == NULL) {
2627 		pool_seterror(POE_SYSTEM);
2628 		return (NULL);
2629 	}
2630 	(void) memset(rs, 0, sizeof (pool_knl_result_set_t));
2631 	rs->pkr_size = KERNEL_RS_INITIAL_SZ;
2632 	if (pool_knl_result_set_realloc(rs) == PO_FAIL) {
2633 		free(rs);
2634 		pool_seterror(POE_SYSTEM);
2635 		return (NULL);
2636 	}
2637 	rs->prs_conf = conf;
2638 	rs->prs_index = -1;
2639 	rs->prs_active = PO_TRUE;
2640 	/* Fix up the result set accessor functions to the knl specfic ones */
2641 	rs->prs_next = pool_knl_rs_next;
2642 	rs->prs_prev = pool_knl_rs_prev;
2643 	rs->prs_first = pool_knl_rs_first;
2644 	rs->prs_last = pool_knl_rs_last;
2645 	rs->prs_get_index = pool_knl_rs_get_index;
2646 	rs->prs_set_index = pool_knl_rs_set_index;
2647 	rs->prs_close = pool_knl_rs_close;
2648 	rs->prs_count = pool_knl_rs_count;
2649 	return (rs);
2650 }
2651 
2652 /*
2653  * Free a result set. Ensure that the resources are all released at
2654  * this point.
2655  */
2656 void
2657 pool_knl_result_set_free(pool_knl_result_set_t *rs)
2658 {
2659 	free(rs->pkr_list);
2660 	free(rs);
2661 }
2662 /*
2663  * Return the next element in a result set.
2664  * Returns pool_elem_t pointer/NULL
2665  */
2666 pool_elem_t *
2667 pool_knl_rs_next(pool_result_set_t *set)
2668 {
2669 	pool_knl_result_set_t *kset = (pool_knl_result_set_t *)set;
2670 
2671 	if (kset->prs_index == kset->pkr_count - 1)
2672 		return (NULL);
2673 	return ((pool_elem_t *)kset->pkr_list[++kset->prs_index]);
2674 }
2675 
2676 /*
2677  * Return the previous element in a result set.
2678  * Returns pool_elem_t pointer/NULL
2679  */
2680 pool_elem_t *
2681 pool_knl_rs_prev(pool_result_set_t *set)
2682 {
2683 	pool_knl_result_set_t *kset = (pool_knl_result_set_t *)set;
2684 
2685 	if (kset->prs_index < 0)
2686 		return (NULL);
2687 	return ((pool_elem_t *)kset->pkr_list[kset->prs_index--]);
2688 }
2689 
2690 /*
2691  * Sets the current index in a result set.
2692  * Returns PO_SUCCESS/PO_FAIL
2693  */
2694 int
2695 pool_knl_rs_set_index(pool_result_set_t *set, int index)
2696 {
2697 	pool_knl_result_set_t *kset = (pool_knl_result_set_t *)set;
2698 
2699 	if (index < 0 || index >= kset->pkr_count) {
2700 		pool_seterror(POE_BADPARAM);
2701 		return (PO_FAIL);
2702 	}
2703 	kset->prs_index = index;
2704 	return (PO_SUCCESS);
2705 }
2706 
2707 /*
2708  * Return the current index in a result set.
2709  * Returns current index
2710  */
2711 int
2712 pool_knl_rs_get_index(pool_result_set_t *set)
2713 {
2714 	pool_knl_result_set_t *kset = (pool_knl_result_set_t *)set;
2715 
2716 	return (kset->prs_index);
2717 }
2718 
2719 /*
2720  * Return the first element in a result set.
2721  * Returns pool_elem_t pointer/NULL
2722  */
2723 pool_elem_t *
2724 pool_knl_rs_first(pool_result_set_t *set)
2725 {
2726 	pool_knl_result_set_t *kset = (pool_knl_result_set_t *)set;
2727 
2728 	return ((pool_elem_t *)kset->pkr_list[0]);
2729 }
2730 
2731 /*
2732  * Return the last element in a result set.
2733  * Returns pool_elem_t pointer/NULL
2734  */
2735 pool_elem_t *
2736 pool_knl_rs_last(pool_result_set_t *set)
2737 {
2738 	pool_knl_result_set_t *kset = (pool_knl_result_set_t *)set;
2739 
2740 	return ((pool_elem_t *)kset->pkr_list[kset->pkr_count - 1]);
2741 }
2742 
2743 /*
2744  * Return the number of results in a result set.
2745  * Returns result count
2746  */
2747 int
2748 pool_knl_rs_count(pool_result_set_t *set)
2749 {
2750 	pool_knl_result_set_t *kset = (pool_knl_result_set_t *)set;
2751 
2752 	return (kset->pkr_count);
2753 }
2754 
2755 
2756 /*
2757  * Close a result set. Free the resources
2758  * Returns PO_SUCCESS/PO_FAIL
2759  */
2760 int
2761 pool_knl_rs_close(pool_result_set_t *set)
2762 {
2763 	pool_knl_result_set_t *kset = (pool_knl_result_set_t *)set;
2764 
2765 	pool_knl_result_set_free(kset);
2766 	return (PO_SUCCESS);
2767 }
2768 
2769 /*
2770  * Commit an individual transaction log item(). This processing is
2771  * essential to the pool_conf_commit() logic. When pool_conf_commit()
2772  * is invoked, the pending transaction log for the configuration is
2773  * walked and all pending changes to the kernel are invoked. If a
2774  * change succeeds it is marked in the log as successful and
2775  * processing continues, if it fails then failure is returned and the
2776  * log will be "rolled back" to undo changes to the library snapshot
2777  * and the kernel.
2778  */
2779 int
2780 log_item_commit(log_item_t *li)
2781 {
2782 	pool_knl_connection_t *prov =
2783 	    (pool_knl_connection_t *)li->li_log->l_conf->pc_prov;
2784 	pool_create_undo_t *create;
2785 	pool_destroy_undo_t *destroy;
2786 	pool_assoc_undo_t *assoc;
2787 	pool_dissoc_undo_t *dissoc;
2788 	pool_propput_undo_t *propput;
2789 	pool_proprm_undo_t *proprm;
2790 	pool_xtransfer_undo_t *xtransfer;
2791 	char_buf_t *cb;
2792 	size_t size;
2793 	pool_elem_t *pair;
2794 	pool_value_t val = POOL_VALUE_INITIALIZER;
2795 	int ret;
2796 
2797 	switch (li->li_op) {
2798 	case POOL_CREATE:
2799 		create = (pool_create_undo_t *)li->li_details;
2800 		if ((cb = alloc_char_buf(CB_DEFAULT_LEN)) == NULL)
2801 			return (PO_FAIL);
2802 		if (set_char_buf(cb, "%s.sys_id",
2803 		    pool_elem_class_string(create->pcu_elem)) != PO_SUCCESS) {
2804 			free_char_buf(cb);
2805 			return (PO_FAIL);
2806 		}
2807 #ifdef DEBUG
2808 		dprintf("log_item_commit: POOL_CREATE, remove from dict\n");
2809 		pool_elem_dprintf(create->pcu_elem);
2810 #endif	/* DEBUG */
2811 		/*
2812 		 * May not need to remove the element if it was
2813 		 * already destroyed before commit. Just cast the
2814 		 * return to void.
2815 		 */
2816 		(void) dict_remove(prov->pkc_elements,
2817 		    (pool_knl_elem_t *)create->pcu_elem);
2818 
2819 		if (ioctl(prov->pkc_fd, POOL_CREATE, &create->pcu_ioctl) < 0) {
2820 			pool_seterror(POE_SYSTEM);
2821 			return (PO_FAIL);
2822 		}
2823 		/*
2824 		 * Now that we have created our element in the kernel,
2825 		 * it has a valid allocated system id. Remove the
2826 		 * element from the element dictionary, using the
2827 		 * current key, and then re-insert under the new key.
2828 		 */
2829 #ifdef DEBUG
2830 		pool_elem_dprintf(create->pcu_elem);
2831 #endif	/* DEBUG */
2832 		assert(nvlist_add_int64(
2833 		    ((pool_knl_elem_t *)create->pcu_elem)->pke_properties,
2834 		    cb->cb_buf, create->pcu_ioctl.pc_i_id) == 0);
2835 		free_char_buf(cb);
2836 		assert(dict_put(prov->pkc_elements, create->pcu_elem,
2837 		    create->pcu_elem) == NULL);
2838 		/*
2839 		 * If the element has a pair in the static
2840 		 * configuration, update it with the sys_id
2841 		 */
2842 		if ((pair = pool_get_pair(create->pcu_elem)) != NULL) {
2843 			pool_value_set_int64(&val, create->pcu_ioctl.pc_i_id);
2844 			assert(pool_put_any_ns_property(pair, c_sys_prop, &val)
2845 			    == PO_SUCCESS);
2846 		}
2847 		li->li_state = LS_UNDO;
2848 		break;
2849 	case POOL_DESTROY:
2850 		destroy = (pool_destroy_undo_t *)li->li_details;
2851 
2852 		destroy->pdu_ioctl.pd_o_id = elem_get_sysid(destroy->pdu_elem);
2853 
2854 		/*
2855 		 * It may be that this element was created in the last
2856 		 * transaction. In which case POOL_CREATE, above, will
2857 		 * have re-inserted the element in the dictionary. Try
2858 		 * to remove it just in case this has occurred.
2859 		 */
2860 		(void) dict_remove(prov->pkc_elements,
2861 		    (pool_knl_elem_t *)destroy->pdu_elem);
2862 		while ((ret = ioctl(prov->pkc_fd, POOL_DESTROY,
2863 		    &destroy->pdu_ioctl)) < 0 && errno == EAGAIN)
2864 			;
2865 		if (ret < 0) {
2866 			pool_seterror(POE_SYSTEM);
2867 			return (PO_FAIL);
2868 		}
2869 #ifdef DEBUG
2870 		dprintf("log_item_commit: POOL_DESTROY\n");
2871 		pool_elem_dprintf(destroy->pdu_elem);
2872 #endif	/* DEBUG */
2873 		li->li_state = LS_UNDO;
2874 		break;
2875 	case POOL_ASSOC:
2876 		assoc = (pool_assoc_undo_t *)li->li_details;
2877 
2878 		assoc->pau_ioctl.pa_o_pool_id =
2879 		    elem_get_sysid(assoc->pau_assoc);
2880 		assoc->pau_ioctl.pa_o_res_id =
2881 		    elem_get_sysid(assoc->pau_newres);
2882 		while ((ret = ioctl(prov->pkc_fd, POOL_ASSOC,
2883 		    &assoc->pau_ioctl)) < 0 && errno == EAGAIN)
2884 			;
2885 		if (ret < 0) {
2886 			pool_seterror(POE_SYSTEM);
2887 			return (PO_FAIL);
2888 		}
2889 		li->li_state = LS_UNDO;
2890 		break;
2891 	case POOL_DISSOC:
2892 		dissoc = (pool_dissoc_undo_t *)li->li_details;
2893 
2894 		dissoc->pdu_ioctl.pd_o_pool_id =
2895 		    elem_get_sysid(dissoc->pdu_dissoc);
2896 
2897 		while ((ret = ioctl(prov->pkc_fd, POOL_DISSOC,
2898 		    &dissoc->pdu_ioctl)) < 0 && errno == EAGAIN)
2899 			;
2900 		if (ret < 0) {
2901 			pool_seterror(POE_SYSTEM);
2902 			return (PO_FAIL);
2903 		}
2904 		li->li_state = LS_UNDO;
2905 		break;
2906 	case POOL_TRANSFER:
2907 		li->li_state = LS_UNDO;
2908 		pool_seterror(POE_BADPARAM);
2909 		return (PO_FAIL);
2910 		break;
2911 	case POOL_XTRANSFER:
2912 		xtransfer = (pool_xtransfer_undo_t *)li->li_details;
2913 
2914 		xtransfer->pxu_ioctl.px_o_src_id =
2915 		    elem_get_sysid(xtransfer->pxu_src);
2916 		xtransfer->pxu_ioctl.px_o_tgt_id =
2917 		    elem_get_sysid(xtransfer->pxu_tgt);
2918 		for (size = 0; xtransfer->pxu_rl[size] != NULL; size ++) {
2919 			xtransfer->pxu_ioctl.px_o_comp_list[size] =
2920 			    elem_get_sysid(TO_ELEM(xtransfer->pxu_rl[size]));
2921 #ifdef DEBUG
2922 			dprintf("log_item_commit: POOL_XTRANSFER\n");
2923 			pool_elem_dprintf(TO_ELEM(xtransfer->pxu_rl[size]));
2924 #endif	/* DEBUG */
2925 		}
2926 
2927 		/*
2928 		 * Don't actually transfer resources if the configuration
2929 		 * is in POF_DESTROY state. This is to prevent problems
2930 		 * relating to transferring off-line CPUs. Instead rely
2931 		 * on the POOL_DESTROY ioctl to transfer the CPUS.
2932 		 */
2933 		if (li->li_log->l_conf->pc_state != POF_DESTROY &&
2934 		    ioctl(prov->pkc_fd, POOL_XTRANSFER,
2935 		    &xtransfer->pxu_ioctl) < 0) {
2936 #ifdef DEBUG
2937 			dprintf("log_item_commit: POOL_XTRANSFER, ioctl "
2938 			    "failed\n");
2939 #endif	/* DEBUG */
2940 			pool_seterror(POE_SYSTEM);
2941 			return (PO_FAIL);
2942 		}
2943 		li->li_state = LS_UNDO;
2944 		break;
2945 	case POOL_PROPPUT:
2946 		propput = (pool_propput_undo_t *)li->li_details;
2947 
2948 		if (pool_elem_class(propput->ppu_elem) != PEC_SYSTEM) {
2949 			propput->ppu_ioctl.pp_o_id =
2950 			    elem_get_sysid(propput->ppu_elem);
2951 		}
2952 		/*
2953 		 * Some properties, e.g. pset.size, are read-only in the
2954 		 * kernel and attempting to change them will fail and cause
2955 		 * problems. Although this property is read-only through the
2956 		 * public interface, the library needs to modify it's value.
2957 		 */
2958 		if ((propput->ppu_doioctl & KERNEL_PROP_RDONLY) == 0) {
2959 			if (ioctl(prov->pkc_fd, POOL_PROPPUT,
2960 			    &propput->ppu_ioctl) < 0) {
2961 				pool_seterror(POE_SYSTEM);
2962 				return (PO_FAIL);
2963 			}
2964 		}
2965 		li->li_state = LS_UNDO;
2966 		break;
2967 	case POOL_PROPRM:
2968 		proprm = (pool_proprm_undo_t *)li->li_details;
2969 
2970 		if (pool_elem_class(proprm->pru_elem) != PEC_SYSTEM) {
2971 			proprm->pru_ioctl.pp_o_id =
2972 			    elem_get_sysid(proprm->pru_elem);
2973 		}
2974 		if (ioctl(prov->pkc_fd, POOL_PROPRM, &proprm->pru_ioctl) < 0) {
2975 			pool_seterror(POE_SYSTEM);
2976 			return (PO_FAIL);
2977 		}
2978 		li->li_state = LS_UNDO;
2979 		break;
2980 	default:
2981 		return (PO_FAIL);
2982 	}
2983 	return (PO_SUCCESS);
2984 }
2985 
2986 /*
2987  * Undo an individual transaction log item(). This processing is
2988  * essential to the pool_conf_commit() and pool_conf_rollback()
2989  * logic. Changes to the libpool snapshot and the kernel are carried
2990  * out separately. The library snapshot is updated synchronously,
2991  * however the kernel update is delayed until the user calls
2992  * pool_conf_commit().
2993  *
2994  * When undoing transactions, library changes will be undone unless
2995  * this invocation is as a result of a commit failure, in which case
2996  * the log state will be LS_RECOVER. Kernel changes will only be
2997  * undone if they are marked as having been done, in which case the
2998  * log item state will be LS_UNDO.
2999  */
3000 int
3001 log_item_undo(log_item_t *li)
3002 {
3003 	pool_knl_connection_t *prov =
3004 	    (pool_knl_connection_t *)li->li_log->l_conf->pc_prov;
3005 	pool_create_undo_t *create;
3006 	pool_destroy_undo_t *destroy;
3007 	pool_assoc_undo_t *assoc;
3008 	pool_dissoc_undo_t *dissoc;
3009 	pool_propput_undo_t *propput;
3010 	pool_proprm_undo_t *proprm;
3011 	pool_xtransfer_undo_t *xtransfer;
3012 	char_buf_t *cb;
3013 	size_t size;
3014 	pool_destroy_t u_destroy;
3015 	pool_create_t u_create;
3016 	pool_assoc_t u_assoc;
3017 	pool_xtransfer_t u_xtransfer;
3018 	pool_propput_t u_propput;
3019 	pool_proprm_t u_proprm;
3020 	pool_conf_t *conf = li->li_log->l_conf;
3021 	nvpair_t *pair;
3022 	nvlist_t *tmplist;
3023 	int ret;
3024 
3025 	if (li->li_log->l_state != LS_RECOVER) {
3026 	switch (li->li_op) {
3027 	case POOL_CREATE:
3028 		create = (pool_create_undo_t *)li->li_details;
3029 
3030 		(void) dict_remove(prov->pkc_elements, create->pcu_elem);
3031 #ifdef DEBUG
3032 		dprintf("log_item_undo: POOL_CREATE\n");
3033 		assert(create->pcu_elem != NULL);
3034 		dprintf("log_item_undo: POOL_CREATE %p\n", create->pcu_elem);
3035 		pool_elem_dprintf(create->pcu_elem);
3036 #endif	/* DEBUG */
3037 		pool_knl_elem_free((pool_knl_elem_t *)create->pcu_elem,
3038 		    PO_TRUE);
3039 		break;
3040 	case POOL_DESTROY:
3041 		destroy = (pool_destroy_undo_t *)li->li_details;
3042 
3043 		assert(dict_put(prov->pkc_elements, destroy->pdu_elem,
3044 		    destroy->pdu_elem) == NULL);
3045 		break;
3046 	case POOL_ASSOC:
3047 		assoc = (pool_assoc_undo_t *)li->li_details;
3048 
3049 		if (assoc->pau_oldres != NULL)
3050 			((pool_knl_pool_t *)assoc->pau_assoc)->pkp_assoc
3051 			    [pool_resource_elem_class(assoc->pau_oldres)] =
3052 			    (pool_knl_resource_t *)assoc->pau_oldres;
3053 		break;
3054 	case POOL_DISSOC:
3055 		dissoc = (pool_dissoc_undo_t *)li->li_details;
3056 
3057 		if (dissoc->pdu_oldres != NULL)
3058 			((pool_knl_pool_t *)dissoc->pdu_dissoc)->pkp_assoc
3059 			    [pool_resource_elem_class(dissoc->pdu_oldres)] =
3060 			    (pool_knl_resource_t *)dissoc->pdu_oldres;
3061 		break;
3062 	case POOL_TRANSFER:
3063 		pool_seterror(POE_BADPARAM);
3064 		return (PO_FAIL);
3065 		break;
3066 	case POOL_XTRANSFER:
3067 		xtransfer = (pool_xtransfer_undo_t *)li->li_details;
3068 
3069 		for (size = 0; xtransfer->pxu_rl[size] != NULL; size++) {
3070 			pool_value_t val = POOL_VALUE_INITIALIZER;
3071 			uint64_t src_size;
3072 			uint64_t tgt_size;
3073 
3074 			if (pool_set_container(xtransfer->pxu_src,
3075 			    TO_ELEM(xtransfer->pxu_rl[size])) == PO_FAIL) {
3076 				return (PO_FAIL);
3077 			}
3078 			/*
3079 			 * Maintain the library view of the size
3080 			 */
3081 			if (resource_get_size(pool_elem_res(xtransfer->pxu_src),
3082 			    &src_size) != PO_SUCCESS ||
3083 			    resource_get_size(pool_elem_res(xtransfer->pxu_tgt),
3084 			    &tgt_size) != PO_SUCCESS) {
3085 				pool_seterror(POE_BADPARAM);
3086 				return (PO_FAIL);
3087 			}
3088 			src_size++;
3089 			tgt_size--;
3090 			pool_value_set_uint64(&val, src_size);
3091 			(void) pool_put_any_ns_property(xtransfer->pxu_src,
3092 			    c_size_prop, &val);
3093 			pool_value_set_uint64(&val, tgt_size);
3094 			(void) pool_put_any_ns_property(xtransfer->pxu_tgt,
3095 			    c_size_prop, &val);
3096 		}
3097 		break;
3098 	case POOL_PROPPUT:
3099 		propput = (pool_propput_undo_t *)li->li_details;
3100 
3101 		if ((propput->ppu_doioctl & KERNEL_PROP_RDONLY) == 0) {
3102 			if (propput->ppu_blist != NULL) {
3103 				if (nvlist_merge(
3104 				    ((pool_knl_elem_t *)propput->ppu_elem)->
3105 				    pke_properties, propput->ppu_blist, 0)
3106 				    != 0) {
3107 					pool_seterror(POE_SYSTEM);
3108 					return (PO_FAIL);
3109 				}
3110 			} else {
3111 				if (nvlist_unpack(propput->ppu_ioctl.pp_o_buf,
3112 				    propput->ppu_ioctl.pp_o_bufsize,
3113 				    &propput->ppu_alist, 0) != 0) {
3114 					pool_seterror(POE_SYSTEM);
3115 					return (PO_FAIL);
3116 				}
3117 				pair = nvlist_next_nvpair(propput->ppu_alist,
3118 				    NULL);
3119 				(void) nvlist_remove_all(((pool_knl_elem_t *)
3120 				    propput->ppu_elem)->pke_properties,
3121 				    nvpair_name(pair));
3122 				nvlist_free(propput->ppu_alist);
3123 			}
3124 		}
3125 		break;
3126 	case POOL_PROPRM:
3127 		proprm = (pool_proprm_undo_t *)li->li_details;
3128 
3129 		if (pool_value_get_type(&proprm->pru_oldval) != POC_INVAL) {
3130 			if (pool_put_property(conf, proprm->pru_elem,
3131 			    proprm->pru_ioctl.pp_o_prop_name,
3132 			    &proprm->pru_oldval) != PO_SUCCESS) {
3133 				return (PO_FAIL);
3134 			}
3135 		}
3136 		break;
3137 	default:
3138 		return (PO_FAIL);
3139 	}
3140 	}
3141 	/*
3142 	 * Only try to undo the state of the kernel if we modified it.
3143 	 */
3144 	if (li->li_state == LS_DO) {
3145 		return (PO_SUCCESS);
3146 	}
3147 
3148 	switch (li->li_op) {
3149 	case POOL_CREATE:
3150 		create = (pool_create_undo_t *)li->li_details;
3151 
3152 		u_destroy.pd_o_type = create->pcu_ioctl.pc_o_type;
3153 		u_destroy.pd_o_sub_type = create->pcu_ioctl.pc_o_sub_type;
3154 		u_destroy.pd_o_id = create->pcu_ioctl.pc_i_id;
3155 
3156 		while ((ret = ioctl(prov->pkc_fd, POOL_DESTROY,
3157 		    &u_destroy)) < 0 && errno == EAGAIN)
3158 			;
3159 		if (ret < 0) {
3160 			pool_seterror(POE_SYSTEM);
3161 			return (PO_FAIL);
3162 		}
3163 		li->li_state = LS_DO;
3164 		break;
3165 	case POOL_DESTROY:
3166 		destroy = (pool_destroy_undo_t *)li->li_details;
3167 
3168 		u_create.pc_o_type = destroy->pdu_ioctl.pd_o_type;
3169 		u_create.pc_o_sub_type = destroy->pdu_ioctl.pd_o_sub_type;
3170 
3171 		if (ioctl(prov->pkc_fd, POOL_CREATE, &u_create) < 0) {
3172 			pool_seterror(POE_SYSTEM);
3173 			return (PO_FAIL);
3174 		}
3175 
3176 		if ((cb = alloc_char_buf(CB_DEFAULT_LEN)) == NULL) {
3177 			return (PO_FAIL);
3178 		}
3179 		if (set_char_buf(cb, "%s.sys_id",
3180 		    pool_elem_class_string(destroy->pdu_elem)) != PO_SUCCESS) {
3181 			free_char_buf(cb);
3182 			return (PO_FAIL);
3183 		}
3184 		(void) nvlist_add_int64(
3185 		    ((pool_knl_elem_t *)destroy->pdu_elem)->pke_properties,
3186 		    cb->cb_buf, u_create.pc_i_id);
3187 		free_char_buf(cb);
3188 		if (dict_put(prov->pkc_elements, destroy->pdu_elem,
3189 		    destroy->pdu_elem) != NULL) {
3190 			pool_seterror(POE_SYSTEM);
3191 			return (PO_FAIL);
3192 		}
3193 		/*
3194 		 * Now we need to reset all the properties and
3195 		 * associations in the kernel for this newly created
3196 		 * replacement.
3197 		 */
3198 		u_propput.pp_o_id_type = destroy->pdu_ioctl.pd_o_type;
3199 		u_propput.pp_o_id_sub_type = destroy->pdu_ioctl.pd_o_sub_type;
3200 		u_propput.pp_o_id = u_create.pc_i_id;
3201 		u_propput.pp_o_buf = NULL;
3202 		/*
3203 		 * Remove the read-only properties before attempting
3204 		 * to restore the state of the newly created property
3205 		 */
3206 		(void) nvlist_dup(((pool_knl_elem_t *)destroy->pdu_elem)->
3207 		    pke_properties, &tmplist, 0);
3208 		for (pair = nvlist_next_nvpair(tmplist, NULL); pair != NULL;
3209 		    pair = nvlist_next_nvpair(tmplist, pair)) {
3210 			const pool_prop_t *prop;
3211 			char *name = nvpair_name(pair);
3212 			if ((prop = provider_get_prop(destroy->pdu_elem,
3213 			    name)) != NULL)
3214 				if (prop_is_readonly(prop) == PO_TRUE)
3215 					(void) nvlist_remove_all(tmplist, name);
3216 		}
3217 		if (nvlist_pack(tmplist, (char **)&u_propput.pp_o_buf,
3218 		    &u_propput.pp_o_bufsize, NV_ENCODE_NATIVE, 0) != 0) {
3219 			pool_seterror(POE_SYSTEM);
3220 			return (PO_FAIL);
3221 		}
3222 		nvlist_free(tmplist);
3223 		if (ioctl(prov->pkc_fd, POOL_PROPPUT, &u_propput) < 0) {
3224 			free(u_propput.pp_o_buf);
3225 			pool_seterror(POE_SYSTEM);
3226 			return (PO_FAIL);
3227 		}
3228 		free(u_propput.pp_o_buf);
3229 		/*
3230 		 * Now reset the associations for all the resource
3231 		 * types if the thing which we are recreating is a
3232 		 * pool
3233 		 *
3234 		 * TODO: This is resource specific and must be
3235 		 * extended for additional resource types.
3236 		 */
3237 		if (destroy->pdu_ioctl.pd_o_type == PEC_POOL) {
3238 			u_assoc.pa_o_pool_id = u_create.pc_i_id;
3239 			u_assoc.pa_o_res_id =
3240 			    elem_get_sysid(
3241 			    TO_ELEM(((pool_knl_pool_t *)destroy->pdu_elem)->
3242 			    pkp_assoc[PREC_PSET]));
3243 			u_assoc.pa_o_id_type = PREC_PSET;
3244 
3245 			if (ioctl(prov->pkc_fd, POOL_ASSOC, &u_assoc) < 0) {
3246 				pool_seterror(POE_SYSTEM);
3247 				return (PO_FAIL);
3248 			}
3249 		}
3250 		li->li_state = LS_DO;
3251 		break;
3252 	case POOL_ASSOC:
3253 		assoc = (pool_assoc_undo_t *)li->li_details;
3254 
3255 		u_assoc.pa_o_pool_id = elem_get_sysid(assoc->pau_assoc);
3256 		u_assoc.pa_o_res_id = elem_get_sysid(assoc->pau_oldres);
3257 		u_assoc.pa_o_id_type = assoc->pau_ioctl.pa_o_id_type;
3258 
3259 		while ((ret = ioctl(prov->pkc_fd, POOL_ASSOC, &u_assoc)) < 0 &&
3260 		    errno == EAGAIN)
3261 			;
3262 		if (ret < 0) {
3263 			pool_seterror(POE_SYSTEM);
3264 			return (PO_FAIL);
3265 		}
3266 		li->li_state = LS_DO;
3267 		break;
3268 	case POOL_DISSOC:
3269 		dissoc = (pool_dissoc_undo_t *)li->li_details;
3270 
3271 		u_assoc.pa_o_pool_id = elem_get_sysid(dissoc->pdu_dissoc);
3272 		u_assoc.pa_o_res_id = elem_get_sysid(dissoc->pdu_oldres);
3273 		u_assoc.pa_o_id_type = dissoc->pdu_ioctl.pd_o_id_type;
3274 
3275 		while ((ret = ioctl(prov->pkc_fd, POOL_ASSOC, &u_assoc)) < 0 &&
3276 		    errno == EAGAIN)
3277 			;
3278 		if (ret < 0) {
3279 			pool_seterror(POE_SYSTEM);
3280 			return (PO_FAIL);
3281 		}
3282 		li->li_state = LS_DO;
3283 		break;
3284 	case POOL_TRANSFER:
3285 		li->li_state = LS_DO;
3286 		pool_seterror(POE_BADPARAM);
3287 		return (PO_FAIL);
3288 		break;
3289 	case POOL_XTRANSFER:
3290 		xtransfer = (pool_xtransfer_undo_t *)li->li_details;
3291 
3292 		(void) memcpy(&u_xtransfer, &xtransfer->pxu_ioctl,
3293 		    sizeof (pool_xtransfer_t));
3294 		u_xtransfer.px_o_src_id = elem_get_sysid(xtransfer->pxu_tgt);
3295 		u_xtransfer.px_o_tgt_id = elem_get_sysid(xtransfer->pxu_src);
3296 
3297 		if (ioctl(prov->pkc_fd, POOL_XTRANSFER, &u_xtransfer) < 0) {
3298 			pool_seterror(POE_SYSTEM);
3299 			return (PO_FAIL);
3300 		}
3301 		li->li_state = LS_DO;
3302 		break;
3303 	case POOL_PROPPUT:
3304 		propput = (pool_propput_undo_t *)li->li_details;
3305 
3306 		if ((propput->ppu_doioctl & KERNEL_PROP_RDONLY) == 0) {
3307 			if (propput->ppu_blist) {
3308 				(void) memcpy(&u_propput, &propput->ppu_ioctl,
3309 				    sizeof (pool_propput_t));
3310 				u_propput.pp_o_id =
3311 				    elem_get_sysid(propput->ppu_elem);
3312 				u_propput.pp_o_buf = NULL;
3313 				if (nvlist_pack(propput->ppu_blist,
3314 				    (char **)&u_propput.pp_o_buf,
3315 				    &u_propput.pp_o_bufsize,
3316 				    NV_ENCODE_NATIVE, 0) != 0) {
3317 					pool_seterror(POE_SYSTEM);
3318 					return (PO_FAIL);
3319 				}
3320 				if (ioctl(prov->pkc_fd, POOL_PROPPUT,
3321 				    &u_propput) < 0) {
3322 					free(u_propput.pp_o_buf);
3323 					pool_seterror(POE_SYSTEM);
3324 					return (PO_FAIL);
3325 				}
3326 				free(u_propput.pp_o_buf);
3327 			} else {
3328 				if (nvlist_unpack(propput->
3329 				    ppu_ioctl.pp_o_buf,
3330 				    propput->ppu_ioctl.pp_o_bufsize,
3331 				    &propput->ppu_alist, 0) != 0) {
3332 					pool_seterror(POE_SYSTEM);
3333 					return (PO_FAIL);
3334 				}
3335 				u_proprm.pp_o_id_type =
3336 				    propput->ppu_ioctl.pp_o_id_type;
3337 				u_proprm.pp_o_id_sub_type =
3338 				    propput->ppu_ioctl.pp_o_id_sub_type;
3339 				u_proprm.pp_o_id =
3340 				    elem_get_sysid(propput->ppu_elem);
3341 				pair = nvlist_next_nvpair(propput->ppu_alist,
3342 				    NULL);
3343 				u_proprm.pp_o_prop_name = nvpair_name(pair);
3344 				u_proprm.pp_o_prop_name_size =
3345 				    strlen(u_proprm.pp_o_prop_name);
3346 
3347 				if (provider_get_prop(propput->ppu_elem,
3348 				    u_proprm.pp_o_prop_name) == NULL) {
3349 					if (ioctl(prov->pkc_fd, POOL_PROPRM,
3350 					    &u_proprm) < 0) {
3351 						nvlist_free(propput->ppu_alist);
3352 						pool_seterror(POE_SYSTEM);
3353 						return (PO_FAIL);
3354 					}
3355 				}
3356 				nvlist_free(propput->ppu_alist);
3357 			}
3358 		}
3359 		li->li_state = LS_DO;
3360 		break;
3361 	case POOL_PROPRM:
3362 		proprm = (pool_proprm_undo_t *)li->li_details;
3363 
3364 		u_propput.pp_o_id_type = proprm->pru_ioctl.pp_o_id_type;
3365 		u_propput.pp_o_id_sub_type =
3366 		    proprm->pru_ioctl.pp_o_id_sub_type;
3367 		u_propput.pp_o_id = elem_get_sysid(proprm->pru_elem);
3368 		u_propput.pp_o_buf = NULL;
3369 		/*
3370 		 * Only try to remove the appropriate property
3371 		 */
3372 		if (nvlist_alloc(&tmplist, NV_UNIQUE_NAME_TYPE, 0) !=
3373 		    0) {
3374 			pool_seterror(POE_SYSTEM);
3375 			return (PO_FAIL);
3376 		}
3377 		if (pool_knl_nvlist_add_value(tmplist,
3378 		    pool_value_get_name(&proprm->pru_oldval),
3379 		    &proprm->pru_oldval) != PO_SUCCESS)
3380 			return (PO_FAIL);
3381 
3382 		if (nvlist_pack(tmplist,
3383 		    (char **)&u_propput.pp_o_buf, &u_propput.pp_o_bufsize,
3384 		    NV_ENCODE_NATIVE, 0) != 0) {
3385 			nvlist_free(tmplist);
3386 			pool_seterror(POE_SYSTEM);
3387 			return (PO_FAIL);
3388 		}
3389 		nvlist_free(tmplist);
3390 		if (ioctl(prov->pkc_fd, POOL_PROPPUT, &u_propput) < 0) {
3391 			free(u_propput.pp_o_buf);
3392 			pool_seterror(POE_SYSTEM);
3393 			return (PO_FAIL);
3394 		}
3395 		free(u_propput.pp_o_buf);
3396 		li->li_state = LS_DO;
3397 		break;
3398 	default:
3399 		return (PO_FAIL);
3400 	}
3401 		return (PO_SUCCESS);
3402 }
3403 
3404 /*
3405  * A log item stores state about the transaction it represents. This
3406  * function releases the resources associated with the transaction and
3407  * used to store the transaction state.
3408  */
3409 int
3410 log_item_release(log_item_t *li)
3411 {
3412 	pool_create_undo_t *create;
3413 	pool_destroy_undo_t *destroy;
3414 	pool_assoc_undo_t *assoc;
3415 	pool_dissoc_undo_t *dissoc;
3416 	pool_propput_undo_t *propput;
3417 	pool_proprm_undo_t *proprm;
3418 	pool_xtransfer_undo_t *xtransfer;
3419 
3420 	switch (li->li_op) {
3421 	case POOL_CREATE:
3422 		create = (pool_create_undo_t *)li->li_details;
3423 
3424 		free(create);
3425 		break;
3426 	case POOL_DESTROY:
3427 		destroy = (pool_destroy_undo_t *)li->li_details;
3428 
3429 #ifdef DEBUG
3430 		dprintf("log_item_release: POOL_DESTROY\n");
3431 #endif	/* DEBUG */
3432 
3433 		if (li->li_state == LS_UNDO) {
3434 #ifdef DEBUG
3435 			pool_elem_dprintf(destroy->pdu_elem);
3436 #endif	/* DEBUG */
3437 			pool_knl_elem_free((pool_knl_elem_t *)destroy->
3438 			    pdu_elem, PO_TRUE);
3439 		}
3440 		free(destroy);
3441 		break;
3442 	case POOL_ASSOC:
3443 		assoc = (pool_assoc_undo_t *)li->li_details;
3444 
3445 		free(assoc);
3446 		break;
3447 	case POOL_DISSOC:
3448 		dissoc = (pool_dissoc_undo_t *)li->li_details;
3449 
3450 		free(dissoc);
3451 		break;
3452 	case POOL_TRANSFER:
3453 		pool_seterror(POE_BADPARAM);
3454 		return (PO_FAIL);
3455 		break;
3456 	case POOL_XTRANSFER:
3457 		xtransfer = (pool_xtransfer_undo_t *)li->li_details;
3458 
3459 		free(xtransfer->pxu_rl);
3460 		free(xtransfer->pxu_ioctl.px_o_comp_list);
3461 		free(xtransfer);
3462 		break;
3463 	case POOL_PROPPUT:
3464 		propput = (pool_propput_undo_t *)li->li_details;
3465 
3466 		if (propput->ppu_blist)
3467 			nvlist_free(propput->ppu_blist);
3468 		free(propput->ppu_ioctl.pp_o_buf);
3469 		free(propput);
3470 		break;
3471 	case POOL_PROPRM:
3472 		proprm = (pool_proprm_undo_t *)li->li_details;
3473 
3474 		free(proprm);
3475 		break;
3476 	default:
3477 		return (PO_FAIL);
3478 	}
3479 	return (PO_SUCCESS);
3480 }
3481 
3482 /*
3483  * pool_knl_nvlist_add_value() adds a pool_value_t to an nvlist.
3484  */
3485 int
3486 pool_knl_nvlist_add_value(nvlist_t *list, const char *name,
3487     const pool_value_t *pv)
3488 {
3489 	uint64_t uval;
3490 	int64_t ival;
3491 	double dval;
3492 	uchar_t dval_b[sizeof (double)];
3493 	uchar_t bval;
3494 	const char *sval;
3495 	pool_value_class_t type;
3496 	char *nv_name;
3497 
3498 	if ((type = pool_value_get_type(pv)) == POC_INVAL) {
3499 		pool_seterror(POE_BADPARAM);
3500 		return (PO_FAIL);
3501 	}
3502 	nv_name = (char *)name;
3503 
3504 	switch (type) {
3505 	case POC_UINT:
3506 		if (pool_value_get_uint64(pv, &uval) == POC_INVAL) {
3507 			return (PO_FAIL);
3508 		}
3509 		if (nvlist_add_uint64(list, nv_name, uval) != 0) {
3510 			pool_seterror(POE_SYSTEM);
3511 			return (PO_FAIL);
3512 		}
3513 		break;
3514 	case POC_INT:
3515 		if (pool_value_get_int64(pv, &ival) == POC_INVAL) {
3516 			return (PO_FAIL);
3517 		}
3518 		if (nvlist_add_int64(list, nv_name, ival) != 0) {
3519 			pool_seterror(POE_SYSTEM);
3520 			return (PO_FAIL);
3521 		}
3522 		break;
3523 	case POC_DOUBLE:
3524 		if (pool_value_get_double(pv, &dval) == POC_INVAL) {
3525 			return (PO_FAIL);
3526 		}
3527 		/*
3528 		 * Since there is no support for doubles in the
3529 		 * kernel, store the double value in a byte array.
3530 		 */
3531 		(void) memcpy(dval_b, &dval, sizeof (double));
3532 		if (nvlist_add_byte_array(list, nv_name, dval_b,
3533 		    sizeof (double)) != 0) {
3534 			pool_seterror(POE_SYSTEM);
3535 			return (PO_FAIL);
3536 		}
3537 		break;
3538 	case POC_BOOL:
3539 		if (pool_value_get_bool(pv, &bval) == POC_INVAL) {
3540 			return (PO_FAIL);
3541 		}
3542 		if (nvlist_add_byte(list, nv_name, bval) != 0) {
3543 			pool_seterror(POE_SYSTEM);
3544 			return (PO_FAIL);
3545 		}
3546 		break;
3547 	case POC_STRING:
3548 		if (pool_value_get_string(pv, &sval) == POC_INVAL) {
3549 			return (PO_FAIL);
3550 		}
3551 		if (nvlist_add_string(list, nv_name, (char *)sval) != 0) {
3552 			pool_seterror(POE_SYSTEM);
3553 			return (PO_FAIL);
3554 		}
3555 		break;
3556 	default:
3557 		pool_seterror(POE_BADPARAM);
3558 		return (PO_FAIL);
3559 	}
3560 	return (PO_SUCCESS);
3561 }
3562 
3563 /*
3564  * hash_id() hashes all elements in a pool configuration using the
3565  * "sys_id" property. Not all elements have a "sys_id" property,
3566  * however elem_get_sysid() caters for this by always returning a
3567  * constant value for those elements. This isn't anticipated to lead
3568  * to a performance degradation in the hash, since those elements
3569  * which are likely to be most prevalent in a configuration do have
3570  * "sys_id" as a property.
3571  */
3572 uint64_t
3573 hash_id(const pool_elem_t *pe)
3574 {
3575 	id_t id;
3576 
3577 	id = elem_get_sysid(pe);
3578 	return (hash_buf(&id, sizeof (id)));
3579 }
3580 
3581 /*
3582  *  blocking_open() guarantees access to the pool device, if open()
3583  * is failing with EBUSY.
3584  */
3585 int
3586 blocking_open(const char *path, int oflag)
3587 {
3588 	int fd;
3589 
3590 	while ((fd = open(path, oflag)) == -1 && errno == EBUSY)
3591 		(void) poll(NULL, 0, 1 * MILLISEC);
3592 
3593 	return (fd);
3594 }
3595