xref: /titanic_41/usr/src/cmd/rcm_daemon/common/svm_rcm.c (revision 9c9af2590af49bb395bc8d2eace0f2d4ea16d165)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2008 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <stdlib.h>
29 #include <stdio.h>
30 #include <string.h>
31 #include <sys/types.h>
32 #include <errno.h>
33 #include <meta.h>
34 #include <sys/lvm/mdio.h>
35 #include <sys/lvm/md_sp.h>
36 #include <sdssc.h>
37 
38 #include "rcm_module.h"
39 
40 /*
41  * This module is the RCM Module for SVM. The policy adopted by this module
42  * is to block offline requests for any SVM resource that is in use. A
43  * resource is considered to be in use if it contains a metadb or if it is
44  * a non-errored component of a metadevice that is open.
45  *
46  * The module uses the library libmeta to access the current state of the
47  * metadevices. On entry, and when svm_register() is called, the module
48  * builds a cache of all of the SVM resources and their dependencies. Each
49  * metadevice has an entry of type deventry_t which is accessed by a hash
50  * function. When the cache is built each SVM resource is registered with
51  * the RCM framework.  The check_device code path uses meta_invalidate_name to
52  * ensure that the caching in libmeta will not conflict with the cache
53  * we build within this code.
54  *
55  * When an RCM operation occurs that affects a registered SVM resource, the RCM
56  * framework will call the appropriate routine in this module. The cache
57  * entry will be found and if the resource has dependants, a callback will
58  * be made into the RCM framework to pass the request on to the dependants,
59  * which may themselves by SVM resources.
60  *
61  * Locking:
62  *      The cache is protected by a mutex
63  */
64 
65 /*
66  * Private constants
67  */
68 
69 /*
70  * Generic Messages
71  */
72 #define	MSG_UNRECOGNIZED	gettext("SVM: \"%s\" is not a SVM resource")
73 #define	MSG_NODEPS		gettext("SVM: can't find dependents")
74 #define	MSG_NORECACHE		gettext("SVM: WARNING: couldn't re-cache.")
75 #define	MSG_OPENERR		gettext("SVM: can't open \"%s\"")
76 #define	MSG_CACHEFAIL		gettext("SVM: can't malloc cache")
77 
78 #define	ERR_UNRECOGNIZED	gettext("unrecognized SVM resource")
79 #define	ERR_NODEPS		gettext("can't find SVM resource dependents")
80 
81 /*
82  * Macros to produce a quoted string containing the value of a preprocessor
83  * macro. For example, if SIZE is defined to be 256, VAL2STR(SIZE) is "256".
84  * This is used to construct format strings for scanf-family functions below.
85  */
86 #define	QUOTE(x)	#x
87 #define	VAL2STR(x)	QUOTE(x)
88 
/* Kinds of SVM resources tracked in the device cache */
typedef enum {
    SVM_SLICE = 0,	/* physical slice used by a metadevice */
    SVM_STRIPE,		/* stripe metadevice */
    SVM_CONCAT,		/* concatenation metadevice */
    SVM_MIRROR,		/* mirror metadevice */
    SVM_RAID,		/* RAID metadevice */
    SVM_TRANS,		/* trans (logging) metadevice */
    SVM_SOFTPART,	/* soft partition */
    SVM_HS		/* hot spare */
} svm_type_t;
99 
100 /* Hash table parameters */
101 #define	HASH_DEFAULT	251
102 
103 /* Hot spare pool users */
typedef struct hspuser {
	struct hspuser  *next;		/* next user of the same pool */
	char		*hspusername;	/* user's device name (strdup'ed) */
	dev_t		hspuserkey;	/* key: dev_t of the using device */
} hspuser_t;
109 
110 /* Hot spare pool entry */
typedef struct hspentry {
	struct hspentry *link;		/* link through all hsp entries */
	struct hspentry *next;		/* next hsp entry for a slice */
	char		*hspname;	/* pool name (strdup'ed) */
	hspuser_t	*hspuser;	/* head of list of pool users */
} hspentry_t;
117 
118 /* Hash table entry */
typedef struct deventry {
	struct deventry		*next;		/* next entry with same hash */
	svm_type_t		devtype;	/* device type */
	dev_t			devkey;		/* key: dev_t of device */
	char			*devname;	/* name in /dev */
	char			*devicesname;	/* name in /devices */
	struct deventry		*dependent;	/* 1st dependent */
	struct deventry		*next_dep;	/* next dependent */
	struct deventry		*antecedent;	/* device this is built on */
	hspentry_t		*hsp_list;	/* list of hot spare pools */
	int			flags;		/* REMOVED, IN_HSP, ... below */
} deventry_t;
131 
132 /* flag values */
133 #define	 REMOVED	0x1
134 #define	 IN_HSP		0x2
135 #define	 TRANS_LOG	0x4
136 #define	 CONT_SOFTPART	0x8
137 #define	 CONT_METADB	0x10
138 
139 /*
140  * Device redundancy flags. If the device can be removed from the
141  * metadevice configuration then it is considered a redundant device,
142  * otherwise not.
143  */
144 #define	NOTINDEVICE	-1
145 #define	NOTREDUNDANT	0
146 #define	REDUNDANT	1
147 
148 /* Cache */
typedef struct cache {
	deventry_t	**hashline;	/* hash table of deventry_t chains */
	int32_t		size;		/* size of hash table */
	uint32_t	registered;	/* non-zero once cache registered */
} cache_t;
154 
155 /*
156  * Forward declarations of private functions
157  */
158 
159 static int svm_register(rcm_handle_t *hd);
160 static int svm_unregister(rcm_handle_t *hd);
161 static int svm_unregister_device(rcm_handle_t *hd, deventry_t *d);
162 static deventry_t *cache_dependent(cache_t *cache, char *devname, int devflags,
163     deventry_t *dependents);
164 static deventry_t *cache_device(cache_t *cache, char *devname,
165     svm_type_t devtype, md_dev64_t devkey, int devflags);
166 static hspentry_t *find_hsp(char *hspname);
167 static hspuser_t *add_hsp_user(char *hspname, deventry_t *deventry);
168 static hspentry_t *add_hsp(char *hspname, deventry_t *deventry);
169 static void free_names(mdnamelist_t *nlp);
170 static int cache_all_devices(cache_t *cache);
171 static int cache_hsp(cache_t *cache, mdhspnamelist_t *nlp, md_hsp_t *hsp);
172 static int cache_trans(cache_t *cache, mdnamelist_t *nlp, md_trans_t *trans);
173 static int cache_mirror(cache_t *cache, mdnamelist_t *nlp,
174     md_mirror_t *mirror);
175 static int cache_raid(cache_t *cache, mdnamelist_t *nlp, md_raid_t *raid);
176 static int cache_stripe(cache_t *cache, mdnamelist_t *nlp,
177     md_stripe_t *stripe);
178 static int cache_sp(cache_t *cache, mdnamelist_t *nlp, md_sp_t *soft_part);
179 static int cache_all_devices_in_set(cache_t *cache, mdsetname_t *sp);
180 static cache_t  *create_cache();
181 static deventry_t *create_deventry(char *devname, svm_type_t devtype,
182     md_dev64_t devkey, int devflags);
183 static void cache_remove(cache_t *cache, deventry_t *deventry);
184 static deventry_t *cache_lookup(cache_t *cache, char *devname);
185 static void cache_sync(rcm_handle_t *hd, cache_t **cachep);
186 static char *cache_walk(cache_t *cache, uint32_t *i, deventry_t **hashline);
187 static void free_cache(cache_t **cache);
188 static void free_deventry(deventry_t **deventry);
189 static uint32_t hash(uint32_t h, char *s);
190 static void svm_register_device(rcm_handle_t *hd, char *devname);
191 static int add_dep(int *ndeps, char ***depsp, deventry_t *deventry);
192 static int get_dependents(deventry_t *deventry, char *** dependentsp);
193 char *add_to_usage(char ** usagep, char *string);
194 char *add_to_usage_fmt(char **usagep, char *fmt, char *string);
195 static int is_open(dev_t devkey);
196 static int svm_offline(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags,
197     char **errorp, rcm_info_t **infop);
198 static int svm_online(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags,
199     char **errorp, rcm_info_t **infop);
200 static int svm_get_info(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags,
201     char **usagep, char **errorp, nvlist_t *props, rcm_info_t **infop);
202 static int svm_suspend(rcm_handle_t *hd, char *rsrc, id_t id,
203     timespec_t *interval, uint_t flags, char **errorp,
204     rcm_info_t **infop);
205 static int svm_resume(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags,
206     char **errorp, rcm_info_t **infop);
207 static int svm_remove(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags,
208     char **errorp, rcm_info_t **infop);
209 static int check_device(deventry_t *deventry);
210 static int check_mirror(mdsetname_t *sp, mdname_t *np, md_error_t *ep);
211 
212 /*
213  * Module-Private data
214  */
static struct rcm_mod_ops svm_ops =
{
	RCM_MOD_OPS_VERSION,
	svm_register,		/* register */
	svm_unregister,		/* unregister */
	svm_get_info,		/* get_info */
	svm_suspend,		/* suspend */
	svm_resume,		/* resume */
	svm_offline,		/* offline */
	svm_online,		/* online */
	svm_remove,		/* remove */
	NULL,			/* remaining entry points unused */
	NULL,
	NULL
};
230 
static cache_t *svm_cache = NULL;	/* cache of all SVM resources */
static mutex_t svm_cache_lock;		/* protects svm_cache */
static hspentry_t *hsp_head = NULL;	/* list of all hot spare pools */
234 
235 /*
236  * Module Interface Routines
237  */
238 
239 /*
240  *      rcm_mod_init()
241  *
242  *      Create a cache, and return the ops structure.
243  *      Input: None
244  *      Return: rcm_mod_ops structure
245  */
246 struct rcm_mod_ops *
247 rcm_mod_init()
248 {
249 	/* initialize the lock mutex */
250 	if (mutex_init(&svm_cache_lock, USYNC_THREAD, NULL)) {
251 		rcm_log_message(RCM_ERROR,
252 		    gettext("SVM: can't init mutex"));
253 		return (NULL);
254 	}
255 
256 	/* need to initialize the cluster library to avoid seg faults */
257 	if (sdssc_bind_library() == SDSSC_ERROR) {
258 		rcm_log_message(RCM_ERROR,
259 			gettext("SVM: Interface error with libsds_sc.so,"
260 			    " aborting."));
261 		return (NULL);
262 	}
263 
264 	/* Create a cache */
265 	if ((svm_cache = create_cache()) == NULL) {
266 		rcm_log_message(RCM_ERROR,
267 			gettext("SVM: module can't function, aborting."));
268 		return (NULL);
269 	}
270 
271 	/* Return the ops vectors */
272 	return (&svm_ops);
273 }
274 
275 /*
276  *	rcm_mod_info()
277  *
278  *	Return a string describing this module.
279  *	Input: None
280  *	Return: String
281  *	Locking: None
282  */
const char *
rcm_mod_info()
{
	/* Localized identification string handed back to the framework */
	return (gettext("Solaris Volume Manager module 1.9"));
}
288 
289 /*
290  *	rcm_mod_fini()
291  *
292  *	Destroy the cache and mutex
293  *	Input: None
294  *	Return: RCM_SUCCESS
295  *	Locking: None
296  */
297 int
298 rcm_mod_fini()
299 {
300 	(void) mutex_lock(&svm_cache_lock);
301 	if (svm_cache) {
302 		free_cache(&svm_cache);
303 	}
304 	(void) mutex_unlock(&svm_cache_lock);
305 	(void) mutex_destroy(&svm_cache_lock);
306 	return (RCM_SUCCESS);
307 }
308 
309 /*
310  *	svm_register()
311  *
312  *	Make sure the cache is properly sync'ed, and its registrations are in
313  *	order.
314  *
315  *	Input:
316  *		rcm_handle_t	*hd
317  *	Return:
318  *		RCM_SUCCESS
319  *      Locking: the cache is locked throughout the execution of this routine
320  *      because it reads and possibly modifies cache links continuously.
321  */
322 static int
323 svm_register(rcm_handle_t *hd)
324 {
325 	uint32_t i = 0;
326 	deventry_t *l = NULL;
327 	char    *devicename;
328 
329 
330 	rcm_log_message(RCM_TRACE1, "SVM: register\n");
331 	/* Guard against bad arguments */
332 	assert(hd != NULL);
333 
334 	/* Lock the cache */
335 	(void) mutex_lock(&svm_cache_lock);
336 
337 	/* If the cache has already been registered, then just sync it.  */
338 	if (svm_cache && svm_cache->registered) {
339 		cache_sync(hd, &svm_cache);
340 		(void) mutex_unlock(&svm_cache_lock);
341 		return (RCM_SUCCESS);
342 	}
343 
344 	/* If not, register the whole cache and mark it as registered. */
345 	while ((devicename = cache_walk(svm_cache, &i, &l)) != NULL) {
346 			svm_register_device(hd, devicename);
347 	}
348 	svm_cache->registered = 1;
349 
350 	/* Unlock the cache */
351 	(void) mutex_unlock(&svm_cache_lock);
352 
353 	return (RCM_SUCCESS);
354 }
355 
356 /*
357  *	svm_unregister()
358  *
359  *	Manually walk through the cache, unregistering all the special files and
360  *	mount points.
361  *
362  *	Input:
363  *		rcm_handle_t	*hd
364  *	Return:
365  *		RCM_SUCCESS
366  *      Locking: the cache is locked throughout the execution of this routine
367  *      because it reads and modifies cache links continuously.
368  */
369 static int
370 svm_unregister(rcm_handle_t *hd)
371 {
372 	deventry_t *l = NULL;
373 	uint32_t i = 0;
374 
375 	rcm_log_message(RCM_TRACE1, "SVM: unregister\n");
376 	/* Guard against bad arguments */
377 	assert(hd != NULL);
378 
379 	/* Walk the cache, unregistering everything */
380 	(void) mutex_lock(&svm_cache_lock);
381 	if (svm_cache != NULL) {
382 		while (cache_walk(svm_cache, &i, &l) != NULL) {
383 			(void) svm_unregister_device(hd, l);
384 		}
385 		svm_cache->registered = 0;
386 	}
387 	(void) mutex_unlock(&svm_cache_lock);
388 	return (RCM_SUCCESS);
389 }
390 
391 /*
392  *      svm_offline()
393  *
394  *      Determine dependents of the resource being offlined, and offline
395  *      them all.
396  *
397  *      Input:
398  *		rcm_handle_t	*hd		handle
399  *		char*		*rsrc		resource name
400  *		id_t		id		0
401  *		char		**errorp	ptr to error message
402  *		rcm_info_t	**infop		ptr to info string
403  *      Output:
404  *		char		**errorp	pass back error message
405  *      Return:
406  *		int		RCM_SUCCESS or RCM_FAILURE
407  *      Locking: the cache is locked for most of this routine, except while
408  *      processing dependents.
409  */
410 /*ARGSUSED*/
411 static int
412 svm_offline(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags,
413     char **errorp, rcm_info_t **infop)
414 {
415 	int		rv = RCM_SUCCESS;
416 	int		ret;
417 	char		**dependents;
418 	deventry_t	*deventry;
419 	hspentry_t	*hspentry;
420 	hspuser_t	*hspuser;
421 
422 	/* Guard against bad arguments */
423 	assert(hd != NULL);
424 	assert(rsrc != NULL);
425 	assert(id == (id_t)0);
426 	assert(errorp != NULL);
427 
428 	/* Trace */
429 	rcm_log_message(RCM_TRACE1, "SVM: offline(%s), flags(%d)\n",
430 	    rsrc, flags);
431 
432 	/* Lock the cache */
433 	(void) mutex_lock(&svm_cache_lock);
434 
435 	/* Lookup the resource in the cache. */
436 	if ((deventry = cache_lookup(svm_cache, rsrc)) == NULL) {
437 		rcm_log_message(RCM_ERROR, MSG_UNRECOGNIZED);
438 		*errorp = strdup(ERR_UNRECOGNIZED);
439 		(void) mutex_unlock(&svm_cache_lock);
440 		rv = RCM_FAILURE;
441 		rcm_log_message(RCM_TRACE1, "SVM: svm_offline(%s) exit %d\n",
442 		    rsrc, rv);
443 		return (rv);
444 	}
445 	/* If it is a TRANS device, do not allow the offline */
446 	if (deventry->devtype == SVM_TRANS) {
447 		rv = RCM_FAILURE;
448 		(void) mutex_unlock(&svm_cache_lock);
449 		goto exit;
450 	}
451 
452 	if (deventry->flags&IN_HSP) {
453 		/*
454 		 * If this is in a hot spare pool, check to see
455 		 * if any of the hot spare pool users are open
456 		 */
457 		hspentry = deventry->hsp_list;
458 		while (hspentry) {
459 			hspuser = hspentry->hspuser;
460 			while (hspuser) {
461 				/* Check if open */
462 				if (is_open(hspuser->hspuserkey)) {
463 					rv = RCM_FAILURE;
464 					(void) mutex_unlock(&svm_cache_lock);
465 					goto exit;
466 				}
467 				hspuser = hspuser->next;
468 			}
469 			hspentry = hspentry->next;
470 		}
471 	}
472 
473 	/* Fail if the device contains a metadb replica */
474 	if (deventry->flags&CONT_METADB) {
475 		/*
476 		 * The user should delete the replica before continuing,
477 		 * so force the error.
478 		 */
479 		rcm_log_message(RCM_TRACE1, "SVM: %s has a replica\n",
480 		    deventry->devname);
481 		rv = RCM_FAILURE;
482 		(void) mutex_unlock(&svm_cache_lock);
483 		goto exit;
484 	}
485 
486 	/* Get dependents */
487 	if (get_dependents(deventry, &dependents) != 0) {
488 		rcm_log_message(RCM_ERROR, MSG_NODEPS);
489 		rv = RCM_FAILURE;
490 		(void) mutex_unlock(&svm_cache_lock);
491 		goto exit;
492 	}
493 
494 	if (dependents) {
495 		/* Check if the device is broken (needs maintanence). */
496 		if (check_device(deventry) == REDUNDANT) {
497 			/*
498 			 * The device is broken, the offline request should
499 			 * succeed, so ignore any of the dependents.
500 			 */
501 			rcm_log_message(RCM_TRACE1,
502 			    "SVM: ignoring dependents\n");
503 			(void) mutex_unlock(&svm_cache_lock);
504 			free(dependents);
505 			goto exit;
506 		}
507 		(void) mutex_unlock(&svm_cache_lock);
508 		ret = rcm_request_offline_list(hd, dependents, flags, infop);
509 		if (ret != RCM_SUCCESS) {
510 			rv = ret;
511 		}
512 		free(dependents);
513 	} else {
514 		/* If no dependents, check if the metadevice is open */
515 		if ((deventry->devkey) && (is_open(deventry->devkey))) {
516 			rv = RCM_FAILURE;
517 			(void) mutex_unlock(&svm_cache_lock);
518 			goto exit;
519 		}
520 		(void) mutex_unlock(&svm_cache_lock);
521 	}
522 exit:
523 	rcm_log_message(RCM_TRACE1, "SVM: svm_offline(%s) exit %d\n", rsrc, rv);
524 	if (rv != RCM_SUCCESS)
525 		*errorp = strdup(gettext("unable to offline"));
526 	return (rv);
527 }
528 
529 /*
530  *      svm_online()
531  *
532  *      Just pass the online notification on to the dependents of this resource
533  *
534  *      Input:
535  *		rcm_handle_t	*hd		handle
536  *		char*		*rsrc		resource name
537  *		id_t		id		0
538  *		char		**errorp	ptr to error message
539  *		rcm_info_t	**infop		ptr to info string
540  *      Output:
541  *		char		**errorp	pass back error message
542  *      Return:
543  *		int		RCM_SUCCESS or RCM_FAILURE
544  *      Locking: the cache is locked for most of this routine, except while
545  *      processing dependents.
546  */
547 /*ARGSUSED*/
548 static int
549 svm_online(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, char **errorp,
550     rcm_info_t **infop)
551 {
552 	int		rv = RCM_SUCCESS;
553 	char		**dependents;
554 	deventry_t	*deventry;
555 
556 	/* Guard against bad arguments */
557 	assert(hd != NULL);
558 	assert(rsrc != NULL);
559 	assert(id == (id_t)0);
560 
561 	/* Trace */
562 	rcm_log_message(RCM_TRACE1, "SVM: online(%s)\n", rsrc);
563 
564 	/* Lookup this resource in the cache (cache gets locked) */
565 	(void) mutex_lock(&svm_cache_lock);
566 	deventry = cache_lookup(svm_cache, rsrc);
567 	if (deventry == NULL) {
568 		(void) mutex_unlock(&svm_cache_lock);
569 		rcm_log_message(RCM_ERROR, MSG_UNRECOGNIZED, rsrc);
570 		*errorp = strdup(ERR_UNRECOGNIZED);
571 		return (RCM_FAILURE);
572 	}
573 
574 	/* Get dependents */
575 	if (get_dependents(deventry, &dependents) != 0) {
576 		(void) mutex_unlock(&svm_cache_lock);
577 		rcm_log_message(RCM_ERROR, MSG_NODEPS);
578 		*errorp = strdup(ERR_NODEPS);
579 		return (RCM_FAILURE);
580 	}
581 	(void) mutex_unlock(&svm_cache_lock);
582 
583 	if (dependents) {
584 		rv = rcm_notify_online_list(hd, dependents, flags, infop);
585 		if (rv != RCM_SUCCESS)
586 			*errorp = strdup(gettext("unable to online"));
587 		free(dependents);
588 	}
589 
590 	return (rv);
591 }
592 
593 /*
594  *      svm_get_info()
595  *
596  *      Gather usage information for this resource.
597  *
598  *      Input:
599  *		rcm_handle_t	*hd		handle
600  *		char*		*rsrc		resource name
601  *		id_t		id		0
602  *		char		**errorp	ptr to error message
603  *		nvlist_t	*props		Not used
604  *		rcm_info_t	**infop		ptr to info string
605  *      Output:
606  *		char		**infop		pass back info string
607  *      Return:
608  *		int		RCM_SUCCESS or RCM_FAILURE
609  *      Locking: the cache is locked  throughout the whole function
610  */
/*ARGSUSED*/
static int
svm_get_info(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, char **usagep,
    char **errorp, nvlist_t *props, rcm_info_t **infop)
{
	int 		rv = RCM_SUCCESS;
	deventry_t	*deventry;
	deventry_t	*dependent;
	hspentry_t	*hspentry;
	char		**dependents;

	/* Guard against bad arguments */
	assert(hd != NULL);
	assert(rsrc != NULL);
	assert(id == (id_t)0);
	assert(usagep != NULL);
	assert(errorp != NULL);

	/* Trace */
	rcm_log_message(RCM_TRACE1, "SVM: get_info(%s)\n", rsrc);

	/* Lookup this resource in the cache (cache gets locked) */
	(void) mutex_lock(&svm_cache_lock);
	deventry = cache_lookup(svm_cache, rsrc);
	if (deventry == NULL) {
		(void) mutex_unlock(&svm_cache_lock);
		rcm_log_message(RCM_ERROR, MSG_UNRECOGNIZED, rsrc);
		*errorp = strdup(ERR_UNRECOGNIZED);
		return (RCM_FAILURE);
	}

	/*
	 * Build the usage string incrementally; add_to_usage() and
	 * add_to_usage_fmt() append a clause and return the new string.
	 */
	*usagep = NULL; /* Initialise usage string */
	if (deventry->flags&CONT_METADB) {
		*usagep = add_to_usage(usagep, gettext("contains metadb(s)"));
	}
	if (deventry->flags&CONT_SOFTPART) {
		*usagep = add_to_usage(usagep,
		    gettext("contains soft partition(s)"));
	}
	if (deventry->devtype == SVM_SOFTPART) {
		/* A soft partition also names the device it is built on */
		*usagep = add_to_usage_fmt(usagep,
		    gettext("soft partition based on \"%s\""),
		    deventry->antecedent->devname);
	}

	if (deventry->flags&IN_HSP) {
		/* List every hot spare pool this resource belongs to */
		int	hspflag = 0;
		hspentry = deventry->hsp_list;
		while (hspentry) {
			if (hspflag == 0) {
				/* Emit the heading only once */
				*usagep = add_to_usage(usagep,
				    gettext("member of hot spare pool"));
				hspflag = 1;
			}
			*usagep = add_to_usage_fmt(usagep, "\"%s\"",
			    hspentry->hspname);
			hspentry = hspentry->next;
		}
	} else {
		/* Otherwise describe each metadevice built on this one */
		dependent = deventry->dependent;
		while (dependent) {
			/* Resource has dependents */
			switch (dependent->devtype) {
			case SVM_STRIPE:
				*usagep = add_to_usage_fmt(usagep,
				    gettext("component of stripe \"%s\""),
				    dependent->devname);
				break;
			case SVM_CONCAT:
				*usagep = add_to_usage_fmt(usagep,
				    gettext("component of concat \"%s\""),
				    dependent->devname);
				break;
			case SVM_MIRROR:
				*usagep = add_to_usage_fmt(usagep,
				    gettext("submirror of \"%s\""),
				    dependent->devname);
				break;
			case SVM_RAID:
				*usagep = add_to_usage_fmt(usagep,
				    gettext("component of RAID \"%s\""),
				    dependent->devname);
				break;
			case SVM_TRANS:
				/* TRANS_LOG distinguishes log from master */
				if (deventry->flags&TRANS_LOG) {
					*usagep = add_to_usage_fmt(usagep,
					    gettext("trans log for \"%s\""),
					    dependent->devname);
				} else {
					*usagep = add_to_usage_fmt(usagep,
					    gettext("trans master for \"%s\""),
					    dependent->devname);
				}
				break;
			case SVM_SOFTPART:
				/* Contains soft parts, already processed */
				break;
			default:
				rcm_log_message(RCM_ERROR,
				    gettext("Unknown type %d\n"),
				    dependent->devtype);
			}
			dependent = dependent->next_dep;
		}
	}

	/* Get dependents  and recurse if necessary */
	if (get_dependents(deventry, &dependents) != 0) {
		(void) mutex_unlock(&svm_cache_lock);
		rcm_log_message(RCM_ERROR, MSG_NODEPS);
		*errorp = strdup(ERR_NODEPS);
		return (RCM_FAILURE);
	}
	(void) mutex_unlock(&svm_cache_lock);

	/* Only recurse when the caller asked for dependent info */
	if ((flags & RCM_INCLUDE_DEPENDENT) && (dependents != NULL)) {
		rv = rcm_get_info_list(hd, dependents, flags, infop);
		if (rv != RCM_SUCCESS)
			*errorp = strdup(gettext("unable to get info"));
	}
	/* free(NULL) is a no-op when there were no dependents */
	free(dependents);

	if (*usagep != NULL)
		rcm_log_message(RCM_TRACE1, "SVM: usage = %s\n", *usagep);
	return (rv);
}
737 
738 /*
739  *      svm_suspend()
740  *
741  *      Notify all dependents that the resource is being suspended.
742  *      Since no real operation is involved, QUERY or not doesn't matter.
743  *
744  *      Input:
745  *		rcm_handle_t	*hd		handle
746  *		char*		*rsrc		resource name
747  *		id_t		id		0
748  *		char		**errorp	ptr to error message
749  *		rcm_info_t	**infop		ptr to info string
750  *      Output:
751  *		char		**errorp	pass back error message
752  *      Return:
753  *		int		RCM_SUCCESS or RCM_FAILURE
754  *      Locking: the cache is locked for most of this routine, except while
755  *      processing dependents.
756  */
static int
svm_suspend(rcm_handle_t *hd, char *rsrc, id_t id, timespec_t *interval,
    uint_t flags, char **errorp, rcm_info_t **infop)
{
	int		rv = RCM_SUCCESS;
	deventry_t	*deventry;
	char		**dependents;

	/* Guard against bad arguments */
	assert(hd != NULL);
	assert(rsrc != NULL);
	assert(id == (id_t)0);
	assert(interval != NULL);
	assert(errorp != NULL);

	/* Trace */
	rcm_log_message(RCM_TRACE1, "SVM: suspend(%s)\n", rsrc);

	/* Lock the cache and extract information about this resource.  */
	(void) mutex_lock(&svm_cache_lock);
	if ((deventry = cache_lookup(svm_cache, rsrc)) == NULL) {
		(void) mutex_unlock(&svm_cache_lock);
		rcm_log_message(RCM_ERROR, MSG_UNRECOGNIZED, rsrc);
		*errorp = strdup(ERR_UNRECOGNIZED);
		/*
		 * NOTE(review): an unrecognized resource is logged and an
		 * error string is returned, yet the call reports
		 * RCM_SUCCESS. Presumably suspending an unmanaged resource
		 * is considered harmless (svm_resume behaves the same way);
		 * confirm this is intentional.
		 */
		return (RCM_SUCCESS);
	}

	/* Get dependents while still holding the cache lock */
	if (get_dependents(deventry, &dependents) != 0) {
		(void) mutex_unlock(&svm_cache_lock);
		rcm_log_message(RCM_ERROR, MSG_NODEPS);
		*errorp = strdup(ERR_NODEPS);
		return (RCM_FAILURE);
	}
	(void) mutex_unlock(&svm_cache_lock);

	/* Forward the suspend request to any dependents, unlocked */
	if (dependents) {
		rv = rcm_request_suspend_list(hd, dependents, flags,
		    interval, infop);
		if (rv != RCM_SUCCESS)
			*errorp = strdup(gettext("unable to suspend"));
		free(dependents);
	}

	return (rv);
}
803 
804 /*
805  *      svm_resume()
806  *
807  *      Notify all dependents that the resource is being resumed.
808  *
809  *      Input:
810  *		rcm_handle_t	*hd		handle
811  *		char*		*rsrc		resource name
812  *		id_t		id		0
813  *		char		**errorp	ptr to error message
814  *		rcm_info_t	**infop		ptr to info string
815  *      Output:
816  *		char		**errorp	pass back error message
817  *      Return:
818  *		int		RCM_SUCCESS or RCM_FAILURE
819  *      Locking: the cache is locked for most of this routine, except while
820  *      processing dependents.
821  *
822  */
823 static int
824 svm_resume(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, char **errorp,
825     rcm_info_t **infop)
826 {
827 	int		rv = RCM_SUCCESS;
828 	deventry_t	*deventry;
829 	char		**dependents;
830 
831 	/* Guard against bad arguments */
832 	assert(hd != NULL);
833 	assert(rsrc != NULL);
834 	assert(id == (id_t)0);
835 	assert(errorp != NULL);
836 
837 	/* Trace */
838 	rcm_log_message(RCM_TRACE1, "SVM: resume(%s)\n", rsrc);
839 
840 	/*
841 	 * Lock the cache just long enough to extract information about this
842 	 * resource.
843 	 */
844 	(void) mutex_lock(&svm_cache_lock);
845 	if ((deventry = cache_lookup(svm_cache, rsrc)) == NULL) {
846 		(void) mutex_unlock(&svm_cache_lock);
847 		rcm_log_message(RCM_ERROR, MSG_UNRECOGNIZED, rsrc);
848 		*errorp = strdup(ERR_UNRECOGNIZED);
849 		return (RCM_SUCCESS);
850 	}
851 
852 	/* Get dependents */
853 
854 	if (get_dependents(deventry, &dependents) != 0) {
855 		(void) mutex_unlock(&svm_cache_lock);
856 		rcm_log_message(RCM_ERROR, MSG_NODEPS);
857 		*errorp = strdup(ERR_NODEPS);
858 		return (RCM_FAILURE);
859 	}
860 
861 	(void) mutex_unlock(&svm_cache_lock);
862 	if (dependents) {
863 		rv = rcm_notify_resume_list(hd, dependents, flags, infop);
864 		if (rv != RCM_SUCCESS)
865 			*errorp = strdup(gettext("unable to resume"));
866 		free(dependents);
867 	}
868 
869 	return (rv);
870 }
871 
872 
873 /*
874  *	svm_remove()
875  *
876  *      Remove the resource from the cache and notify all dependents that
877  *      the resource has been removed.
878  *
879  *      Input:
880  *		rcm_handle_t	*hd		handle
881  *		char*		*rsrc		resource name
882  *		id_t		id		0
883  *		char		**errorp	ptr to error message
884  *		rcm_info_t	**infop		ptr to info string
885  *      Output:
886  *		char		**errorp	pass back error message
887  *      Return:
888  *		int		RCM_SUCCESS or RCM_FAILURE
889  *      Locking: the cache is locked for most of this routine, except while
890  *      processing dependents.
891  */
892 static int
893 svm_remove(rcm_handle_t *hd, char *rsrc, id_t id, uint_t flags, char **errorp,
894     rcm_info_t **infop)
895 {
896 	int		rv = RCM_SUCCESS;
897 	char		**dependents;
898 	deventry_t	*deventry;
899 
900 	/* Guard against bad arguments */
901 	assert(hd != NULL);
902 	assert(rsrc != NULL);
903 	assert(id == (id_t)0);
904 
905 	/* Trace */
906 	rcm_log_message(RCM_TRACE1, "SVM: svm_remove(%s)\n", rsrc);
907 
908 	/* Lock the cache while removing resource */
909 	(void) mutex_lock(&svm_cache_lock);
910 	if ((deventry = cache_lookup(svm_cache, rsrc)) == NULL) {
911 		(void) mutex_unlock(&svm_cache_lock);
912 		return (RCM_SUCCESS);
913 	}
914 
915 	/* Get dependents */
916 	if (get_dependents(deventry, &dependents) != 0) {
917 		(void) mutex_unlock(&svm_cache_lock);
918 		rcm_log_message(RCM_ERROR, MSG_NODEPS);
919 		deventry->flags |= REMOVED;
920 		*errorp = strdup(ERR_NODEPS);
921 		return (RCM_FAILURE);
922 	}
923 
924 	if (dependents) {
925 		(void) mutex_unlock(&svm_cache_lock);
926 		rv = rcm_notify_remove_list(hd, dependents, flags, infop);
927 		(void) mutex_lock(&svm_cache_lock);
928 		if (rv != RCM_SUCCESS)
929 			*errorp = strdup(gettext("unable to remove"));
930 		free(dependents);
931 	}
932 
933 	/* Mark entry as removed */
934 	deventry->flags |= REMOVED;
935 
936 	(void) mutex_unlock(&svm_cache_lock);
937 	rcm_log_message(RCM_TRACE1, "SVM: exit svm_remove(%s)\n", rsrc);
938 	/* Clean up and return success */
939 	return (RCM_SUCCESS);
940 }
941 
942 /*
943  * Definitions of private functions
944  *
945  */
946 
947 /*
948  *	find_hsp()
949  *
950  *	Find the hot spare entry from the linked list of all hotspare pools
951  *
952  *	Input:
953  *		char		*hspname	name of hot spare pool
954  *	Return:
955  *		hspentry_t	hot spare entry
956  */
957 static hspentry_t *
958 find_hsp(char *hspname)
959 {
960 	hspentry_t	*hspentry = hsp_head;
961 
962 	while (hspentry) {
963 		if (strcmp(hspname, hspentry->hspname) == 0)
964 			return (hspentry);
965 		hspentry = hspentry->link;
966 	}
967 	return (NULL);
968 }
969 
970 /*
971  *      add_hsp_user()
972  *
 *      Add a hot spare pool user to the list for the hsp specified by
974  *	hspname. The memory allocated here will be freed by free_cache()
975  *
976  *      Input:
977  *		char		*hspname	hot spare pool name
978  *		deventry_t	*deventry	specified hsp user
979  *      Return:
980  *		hspuser_t	entry in hsp user list
981  */
982 static hspuser_t *
983 add_hsp_user(char *hspname, deventry_t *deventry)
984 {
985 	hspuser_t	*newhspuser;
986 	char		*newhspusername;
987 	hspuser_t	*previous;
988 	hspentry_t	*hspentry;
989 
990 	hspentry = find_hsp(hspname);
991 	if (hspentry == NULL)
992 		return (NULL);
993 	rcm_log_message(RCM_TRACE1, "SVM: Enter add_hsp_user %s, %x, %x\n",
994 	    hspname, hspentry, hspentry->hspuser);
995 
996 	newhspuser = (hspuser_t *)malloc(sizeof (*newhspuser));
997 	if (newhspuser == NULL) {
998 		rcm_log_message(RCM_ERROR,
999 		    gettext("SVM: can't malloc hspuser"));
1000 		return (NULL);
1001 	}
1002 	(void) memset((char *)newhspuser, 0, sizeof (*newhspuser));
1003 
1004 	newhspusername = strdup(deventry->devname);
1005 	if (newhspusername == NULL) {
1006 		rcm_log_message(RCM_ERROR,
1007 		    gettext("SVM: can't malloc hspusername"));
1008 		free(newhspuser);
1009 		return (NULL);
1010 	}
1011 	newhspuser->hspusername = newhspusername;
1012 	newhspuser->hspuserkey = deventry->devkey;
1013 
1014 	if ((previous = hspentry->hspuser) == NULL) {
1015 		hspentry->hspuser = newhspuser;
1016 	} else {
1017 		hspuser_t	*temp = previous->next;
1018 		previous->next = newhspuser;
1019 		newhspuser->next = temp;
1020 	}
1021 	rcm_log_message(RCM_TRACE1, "SVM: Added hsp_user %s (dev %x) to %s\n",
1022 	    newhspusername, newhspuser->hspuserkey, hspname);
1023 	return (newhspuser);
1024 }
1025 
1026 /*
1027  *      add_hsp()
1028  *
1029  *      Add a hot spare pool entry to the list for the slice, deventry.
1030  *      Also add to the linked list of all hsp pools
1031  *	The memory allocated here will be freed by free_cache()
1032  *
1033  *      Input:
1034  *		char		*hspname	name of hsp pool entry
1035  *		deventry_t	*deventry	device entry for the slice
1036  *      Return:
1037  *		hspentry_t	end of hsp list
1038  *      Locking: None
1039  */
1040 static hspentry_t *
1041 add_hsp(char *hspname, deventry_t *deventry)
1042 {
1043 	hspentry_t	*newhspentry;
1044 	hspentry_t	*previous;
1045 	char		*newhspname;
1046 
1047 	rcm_log_message(RCM_TRACE1, "SVM: Enter add_hsp %s\n",
1048 	    hspname);
1049 	newhspentry = (hspentry_t *)malloc(sizeof (*newhspentry));
1050 	if (newhspentry == NULL) {
1051 		rcm_log_message(RCM_ERROR,
1052 		    gettext("SVM: can't malloc hspentry"));
1053 		return (NULL);
1054 	}
1055 	(void) memset((char *)newhspentry, 0, sizeof (*newhspentry));
1056 
1057 	newhspname = strdup(hspname);
1058 	if (newhspname == NULL) {
1059 		rcm_log_message(RCM_ERROR,
1060 		    gettext("SVM: can't malloc hspname"));
1061 		free(newhspentry);
1062 		return (NULL);
1063 	}
1064 	newhspentry->hspname = newhspname;
1065 
1066 	/* Add to linked list of all hotspare pools */
1067 	newhspentry->link = hsp_head;
1068 	hsp_head = newhspentry;
1069 
1070 	/* Add to list of hotspare pools containing this slice */
1071 	if ((previous = deventry->hsp_list) == NULL) {
1072 		deventry->hsp_list = newhspentry;
1073 	} else {
1074 		hspentry_t	*temp = previous->next;
1075 		previous->next = newhspentry;
1076 		newhspentry->next = temp;
1077 	}
1078 	rcm_log_message(RCM_TRACE1, "SVM: Exit add_hsp %s\n",
1079 	    hspname);
1080 	return (newhspentry);
1081 }
1082 
1083 /*
1084  *      cache_dependent()
1085  *
1086  *      Add a dependent for a deventry to the cache and return the cache entry
1087  *	If the name is not in the cache, we assume that it is a SLICE. If it
1088  *	turns out to be any other type of metadevice, when it is processed
1089  *	in cache_all_devices_in_set(), cache_device() will be called to
1090  *	set the type to the actual value.
1091  *
1092  *      Input:
1093  *		cache_t		*cache		cache
1094  *		char		*devname	metadevice name
1095  *		int		devflags	metadevice flags
1096  *		deventry_t	*dependent	dependent of this metadevice
1097  *      Return:
1098  *		deventry_t	metadevice entry added to cache
1099  *      Locking: None
1100  */
1101 static deventry_t *
1102 cache_dependent(cache_t *cache, char *devname, int devflags,
1103     deventry_t *dependent)
1104 {
1105 
1106 	deventry_t	*newdeventry = NULL;
1107 	deventry_t	*hashprev = NULL;
1108 	deventry_t	*deventry = NULL;
1109 	deventry_t	*previous = NULL;
1110 	uint32_t	hash_index;
1111 	int		comp;
1112 
1113 	rcm_log_message(RCM_TRACE1, "SVM: Enter cache_dep %s, %x, %s\n",
1114 	    devname, devflags, dependent->devname);
1115 
1116 	hash_index = hash(cache->size, devname);
1117 	if (hash_index >= cache->size) {
1118 		rcm_log_message(RCM_ERROR,
1119 		    gettext("SVM: can't hash device."));
1120 		return (NULL);
1121 	}
1122 
1123 	deventry = cache->hashline[hash_index];
1124 
1125 	/* if the hash table slot is empty, then this is easy */
1126 	if (deventry == NULL) {
1127 		deventry = create_deventry(devname, SVM_SLICE, 0, devflags);
1128 		cache->hashline[hash_index] = deventry;
1129 	} else {
1130 	/* if the hash table slot isn't empty, find the immediate successor */
1131 		hashprev = NULL;
1132 		while ((comp = strcmp(deventry->devname, devname)) < 0 &&
1133 		    deventry->next != NULL) {
1134 			hashprev = deventry;
1135 			deventry = deventry->next;
1136 		}
1137 
1138 		if (comp == 0) {
1139 			/* if already in cache, just update the flags */
1140 			deventry->flags |= devflags;
1141 		} else {
1142 			/* insert the entry if it's not already there */
1143 			if ((newdeventry = create_deventry(devname,
1144 			    SVM_SLICE, 0, devflags)) == NULL) {
1145 				rcm_log_message(RCM_ERROR,
1146 				    gettext("SVM: can't create hash line."));
1147 				return (NULL);
1148 			}
1149 			if (comp > 0) {
1150 				newdeventry->next = deventry;
1151 				if (hashprev)
1152 					hashprev->next = newdeventry;
1153 				else
1154 					cache->hashline[hash_index] =
1155 					    newdeventry;
1156 			} else if (comp < 0) {
1157 				newdeventry->next = deventry->next;
1158 				deventry->next = newdeventry;
1159 			}
1160 			deventry = newdeventry;
1161 		}
1162 	}
1163 	/* complete deventry by linking the dependent to it */
1164 	dependent->antecedent = deventry;
1165 	if ((previous = deventry->dependent) != NULL) {
1166 		deventry_t *temp = previous->next_dep;
1167 		previous->next_dep = dependent;
1168 		dependent->next_dep = temp;
1169 	} else deventry->dependent = dependent;
1170 	return (deventry);
1171 
1172 }
1173 
1174 /*
1175  *      cache_device()
1176  *
1177  *      Add an entry to the cache for devname
1178  *
1179  *      Input:
1180  *		cache_t		*cache		cache
1181  *		char		*devname	metadevice named
1182  *		svm_type_t	devtype		metadevice type
1183  *		md_dev64_t	devkey		dev_t of device
1184  *		int		devflags	device flags
1185  *      Return:
1186  *		deventry_t	metadevice added to cache
1187  *      Locking: None
1188  */
1189 static deventry_t *
1190 cache_device(cache_t *cache, char *devname, svm_type_t devtype,
1191     md_dev64_t devkey, int devflags)
1192 {
1193 	deventry_t	*newdeventry = NULL;
1194 	deventry_t	*previous = NULL;
1195 	deventry_t	*deventry = NULL;
1196 	uint32_t	hash_index;
1197 	int		comp;
1198 
1199 	rcm_log_message(RCM_TRACE1, "SVM: Enter cache_device %s, %x, %lx, %x\n",
1200 	    devname, devtype, devkey, devflags);
1201 
1202 	hash_index = hash(cache->size, devname);
1203 	if (hash_index >= cache->size) {
1204 		rcm_log_message(RCM_ERROR,
1205 		    gettext("SVM: can't hash device."));
1206 		return (NULL);
1207 	}
1208 
1209 	deventry = cache->hashline[hash_index];
1210 
1211 	/* if the hash table slot is empty, then this is easy */
1212 	if (deventry == NULL) {
1213 		deventry = create_deventry(devname, devtype, devkey,
1214 		    devflags);
1215 		cache->hashline[hash_index] = deventry;
1216 	} else {
1217 	/* if the hash table slot isn't empty, find the immediate successor */
1218 		previous = NULL;
1219 		while ((comp = strcmp(deventry->devname, devname)) < 0 &&
1220 		    deventry->next != NULL) {
1221 			previous = deventry;
1222 			deventry = deventry->next;
1223 		}
1224 
1225 		if (comp == 0) {
1226 			/*
1227 			 * If entry already exists, just set the type, key
1228 			 * and flags
1229 			 */
1230 			deventry->devtype = devtype;
1231 			deventry->devkey = meta_cmpldev(devkey);
1232 			deventry->flags |= devflags;
1233 		} else {
1234 			/* insert the entry if it's not already there */
1235 			if ((newdeventry = create_deventry(devname, devtype,
1236 			    devkey, devflags)) == NULL) {
1237 				rcm_log_message(RCM_ERROR,
1238 				    gettext("SVM: can't create hash line."));
1239 			}
1240 			if (comp > 0) {
1241 				newdeventry->next = deventry;
1242 				if (previous)
1243 					previous->next = newdeventry;
1244 				else
1245 					cache->hashline[hash_index] =
1246 					    newdeventry;
1247 			} else if (comp < 0) {
1248 				newdeventry->next = deventry->next;
1249 				deventry->next = newdeventry;
1250 			}
1251 			deventry = newdeventry;
1252 		}
1253 	}
1254 	return (deventry);
1255 }
1256 /*
1257  *	free_names()
1258  *
1259  *	Free all name list entries
1260  *
1261  *	Input:
1262  *		mdnamelist_t		*np		namelist pointer
1263  *	Return: None
1264  */
1265 
1266 static void
1267 free_names(mdnamelist_t *nlp)
1268 {
1269 	mdnamelist_t *p;
1270 
1271 	for (p = nlp; p != NULL; p = p->next) {
1272 	    meta_invalidate_name(p->namep);
1273 	    p->namep = NULL;
1274 	}
1275 	metafreenamelist(nlp);
1276 }
1277 
1278 /*
1279  * cache_hsp()
1280  *
1281  *	Add an entry to the cache for each slice in the hot spare
1282  *	pool. Call add_hsp() to add the hot spare pool to the list
1283  *	of all hot spare pools.
1284  *
1285  *	Input:
1286  *		cache_t		*cache	cache
1287  *		mdhspnamelist_t	*nlp	pointer to hsp name
1288  *		md_hsp_t	*hsp
1289  *	Return:
1290  *		0 if successful or error code
1291  */
static int
cache_hsp(cache_t *cache, mdhspnamelist_t *nlp, md_hsp_t *hsp)
{
	int		i;
	deventry_t	*deventry;
	md_hs_t		*hs;

	/* Cache every hot spare slice in the pool */
	for (i = 0; i < hsp->hotspares.hotspares_len; i++) {
		hs = &hsp->hotspares.hotspares_val[i];
		/* each hot spare slice is flagged as pool-resident (IN_HSP) */
		if ((deventry = cache_device(cache, hs->hsnamep->bname,
		    SVM_SLICE, hs->hsnamep->dev,
		    IN_HSP)) == NULL) {
			return (ENOMEM);
		}
		/* record this pool on the slice's list of hot spare pools */
		if (add_hsp(nlp->hspnamep->hspname, deventry) == NULL) {
			return (ENOMEM);
		}
	}
	return (0);
}
1312 
1313 /*
1314  * cache_trans()
1315  *
1316  *	Add an entry to the cache for trans metadevice, the master
1317  *	and the log. Call cache_dependent() to link that master and
1318  *	the log to the trans metadevice.
1319  *
1320  *	Input:
1321  *		cache_t		*cache	cache
1322  *		mdnamelist_t	*nlp	pointer to trans name
1323  *		md_trans_t	*trans
1324  *	Return:
1325  *		0 if successful or error code
1326  *
1327  */
1328 static int
1329 cache_trans(cache_t *cache, mdnamelist_t *nlp, md_trans_t *trans)
1330 {
1331 	deventry_t	*antecedent;
1332 
1333 	if ((antecedent = cache_device(cache, nlp->namep->bname, SVM_TRANS,
1334 	    nlp->namep->dev, 0)) == NULL) {
1335 		return (ENOMEM);
1336 	}
1337 
1338 	if (cache_device(cache, trans->masternamep->bname, SVM_SLICE,
1339 	    trans->masternamep->dev, 0) == NULL) {
1340 		return (ENOMEM);
1341 	}
1342 
1343 	if (cache_dependent(cache, trans->masternamep->bname, 0,
1344 	    antecedent) == NULL) {
1345 		return (ENOMEM);
1346 	}
1347 
1348 	if (trans->lognamep != NULL) {
1349 		if (cache_device(cache, trans->lognamep->bname, SVM_SLICE,
1350 		    trans->lognamep->dev, TRANS_LOG) == NULL) {
1351 			return (ENOMEM);
1352 		}
1353 
1354 		if (cache_dependent(cache, trans->lognamep->bname, 0,
1355 		    antecedent) == NULL) {
1356 			return (ENOMEM);
1357 		}
1358 	}
1359 	return (0);
1360 }
1361 
1362 /*
1363  * cache_mirror()
1364  *
1365  *	Add an entry to the cache for the mirror. For each
1366  *	submirror, call cache_dependent() to add an entry to the
1367  *	cache and to link it to mirror entry.
1368  *
1369  *	Input:
1370  *		cache_t		*cache	cache
1371  *		mdnamelist_t	*nlp	pointer to mirror name
1372  *		md_mirror_t	*mirror
1373  *	Return:
1374  *		0 if successful or error code
1375  *
1376  */
1377 static int
1378 cache_mirror(cache_t *cache, mdnamelist_t *nlp, md_mirror_t *mirror)
1379 {
1380 	int i;
1381 	deventry_t	*antecedent;
1382 
1383 	if ((antecedent = cache_device(cache, nlp->namep->bname, SVM_MIRROR,
1384 	    nlp->namep->dev, 0)) == NULL) {
1385 		return (ENOMEM);
1386 	}
1387 	for (i = 0; i <  NMIRROR; i++) {
1388 		md_submirror_t	*submirror;
1389 
1390 		submirror = &mirror->submirrors[i];
1391 		if (submirror->state == SMS_UNUSED)
1392 			continue;
1393 
1394 		if (!submirror->submirnamep)
1395 			continue;
1396 
1397 		if (cache_dependent(cache, submirror->submirnamep->bname,
1398 		    0, antecedent) == NULL) {
1399 			return (ENOMEM);
1400 		}
1401 	}
1402 	return (0);
1403 }
1404 
1405 /*
1406  * cache_raid()
1407  *
1408  *	Add an entry to the cache for the RAID metadevice. For
1409  *	each component of the RAID call cache_dependent() to
1410  *	add it to the cache and to link it to the RAID metadevice.
1411  *
1412  *	Input:
1413  *		cache_t		*cache	cache
1414  *		mdnamelist_t	*nlp	pointer to raid name
1415  *		md_raid_t	*raid	mirror
1416  *	Return:
1417  *		0 if successful or error code
1418  */
1419 static int
1420 cache_raid(cache_t *cache, mdnamelist_t *nlp, md_raid_t *raid)
1421 {
1422 	int i;
1423 	deventry_t	*antecedent;
1424 
1425 	if ((antecedent = cache_device(cache, nlp->namep->bname, SVM_RAID,
1426 	    nlp->namep->dev, 0)) == NULL) {
1427 		return (ENOMEM);
1428 	}
1429 	if (raid->hspnamep) {
1430 		if (add_hsp_user(raid->hspnamep->hspname,
1431 		    antecedent) == NULL) {
1432 			return (ENOMEM);
1433 		}
1434 	}
1435 	for (i = 0; i < raid->cols.cols_len; i++) {
1436 		if (cache_dependent(cache,
1437 		    raid->cols.cols_val[i].colnamep->bname, 0,
1438 		    antecedent) == NULL) {
1439 			return (ENOMEM);
1440 		}
1441 	}
1442 	return (0);
1443 }
1444 
1445 /*
1446  * cache_stripe()
1447  *
1448  *	Add a CONCAT or a STRIPE entry to the cache for the
1449  *	metadevice and call cache_dependent() to add each
1450  *	component to the cache.
1451  *
1452  *	Input:
1453  *		cache_t		*cache	cache
1454  *		mdnamelist_t	*nlp	pointer to stripe name
1455  *		md_stripe_t	*stripe
1456  *	Return:
1457  *		0 if successful or error code
1458  *
1459  */
1460 static int
1461 cache_stripe(cache_t *cache, mdnamelist_t *nlp, md_stripe_t *stripe)
1462 {
1463 	int i;
1464 	deventry_t	*antecedent;
1465 
1466 	if ((antecedent = cache_device(cache, nlp->namep->bname, SVM_CONCAT,
1467 	    nlp->namep->dev, 0)) == NULL) {
1468 		return (ENOMEM);
1469 	}
1470 
1471 	if (stripe->hspnamep) {
1472 		if (add_hsp_user(stripe->hspnamep->hspname,
1473 		    antecedent) == NULL) {
1474 			return (ENOMEM);
1475 		}
1476 	}
1477 	for (i = 0; i < stripe->rows.rows_len; i++) {
1478 		md_row_t	*rowp;
1479 		int		j;
1480 
1481 		rowp = &stripe->rows.rows_val[i];
1482 		if (stripe->rows.rows_len == 1 && rowp->comps.comps_len > 1) {
1483 			if ((void*) cache_device(cache, nlp->namep->bname,
1484 			    SVM_STRIPE, nlp->namep->dev, 0) == NULL)
1485 				return (ENOMEM);
1486 		}
1487 		for (j = 0; j < rowp->comps.comps_len; j++) {
1488 			md_comp_t	*component;
1489 
1490 			component = &rowp->comps.comps_val[j];
1491 			if (cache_dependent(cache,
1492 			    component->compnamep->bname, 0,
1493 			    antecedent) == NULL) {
1494 				return (ENOMEM);
1495 			}
1496 		}
1497 	}
1498 	return (0);
1499 }
1500 
1501 /*
1502  * cache_sp()
1503  *
1504  *	Add an entry to the cache for the softpart and also call
1505  *	cache_dependent() to set the CONT_SOFTPART flag in the
1506  *	cache entry for the metadevice that contains the softpart.
1507  *
1508  *	Input:
1509  *		cache_t		*cache	cache
1510  *		mdnamelist_t	*nlp	pointer to soft part name
1511  *		md_sp_t		*soft_part
1512  *	Return:
1513  *		0 if successful or error code
1514  *
1515  */
1516 static int
1517 cache_sp(cache_t *cache, mdnamelist_t *nlp, md_sp_t *soft_part)
1518 {
1519 	deventry_t	*antecedent;
1520 
1521 	if ((antecedent = cache_device(cache, nlp->namep->bname,
1522 	    SVM_SOFTPART, nlp->namep->dev, 0)) == NULL) {
1523 			    return (ENOMEM);
1524 	}
1525 	if (cache_dependent(cache, soft_part->compnamep->bname,
1526 	    CONT_SOFTPART, antecedent) == NULL) {
1527 		return (ENOMEM);
1528 	}
1529 	return (0);
1530 }
1531 
1532 /*
1533  *      cache_all_devices_in_set()
1534  *
1535  *      Add all of the metadevices and mddb replicas in the set to the
1536  *	cache
1537  *
1538  *      Input:
1539  *		cache_t		*cache		cache
1540  *		mdsetname_t	*sp		setname
1541  *      Return:
1542  *		0 if successful or error code
1543  */
1544 
1545 static int
1546 cache_all_devices_in_set(cache_t *cache, mdsetname_t *sp)
1547 {
1548 	md_error_t		error = mdnullerror;
1549 	md_replicalist_t	*replica_list = NULL;
1550 	md_replicalist_t	*mdbp;
1551 	mdnamelist_t		*nlp;
1552 	mdnamelist_t		*trans_list = NULL;
1553 	mdnamelist_t		*mirror_list = NULL;
1554 	mdnamelist_t		*raid_list = NULL;
1555 	mdnamelist_t		*stripe_list = NULL;
1556 	mdnamelist_t		*sp_list = NULL;
1557 	mdhspnamelist_t		*hsp_list = NULL;
1558 
1559 	rcm_log_message(RCM_TRACE1, "SVM: cache_all_devices_in_set\n");
1560 
1561 	/* Add each mddb replica to the cache */
1562 	if (metareplicalist(sp, MD_BASICNAME_OK, &replica_list, &error) < 0) {
1563 	    /* there are no metadb's; that is ok, no need to check the rest */
1564 	    mdclrerror(&error);
1565 	    return (0);
1566 	}
1567 
1568 	for (mdbp = replica_list; mdbp != NULL; mdbp = mdbp->rl_next) {
1569 		if (cache_device(cache, mdbp->rl_repp->r_namep->bname,
1570 		    SVM_SLICE, mdbp->rl_repp->r_namep->dev,
1571 		    CONT_METADB) == NULL) {
1572 			metafreereplicalist(replica_list);
1573 			return (ENOMEM);
1574 		}
1575 	}
1576 	metafreereplicalist(replica_list);
1577 
1578 	/* Process Hot Spare pools */
1579 	if (meta_get_hsp_names(sp, &hsp_list, 0, &error) >= 0) {
1580 	    mdhspnamelist_t *nlp;
1581 
1582 		for (nlp = hsp_list; nlp != NULL; nlp = nlp->next) {
1583 			md_hsp_t	*hsp;
1584 
1585 			hsp = meta_get_hsp(sp, nlp->hspnamep, &error);
1586 			if (hsp != NULL) {
1587 				if (cache_hsp(cache, nlp, hsp) != 0) {
1588 					metafreehspnamelist(hsp_list);
1589 					return (ENOMEM);
1590 				}
1591 			}
1592 			meta_invalidate_hsp(nlp->hspnamep);
1593 		}
1594 		metafreehspnamelist(hsp_list);
1595 	}
1596 
1597 	/* Process Trans devices */
1598 	if (meta_get_trans_names(sp, &trans_list, 0, &error) >= 0) {
1599 		for (nlp = trans_list; nlp != NULL; nlp = nlp->next) {
1600 			mdname_t	*mdn;
1601 			md_trans_t	*trans;
1602 
1603 			mdn = metaname(&sp, nlp->namep->cname, META_DEVICE,
1604 			    &error);
1605 			if (mdn == NULL) {
1606 				continue;
1607 			}
1608 
1609 			trans = meta_get_trans(sp, mdn, &error);
1610 
1611 			if (trans != NULL && trans->masternamep != NULL) {
1612 				if (cache_trans(cache, nlp, trans) != NULL) {
1613 					free_names(trans_list);
1614 					return (ENOMEM);
1615 				}
1616 			}
1617 		}
1618 		free_names(trans_list);
1619 	}
1620 
1621 	/* Process Mirrors */
1622 	if (meta_get_mirror_names(sp, &mirror_list, 0, &error) >= 0) {
1623 		for (nlp = mirror_list; nlp != NULL; nlp = nlp->next) {
1624 			mdname_t	*mdn;
1625 			md_mirror_t	*mirror;
1626 
1627 			mdn = metaname(&sp, nlp->namep->cname, META_DEVICE,
1628 			    &error);
1629 			if (mdn == NULL) {
1630 				continue;
1631 			}
1632 
1633 			mirror = meta_get_mirror(sp, mdn, &error);
1634 
1635 			if (mirror != NULL) {
1636 				if (cache_mirror(cache, nlp, mirror) != 0) {
1637 					free_names(mirror_list);
1638 					return (ENOMEM);
1639 				}
1640 			}
1641 		}
1642 		free_names(mirror_list);
1643 	}
1644 
1645 	/* Process Raid devices */
1646 	if (meta_get_raid_names(sp, &raid_list, 0, &error) >= 0) {
1647 		for (nlp = raid_list; nlp != NULL; nlp = nlp->next) {
1648 			mdname_t	*mdn;
1649 			md_raid_t	*raid;
1650 
1651 			mdn = metaname(&sp, nlp->namep->cname, META_DEVICE,
1652 			    &error);
1653 			if (mdn == NULL) {
1654 				continue;
1655 			}
1656 
1657 			raid = meta_get_raid(sp, mdn, &error);
1658 
1659 			if (raid != NULL) {
1660 				if (cache_raid(cache, nlp, raid) != 0) {
1661 					free_names(raid_list);
1662 					return (ENOMEM);
1663 				}
1664 			}
1665 		}
1666 		free_names(raid_list);
1667 	}
1668 
1669 	/* Process Slices */
1670 	if (meta_get_stripe_names(sp, &stripe_list, 0, &error) >= 0) {
1671 		for (nlp = stripe_list; nlp != NULL; nlp = nlp->next) {
1672 			mdname_t	*mdn;
1673 			md_stripe_t	*stripe;
1674 
1675 			mdn = metaname(&sp, nlp->namep->cname, META_DEVICE,
1676 			    &error);
1677 			if (mdn == NULL) {
1678 				continue;
1679 			}
1680 
1681 			stripe = meta_get_stripe(sp, mdn, &error);
1682 
1683 			if (stripe != NULL) {
1684 				if (cache_stripe(cache, nlp, stripe) != 0) {
1685 					free_names(stripe_list);
1686 					return (ENOMEM);
1687 				}
1688 			}
1689 		}
1690 		free_names(stripe_list);
1691 	}
1692 
1693 	/* Process Soft partitions */
1694 	if (meta_get_sp_names(sp, &sp_list, 0, &error) >= 0) {
1695 		for (nlp = sp_list; nlp != NULL; nlp = nlp->next) {
1696 			mdname_t	*mdn;
1697 			md_sp_t		*soft_part;
1698 
1699 			mdn = metaname(&sp, nlp->namep->cname, META_DEVICE,
1700 			    &error);
1701 			if (mdn == NULL) {
1702 				continue;
1703 			}
1704 
1705 			soft_part = meta_get_sp(sp, mdn, &error);
1706 
1707 			if (soft_part != NULL) {
1708 				if (cache_sp(cache, nlp, soft_part) != 0) {
1709 					free_names(sp_list);
1710 					return (ENOMEM);
1711 				}
1712 			}
1713 		}
1714 		free_names(sp_list);
1715 	}
1716 	mdclrerror(&error);
1717 	return (0);
1718 }
1719 
1720 /*
1721  *      cache_all_devices()
1722  *
1723  *      Cache all devices in all sets
1724  *
1725  *      Input:
1726  *		cache_t		cache
1727  *      Return:
1728  *		0 if successful, error code if not
1729  *      Locking: None
1730  */
1731 static int
1732 cache_all_devices(cache_t *cache)
1733 {
1734 	int		max_sets;
1735 	md_error_t	error = mdnullerror;
1736 	int		i;
1737 
1738 	if ((max_sets = get_max_sets(&error)) == 0) {
1739 		return (0);
1740 	}
1741 	if (!mdisok(&error)) {
1742 		mdclrerror(&error);
1743 		return (0);
1744 	}
1745 
1746 	rcm_log_message(RCM_TRACE1,
1747 	    "SVM: cache_all_devices,max sets = %d\n", max_sets);
1748 	/* for each possible set number, see if we really have a diskset */
1749 	for (i = 0; i < max_sets; i++) {
1750 		mdsetname_t	*sp;
1751 
1752 		if ((sp = metasetnosetname(i, &error)) == NULL) {
1753 			rcm_log_message(RCM_TRACE1,
1754 			    "SVM: cache_all_devices no set: setno %d\n", i);
1755 			if (!mdisok(&error) &&
1756 			    ((error.info.errclass == MDEC_RPC) ||
1757 			    (mdiserror(&error, MDE_SMF_NO_SERVICE)))) {
1758 				/*
1759 				 * metad rpc program not available
1760 				 * - no metasets.  metad rpc not available
1761 				 * is indicated either by an RPC error or
1762 				 * the fact that the service is not
1763 				 * enabled.
1764 				 */
1765 				break;
1766 			}
1767 
1768 			continue;
1769 		}
1770 
1771 		if (cache_all_devices_in_set(cache, sp)) {
1772 			metaflushsetname(sp);
1773 			return (ENOMEM);
1774 		}
1775 		metaflushsetname(sp);
1776 	}
1777 	mdclrerror(&error);
1778 	rcm_log_message(RCM_TRACE1, "SVM: exit cache_all_devices\n");
1779 	return (0);
1780 }
1781 
1782 /*
1783  *      create_cache()
1784  *
1785  *      Create an empty cache
1786  *	If the function fails free_cache() will be called to free any
1787  *	allocated memory.
1788  *
1789  *      Input: None
1790  *      Return:
1791  *		cache_t		cache created
1792  *      Locking: None
1793  */
1794 static cache_t *
1795 create_cache()
1796 {
1797 	cache_t		*cache;
1798 	uint32_t	size;
1799 	int		ret;
1800 
1801 	size = HASH_DEFAULT;
1802 	/* try allocating storage for a new, empty cache */
1803 	if ((cache = (cache_t *)malloc(sizeof (cache_t))) == NULL) {
1804 		rcm_log_message(RCM_ERROR, MSG_CACHEFAIL);
1805 		return (NULL);
1806 	}
1807 
1808 	(void) memset((char *)cache, 0, sizeof (*cache));
1809 	cache->hashline = (deventry_t **)calloc(size, sizeof (deventry_t *));
1810 	if (cache->hashline == NULL) {
1811 		rcm_log_message(RCM_ERROR, MSG_CACHEFAIL);
1812 		free(cache);
1813 		return (NULL);
1814 	}
1815 	cache->size = size;
1816 
1817 	/* Initialise linked list of hsp entries */
1818 	hsp_head = NULL;
1819 
1820 	/* add entries to cache */
1821 	ret = cache_all_devices(cache);
1822 	if (ret != 0) {
1823 		free_cache(&cache);
1824 		return (NULL);
1825 	}
1826 
1827 	/* Mark the cache as new */
1828 	cache->registered = 0;
1829 
1830 	/* Finished - return the new cache */
1831 	return (cache);
1832 }
1833 
1834 /*
1835  *      create_deventry()
1836  *
1837  *      Create a new deventry entry for device with name devname
1838  *	The memory allocated here will be freed by free_cache()
1839  *
1840  *      Input:
1841  *		char		*devname	device name
1842  *		svm_type_t	devtype		metadevice type
1843  *		md_dev64_t	devkey		device key
1844  *		int		devflags	device flags
1845  *      Return:
1846  *		deventry_t	New deventry
1847  *      Locking: None
1848  */
1849 static deventry_t *
1850 create_deventry(char *devname, svm_type_t devtype, md_dev64_t devkey,
1851     int devflags)
1852 {
1853 	const char	*devprefix = "/dev/";
1854 	deventry_t	*newdeventry = NULL;
1855 	char		*newdevname = NULL;
1856 	char		*devicesname = NULL;
1857 
1858 	newdeventry = (deventry_t *)malloc(sizeof (*newdeventry));
1859 	if (newdeventry == NULL) {
1860 		rcm_log_message(RCM_ERROR,
1861 		    gettext("SVM: can't malloc deventrys"));
1862 		goto errout;
1863 	}
1864 	(void) memset((char *)newdeventry, 0, sizeof (*newdeventry));
1865 
1866 	newdevname = strdup(devname);
1867 	if (newdevname == NULL) {
1868 		rcm_log_message(RCM_ERROR,
1869 		    gettext("SVM: can't malloc devname"));
1870 		goto errout;
1871 	}
1872 
1873 	/*
1874 	 * When we register interest in a name starting with /dev/, RCM
1875 	 * will use realpath to convert the name to a /devices name before
1876 	 * storing it.  metaclear removes both the /dev and the /devices
1877 	 * form of the name of a metadevice from the file system.  Thus,
1878 	 * when we later call rcm_unregister_interest to get rid of a
1879 	 * metacleared device, RCM will not be able to derive the /devices
1880 	 * name for the /dev name.  Thus, to unregister we will need to use
1881 	 * the /devices name.  We will save it now, so that we have it when
1882 	 * it comes time to unregister.
1883 	 */
1884 	if (strncmp(devname, devprefix, strlen(devprefix)) == 0) {
1885 		devicesname = (char *)malloc(PATH_MAX);
1886 		if (devicesname == NULL) {
1887 			rcm_log_message(RCM_ERROR,
1888 			    gettext("SVM: can't malloc PATH_MAX bytes"));
1889 			goto errout;
1890 		}
1891 		if (realpath(devname, devicesname) == NULL) {
1892 			free(devicesname);
1893 			devicesname = NULL;
1894 		}
1895 	}
1896 	newdeventry->devname = newdevname;
1897 	newdeventry->devicesname = devicesname;
1898 	newdeventry->devtype = devtype;
1899 	newdeventry->devkey = meta_cmpldev(devkey);
1900 	newdeventry->flags = devflags;
1901 	if (newdeventry->devicesname == NULL) {
1902 		rcm_log_message(RCM_TRACE1,
1903 			"SVM created deventry for %s\n", newdeventry->devname);
1904 	} else {
1905 		rcm_log_message(RCM_TRACE1,
1906 			"SVM created deventry for %s (%s)\n",
1907 			newdeventry->devname, newdeventry->devicesname);
1908 	}
1909 	return (newdeventry);
1910 
1911 errout:
1912 	if (devicesname != NULL)
1913 		free(devicesname);
1914 	if (newdevname != NULL)
1915 		free(newdevname);
1916 	if (newdeventry != NULL)
1917 		free(newdeventry);
1918 	return (NULL);
1919 }
1920 
1921 /*
1922  *      cache_remove()
1923  *
1924  *      Given a cache and a deventry, the deventry is
1925  *      removed from the cache's tables and memory for the deventry is
1926  *      free'ed.
1927  *
1928  *      Input:
1929  *		cache_t		*cache		cache
1930  *		deventry_t	*deventry	deventry to be removed
1931  *      Return: None
1932  *      Locking: The cache must be locked by the caller prior to calling
1933  *      this routine.
1934  */
static void
cache_remove(cache_t *cache, deventry_t *deventry)
{
	deventry_t	*olddeventry;
	deventry_t	*previous;
	hspentry_t	*hspentry;
	hspentry_t	*oldhspentry;
	hspuser_t	*hspuser;
	hspuser_t	*oldhspuser;
	uint32_t	hash_index;

	/* sanity check */
	if (cache == NULL || deventry == NULL || deventry->devname == NULL)
		return;


	/* If this is in the hash table, remove it from there */
	hash_index = hash(cache->size, deventry->devname);
	if (hash_index >= cache->size) {
		rcm_log_message(RCM_ERROR,
		    gettext("SVM: can't hash device."));
		return;
	}
	/* walk the hash chain looking for a matching device name */
	olddeventry = cache->hashline[hash_index];
	previous = NULL;
	while (olddeventry) {
		if (olddeventry->devname &&
		    strcmp(olddeventry->devname, deventry->devname) == 0) {
			break;
		}
		previous = olddeventry;
		olddeventry = olddeventry->next;
	}
	if (olddeventry) {
		/* unlink from the chain (or advance the chain head) */
		if (previous)
			previous->next = olddeventry->next;
		else
			cache->hashline[hash_index] = olddeventry->next;

		if (olddeventry->flags&IN_HSP) {
			/*
			 * If this is in a hot spare pool, remove the list
			 * of hot spare pools that it is in along with
			 * all of the volumes that are users of the pool
			 *
			 * NOTE(review): these hspentry structures are also
			 * linked on the global hsp_head list via ->link;
			 * freeing them here appears to leave stale pointers
			 * on that list — confirm against free_cache().
			 */
			hspentry = olddeventry->hsp_list;
			while (hspentry) {
				oldhspentry = hspentry;
				hspuser = hspentry->hspuser;
				while (hspuser) {
					oldhspuser = hspuser;
					free(hspuser->hspusername);
					hspuser = hspuser->next;
					free(oldhspuser);
				}
				free(hspentry->hspname);
				hspentry = hspentry->next;
				free(oldhspentry);
			}
		}
		/*
		 * NOTE(review): olddeventry->devicesname (allocated by
		 * create_deventry) is not freed here — presumably it is
		 * released at unregister time; verify, otherwise it leaks.
		 */
		free(olddeventry->devname);
		free(olddeventry);
	}

}
2000 
2001 /*
2002  *      cache_lookup()
2003  *
2004  *      Return the deventry corresponding to devname from the cache
2005  *      Input:
2006  *		cache_t		cache		cache
2007  *		char		*devname	name to lookup in cache
2008  *      Return:
2009  *		deventry_t	deventry of name, NULL if not found
2010  *      Locking: cache lock held on entry and on exit
2011  */
2012 static deventry_t *
2013 cache_lookup(cache_t *cache, char *devname)
2014 {
2015 	int		comp;
2016 	uint32_t	hash_index;
2017 	deventry_t	*deventry;
2018 
2019 	hash_index = hash(cache->size, devname);
2020 	if (hash_index >= cache->size) {
2021 		rcm_log_message(RCM_ERROR,
2022 		    gettext("SVM: can't hash resource."));
2023 		return (NULL);
2024 	}
2025 
2026 	deventry = cache->hashline[hash_index];
2027 	while (deventry) {
2028 		comp = strcmp(deventry->devname, devname);
2029 		if (comp == 0)
2030 			return (deventry);
2031 		if (comp > 0)
2032 			return (NULL);
2033 		deventry = deventry->next;
2034 	}
2035 	return (NULL);
2036 }
2037 
2038 /*
2039  *      cache_sync()
2040  *
2041  *	Resync cache with the svm database.  First a new cache is created
2042  *	that represents the current state of the SVM database.  The
2043  *	function walks the new cache to look for new entries that must be
2044  *	registered.  The new entries are kept in a list, because we cannot
2045  *	register them at this point.  Entries that appear in both caches
2046  *	are removed from the old cache.  Because of this at the end of the
2047  *	walk, the old cache will only contain devices that have been
2048  *	removed and need to be unregistered.
2049  *
2050  *	Next the old cache is walked, so that we can unregister the devices
2051  *	that are no longer present.
2052  *
2053  *	Finally, we process the list of new devices that must be
2054  *	registered.  There is a reason why we must unregister the removed
2055  *	(metacleared) devices before registering the new ones.  It has to
2056  *	do with the fact that rcm_register_interest calls realpath(3C) to
2057  *	convert a /dev name to a /devices name.  It uses the /devices name
2058  *	for storing the device information.
2059  *
2060  *	It can happen that between cache_syncs that the administrator
2061  *	metaclears one metadevice and metacreates a new one.  For example,
2062  *
2063  *		metaclear acct
2064  *		metainit engr 1 1 c1t12d0s0
2065  *
2066  *	The metaclear operation frees up the minor number that was being
2067  *	used by acct.  The metainit operation can then reuse the minor
2068  *	number.  This means that both metadevices would have the same
2069  *	/devices name even though they had different /dev names.  Since
2070  *	rcm_register_interest uses /devices names for storing records, we
2071  *	need to unregister acct before registering engr.  Otherwise we
2072  *	would get an EALREADY errno and a failed registration.  This is why
2073  *	cache_sync creates a list of devices to be registered after all the
2074  *	removed devices have been unregistered.
2075  *
2076  *      Input:
2077  *		rcm_handle_t	*hd		rcm handle
2078  *		cache_t		**cachep	pointer to cache
2079  *      Return:
2080  *		cache_t		**cachep	pointer to new cache
2081  *      Return: None
2082  *      Locking: The cache must be locked prior to entry
2083  */
2084 static void
2085 cache_sync(rcm_handle_t *hd, cache_t **cachep)
2086 {
2087 	char		*devicename;
2088 	deventry_t	*deventry;
2089 	cache_t		*new_cache;
2090 	cache_t		*old_cache = *cachep;
2091 	deventry_t	*hashline = NULL;
2092 	deventry_t	**register_list = NULL;
2093 	deventry_t	*register_this;
2094 	uint32_t	register_count = 0;	/* # entrys in register_list */
2095 	uint32_t	allocated = 0;		/* # entrys allocated in */
2096 						/* register_list */
2097 	uint32_t	allocate_incr = 16;
2098 	uint32_t	i = 0;
2099 
2100 	/* Get a new cache */
2101 	if ((new_cache = create_cache()) == NULL) {
2102 		rcm_log_message(RCM_WARNING, MSG_NORECACHE);
2103 		return;
2104 	}
2105 
2106 	/* For every entry in the new cache... */
2107 	while ((devicename = cache_walk(new_cache, &i, &hashline)) != NULL) {
2108 		register_this = NULL;
2109 
2110 		/* Look for this entry in the old cache */
2111 		deventry = cache_lookup(old_cache, devicename);
2112 		/*
2113 		 * If no entry in old cache, register the resource. If there
2114 		 * is an entry, but it is marked as removed, register it
2115 		 * again and remove it from the old cache
2116 		 */
2117 		if (deventry == NULL) {
2118 			register_this = hashline;
2119 		} else {
2120 			if (deventry->flags&REMOVED)
2121 				register_this = hashline;
2122 			cache_remove(old_cache, deventry);
2123 		}
2124 
2125 		/* Save this entry if we need to register it later. */
2126 		if (register_this) {
2127 			if (register_count >= allocated) {
2128 				/* Need to extend our array */
2129 				allocated += allocate_incr;
2130 				register_list =
2131 					(deventry_t **)realloc(register_list,
2132 					allocated * sizeof (*register_list));
2133 				if (register_list == NULL) {
2134 					/* Out of memory.  Give up. */
2135 					rcm_log_message(RCM_WARNING,
2136 						MSG_NORECACHE);
2137 					free(new_cache);
2138 					return;
2139 				}
2140 			}
2141 			*(register_list + register_count) = register_this;
2142 			register_count++;
2143 		}
2144 	}
2145 
2146 	/*
2147 	 * For every device left in the old cache, just unregister if
2148 	 * it has not already been removed
2149 	 */
2150 	i = 0;
2151 	hashline = NULL;
2152 	while ((devicename = cache_walk(old_cache, &i, &hashline)) != NULL) {
2153 		if (!(hashline->flags&REMOVED)) {
2154 			(void) svm_unregister_device(hd, hashline);
2155 		}
2156 	}
2157 
2158 	/* Register the new devices. */
2159 	for (i = 0; i < register_count; i++) {
2160 		deventry = *(register_list + i);
2161 		svm_register_device(hd, deventry->devname);
2162 	}
2163 	if (register_list)
2164 		free(register_list);
2165 
2166 	/* Swap pointers */
2167 	*cachep = new_cache;
2168 
2169 	/* Destroy old cache */
2170 	free_cache(&old_cache);
2171 
2172 	/* Mark the new cache as registered */
2173 	new_cache-> registered = 1;
2174 }
2175 
2176 /*
2177  * cache_walk()
2178  *
2179  *      Perform one step of a walk through the cache.  The i and hashline
2180  *      parameters are updated to store progress of the walk for future steps.
2181  *      They must all be initialized for the beginning of the walk
2182  *      (i = 0, line = NULL). Initialize variables to these values for these
2183  *      parameters, and then pass in the address of each of the variables
2184  *      along with the cache.  A NULL return value will be given to indicate
2185  *      when there are no more cached items to be returned.
2186  *
2187  *      Input:
2188  *		cache_t		*cache		cache
2189  *		uint32_t	*i		hash table index of prev entry
2190  *		deventry_t	**line		ptr to previous device entry
2191  *      Output:
2192  *		uint32_t	*i		updated hash table index
2193  *		deventry_t	**line		ptr to device entry
2194  *      Return:
2195  *		char*		device name (NULL for end of cache)
2196  *      Locking: The cache must be locked prior to calling this routine.
2197  */
2198 static char *
2199 cache_walk(cache_t *cache, uint32_t *i, deventry_t **line)
2200 {
2201 	uint32_t	j;
2202 
2203 	/* sanity check */
2204 	if (cache == NULL || i == NULL || line == NULL ||
2205 	    *i >= cache->size)
2206 		return (NULL);
2207 
2208 	/* if initial values were given, look for the first entry */
2209 	if (*i == 0 && *line == NULL) {
2210 		for (j = 0; j < cache->size; j++) {
2211 			if (cache->hashline[j]) {
2212 				*i = j;
2213 				*line = cache->hashline[j];
2214 				return ((*line)->devname);
2215 			}
2216 		}
2217 	} else {
2218 		/* otherwise, look for the next entry for this hash value */
2219 		if (*line && (*line)->next) {
2220 			*line = (*line)->next;
2221 			return ((*line)->devname);
2222 		} else {
2223 		/* next look further down in the hash table */
2224 			for (j = (*i) + 1; j < cache->size; j++) {
2225 				if (cache->hashline[j]) {
2226 					*i = j;
2227 					*line = cache->hashline[j];
2228 					return ((*line)->devname);
2229 				}
2230 			}
2231 		}
2232 	}
2233 
2234 	/*
2235 	 * We would have returned somewhere above if there were any more
2236 	 * entries.  So set the sentinel values and return a NULL.
2237 	 */
2238 	*i = cache->size;
2239 	*line = NULL;
2240 	return (NULL);
2241 }
2242 
2243 /*
2244  *      free_cache()
2245  *
2246  *      Given a pointer to a cache structure, this routine will free all
2247  *      of the memory allocated within the cache.
2248  *
2249  *      Input:
2250  *		cache_t		**cache		ptr to cache
2251  *      Return: None
2252  *      Locking: cache lock held on entry
2253  */
2254 static void
2255 free_cache(cache_t **cache)
2256 {
2257 	uint32_t	index;
2258 	cache_t		*realcache;
2259 
2260 	/* sanity check */
2261 	if (cache == NULL || *cache == NULL)
2262 		return;
2263 
2264 	/* de-reference the cache pointer */
2265 	realcache = *cache;
2266 
2267 	/* free the hash table */
2268 	for (index = 0; index < realcache->size; index++) {
2269 		free_deventry(&realcache->hashline[index]);
2270 	}
2271 	free(realcache->hashline);
2272 	realcache->hashline = NULL;
2273 
2274 	free(realcache);
2275 	*cache = NULL;
2276 }
2277 
2278 /*
2279  *      free_deventry()
2280  *
2281  *      This routine frees all of the memory allocated within a node of a
2282  *      deventry.
2283  *
2284  *      Input:
2285  *		deventry_t	**deventry	ptr to deventry
2286  *      Return: None
2287  *      Locking: cache lock held on entry
2288  */
static void
free_deventry(deventry_t **deventry)
{
	deventry_t	*olddeventry;	/* next entry, saved before freeing */
	hspentry_t	*hspentry;
	hspentry_t	*oldhspentry;
	hspuser_t	*hspuser;
	hspuser_t	*oldhspuser;

	if (deventry != NULL) {
		/* Walk the chain, freeing each node and its owned strings. */
		while (*deventry != NULL) {
			/* Save the successor before this node is destroyed */
			olddeventry = (*deventry)->next;
			if ((*deventry)->flags&IN_HSP) {
				/*
				 * If this is in a hot spare pool, remove the
				 * memory allocated to hot spare pools and
				 * the users of the pool
				 */
				hspentry = (*deventry)->hsp_list;
				while (hspentry) {
					oldhspentry = hspentry;
					hspuser = hspentry->hspuser;
					/* free each user of this pool */
					while (hspuser) {
						oldhspuser = hspuser;
						free(hspuser->hspusername);
						/* advance before freeing */
						hspuser = hspuser->next;
						free(oldhspuser);
					}
					free(hspentry->hspname);
					/* advance before freeing the entry */
					hspentry = hspentry->next;
					free(oldhspentry);
				}
			}
			/* devicesname is optional and may be NULL */
			if ((*deventry)->devicesname)
				free((*deventry)->devicesname);
			free((*deventry)->devname);
			free (*deventry);
			*deventry = olddeventry;
		}
	}
}
2330 
2331 /*
2332  *      hash()
2333  *
2334  *	A rotating hashing function that converts a string 's' to an index
2335  *      in a hash table of size 'h'.
2336  *
2337  *      Input:
2338  *		uint32_t	h		hash table size
2339  *		char		*s		string to be hashed
2340  *      Return:
2341  *		uint32_t	hash value
2342  *      Locking: None
2343  */
2344 static uint32_t
2345 hash(uint32_t h, char *s)
2346 {
2347 
2348 	int	len;
2349 	int	hash, i;
2350 
2351 	len = strlen(s);
2352 
2353 	for (hash = len, i = 0; i < len; ++i) {
2354 		hash = (hash<<4)^(hash>>28)^s[i];
2355 	}
2356 	return (hash % h);
2357 }
2358 
2359 /*
2360  *      svm_register_device()
2361  *
2362  *      Register a device
2363  *
2364  *      Input:
2365  *		rcm_handle_t	*hd		rcm handle
2366  *		char		*devname	device name
2367  *      Return: None
2368  *      Locking: None
2369  */
2370 static void
2371 svm_register_device(rcm_handle_t *hd, char *devname)
2372 {
2373 	/* Sanity check */
2374 	if (devname == NULL)
2375 		return;
2376 
2377 	rcm_log_message(RCM_TRACE1, "SVM: Registering %s(%d)\n", devname,
2378 		devname);
2379 
2380 	if (rcm_register_interest(hd, devname, 0, NULL) != RCM_SUCCESS) {
2381 		rcm_log_message(RCM_ERROR,
2382 		    gettext("SVM: failed to register \"%s\"\n"), devname);
2383 	}
2384 }
2385 
2386 /*
2387  *      add_dep()
2388  *
2389  *      Add an entry to an array of dependent names for a device. Used to
2390  *      build an array to call the rcm framework with when passing on a
2391  *      DR request.
2392  *
2393  *      Input:
2394  *		int		*ndeps		ptr to current number of deps
2395  *		char		***depsp	ptr to current dependent array
2396  *		deventry_t	*deventry	deventry of device to be added
2397  *      Output:
2398  *		int		*ndeps		ptr to updated no of deps
 *		char		***depsp	ptr to new dependent array
 *      Return:
 *		int		0, if ok, -1 if failed to allocate memory
2402  *      Locking: None
2403  */
2404 static int
2405 add_dep(int *ndeps, char ***depsp, deventry_t *deventry)
2406 {
2407 	char	**deps_new;
2408 
2409 	*ndeps += 1;
2410 	deps_new = realloc(*depsp, ((*ndeps) + 1) * sizeof (char  *));
2411 	if (deps_new == NULL) {
2412 		rcm_log_message(RCM_ERROR,
2413 		    gettext("SVM: cannot allocate dependent array (%s).\n"),
2414 		    strerror(errno));
2415 		return (-1);
2416 	}
2417 	deps_new[(*ndeps-1)] = deventry->devname;
2418 	deps_new[(*ndeps)] = NULL;
2419 	*depsp = deps_new;
2420 	return (0);
2421 }
2422 
2423 
2424 /*
2425  *      get_dependent()
2426  *
2427  *      Create a list of all dependents of a device
2428  *      Do not add dependent if it is marked as removed
2429  *
2430  *      Input:
2431  *		deventry_t	*deventry	device entry
2432  *      Output:
 *		char		***dependentsp	ptr to dependent list
2434  *      Return:
2435  *		int		0, if ok, -1 if failed
2436  *      Locking: None
2437  */
2438 static int
2439 get_dependents(deventry_t *deventry, char *** dependentsp)
2440 {
2441 	int		ndeps = 0;
2442 	deventry_t	*dependent;
2443 	char		**deps = NULL;
2444 
2445 
2446 	dependent = deventry->dependent;
2447 	if (dependent == NULL) {
2448 		*dependentsp = NULL;
2449 		return (0);
2450 	}
2451 	while (dependent != NULL) {
2452 		/*
2453 		 * do not add dependent if we have
2454 		 * already received a remove notifification
2455 		 */
2456 		if (!(dependent->flags&REMOVED))
2457 			if (add_dep(&ndeps, &deps, dependent) < 0)
2458 				return (-1);
2459 		dependent = dependent->next_dep;
2460 	}
2461 	if (ndeps == 0) {
2462 		*dependentsp = NULL;
2463 	} else {
2464 		*dependentsp = deps;
2465 	}
2466 	return (0);
2467 }
2468 
2469 /*
2470  *      add_to_usage()
2471  *      Add string to the usage string pointed at by usagep. Allocate memory
2472  *      for the new usage string and free the memory used by the original
2473  *      usage string
2474  *
2475  *      Input:
2476  *		char	**usagep	ptr to usage string
2477  *		char	*string		string to be added to usage
2478  *      Return:
2479  *		char	ptr to new usage string
2480  *      Locking: None
2481  */
char *
add_to_usage(char **usagep, char *string)
{
	int	size;
	char	*result;

	/* Size the new buffer: old usage + ", " separator + new string. */
	size = strlen(string) + 1;
	if (*usagep != NULL)
		size += strlen(*usagep) + 2;	/* room for ", " */

	result = calloc(1, size);
	if (result == NULL)
		return (NULL);

	if (*usagep != NULL) {
		/* Join old and new with a comma, then release the old. */
		(void) snprintf(result, size, "%s, %s", *usagep, string);
		free(*usagep);
	} else {
		(void) snprintf(result, size, "%s", string);
	}
	return (result);
}
2504 
2505 /*
2506  *      add_to_usage_fmt()
2507  *
2508  *      Add a formatted string , of the form "blah %s" to the usage string
2509  *      pointed at by usagep. Allocate memory for the new usage string and free
2510  *      the memory used by the original usage string.
2511  *
2512  *      Input:
2513  *		char		**usagep	ptr to current usage string
2514  *		char		*fmt		format string
2515  *		char		*string		string to be added
2516  *      Return:
2517  *		char*		new usage string
2518  *      Locking: None
2519  */
/*PRINTFLIKE2*/
char *
add_to_usage_fmt(char **usagep, char *fmt, char *string)
{
	int	buflen;
	char	*formatted;
	char	*result = NULL;

	/*
	 * Size the scratch buffer.  The "%s" inside fmt is replaced by
	 * string, so strlen(fmt) + strlen(string) + 1 is always enough.
	 */
	buflen = strlen(fmt) + strlen(string) + 1;
	formatted = calloc(1, buflen);
	if (formatted != NULL) {
		(void) sprintf(formatted, fmt, string);
		/* Append the formatted text to the usage string. */
		result = add_to_usage(usagep, formatted);
		free(formatted);
	}
	return (result);
}
2537 
2538 /*
2539  *      is_open()
2540  *
2541  *      Make ioctl call to find if a device is open
2542  *
2543  *      Input:
2544  *		dev_t 		devkey	dev_t for device
2545  *      Return:
2546  *		int		0 if not open,  !=0 if open
2547  *      Locking: None
2548  */
2549 static int
2550 is_open(dev_t devkey)
2551 {
2552 	int		fd;
2553 	md_isopen_t	isopen_ioc;
2554 
2555 	/* Open admin device */
2556 	if ((fd = open(ADMSPECIAL, O_RDONLY, 0)) < 0) {
2557 		rcm_log_message(RCM_ERROR, MSG_OPENERR, ADMSPECIAL);
2558 		return (0);
2559 	}
2560 
2561 	(void) memset(&isopen_ioc, 0, sizeof (isopen_ioc));
2562 	isopen_ioc.dev = devkey;
2563 	if (ioctl(fd, MD_IOCISOPEN, &isopen_ioc) < 0) {
2564 		(void) close(fd);
2565 		return (0);
2566 	}
2567 	(void) close(fd);
2568 	return (isopen_ioc.isopen);
2569 }
2570 
2571 /*
2572  *	check_softpart()
2573  *
2574  *	Check the status of the passed in device within the softpartition.
2575  *
2576  *	Input:
2577  *		mdsetname_t *	the name of the set
2578  *		mdname_t *	the softpartition device that is being examined
2579  *		char *		the device which needs to be checked
2580  *		md_error_t *	error pointer (not used)
2581  *	Return:
2582  *		int		REDUNDANT    - device is redundant and can be
2583  *					       removed
2584  *				NOTREDUNDANT - device cannot be removed
2585  *				NOTINDEVICE  - device is not part of this
2586  *					       component
2587  */
2588 static int
2589 check_softpart(mdsetname_t *sp, mdname_t *np, char *uname, md_error_t *ep)
2590 {
2591 	md_sp_t	*softp = NULL;
2592 
2593 	rcm_log_message(RCM_TRACE1, "SVM: softpart checking %s %s\n",
2594 	    np->bname, uname);
2595 
2596 	softp = meta_get_sp(sp, np, ep);
2597 
2598 	/* softp cannot be NULL, if it is then the RCM cache is corrupt */
2599 	assert(softp != NULL);
2600 
2601 	/*
2602 	 * if the softpartition is not a parent then nothing can be done, user
2603 	 * must close the device and then fix the under lying devices.
2604 	 */
2605 	if (!(MD_HAS_PARENT(softp->common.parent))) {
2606 		rcm_log_message(RCM_TRACE1,
2607 		    "SVM: softpart is a top level device\n");
2608 		return (NOTREDUNDANT);
2609 	}
2610 
2611 	if (strcmp(softp->compnamep->bname, uname) != 0) {
2612 		/*
2613 		 * This can occur if this function has been called by the
2614 		 * check_raid5 code as it is cycling through each column
2615 		 * in turn.
2616 		 */
2617 		rcm_log_message(RCM_TRACE1,
2618 		    "SVM: %s is not in softpart (%s)\n",
2619 		    uname, softp->compnamep->bname);
2620 		return (NOTINDEVICE);
2621 	}
2622 
2623 	/*
2624 	 * Check the status of the soft partition this only moves from
2625 	 * an okay state if the underlying devices fails while the soft
2626 	 * partition is open.
2627 	 */
2628 	if (softp->status != MD_SP_OK) {
2629 		rcm_log_message(RCM_TRACE1,
2630 		    "SVM: softpart is broken (state: 0x%x)\n",
2631 		    softp->status);
2632 		return (REDUNDANT);
2633 	}
2634 
2635 	return (NOTREDUNDANT);
2636 }
2637 
2638 /*
2639  *	check_raid5()
2640  *
2641  *	Check the status of the passed in device within the raid5 in question.
2642  *
2643  *	Input:
2644  *		mdsetname_t *	the name of the set
2645  *		mdname_t *	the raid5 device that is being examined
2646  *		char *		the device which needs to be checked
2647  *		md_error_t *	error pointer (not used)
2648  *	Return:
2649  *		int		REDUNDANT    - device is redundant and can be
2650  *					       removed
2651  *				NOTREDUNDANT - device cannot be removed
2652  */
static int
check_raid5(mdsetname_t *sp, mdname_t *np, char *uname, md_error_t *ep)
{
	md_raid_t	*raidp = NULL;
	md_raidcol_t	*colp = NULL;
	int		i;
	int		rval = 0;

	rcm_log_message(RCM_TRACE1, "SVM: raid5 checking %s %s\n",
	    np->bname, uname);

	raidp = meta_get_raid(sp, np, ep);

	/* raidp cannot be NULL, if it is then the RCM cache is corrupt */
	assert(raidp != NULL);

	/*
	 * Now check each column in the device. We cannot rely upon the state
	 * of the device because if a hotspare is in use all the states are
	 * set to Okay, both at the metadevice layer and the column layer.
	 */
	for (i = 0; (i < raidp->cols.cols_len); i++) {
		colp = &raidp->cols.cols_val[i];
		/* NB: np now refers to this column, not the raid5 device */
		np = colp->colnamep;

		rcm_log_message(RCM_TRACE1,
		    "SVM: raid5 checking %s state %s 0x%x\n",
		    np->bname, raid_col_state_to_name(colp, NULL, 0),
		    colp->state);

		/*
		 * It is possible for the column to be a softpartition,
		 * so need to check the softpartiton if this is the
		 * case. It is *not* valid for the column to be a
		 * stripe/concat/mirror, and so no check to see what
		 * type of metadevice is being used.
		 */
		if (metaismeta(np)) {
			/* this is a metadevice ie a softpartiton */
			rval = check_softpart(sp, np, uname, ep);
			if (rval == REDUNDANT) {
				rcm_log_message(RCM_TRACE1,
				    "SVM: raid5 %s is broken\n", uname);
				/* drop stale libmeta cache for this column */
				meta_invalidate_name(np);
				return (REDUNDANT);
			} else if (rval == NOTREDUNDANT &&
			    colp->hsnamep != NULL) {
				rcm_log_message(RCM_TRACE1,
				    "SVM: raid5 device is broken, hotspared\n");
				meta_invalidate_name(np);
				return (REDUNDANT);
			}
			meta_invalidate_name(np);
			continue;
		}
		/* invalidate before any early return below, see block note */
		meta_invalidate_name(np);

		/* not the device being checked: move to the next column */
		if (strcmp(uname, np->bname) != 0)
			continue;

		/*
		 * Found the device. Check if it is broken or hotspared.
		 */
		if (colp->state & RUS_ERRED) {
			rcm_log_message(RCM_TRACE1,
			    "SVM: raid5 column device is broken\n");
			return (REDUNDANT);
		}

		if (colp->hsnamep != NULL) {
			rcm_log_message(RCM_TRACE1,
			    "SVM: raid5 column device is broken, hotspared\n");
			return (REDUNDANT);
		}
	}
	/* uname was not found broken/hotspared in any column */
	return (NOTREDUNDANT);
}
2730 
2731 /*
2732  *	check_stripe()
2733  *
2734  *	Check the status of the passed in device within the stripe in question.
2735  *
2736  *	Input:
2737  *		mdsetname_t *	the name of the set
2738  *		mdname_t *	the stripe that is being examined
2739  *		char *		the device which needs to be checked
2740  *		md_error_t *	error pointer (not used)
2741  *	Return:
2742  *		int		REDUNDANT    - device is redundant and can be
2743  *					       removed
2744  *				NOTREDUNDANT - device cannot be removed
2745  *				NOTINDEVICE  - device is not part of this
2746  *					       component
2747  */
static int
check_stripe(mdsetname_t *sp, mdname_t *np, char *uname, md_error_t *ep)
{
	md_stripe_t	*stripep = NULL;
	md_row_t	*mrp = NULL;
	md_comp_t	*mcp;
	mdname_t	*pnp;		/* parent metadevice of this stripe */
	char		*miscname;	/* type string of the parent */
	int		row;
	int		col;

	rcm_log_message(RCM_TRACE1, "SVM: concat/stripe checking %s %s\n",
	    np->bname, uname);
	stripep = meta_get_stripe(sp, np, ep);

	/* stripep cannot be NULL, if it is then the RCM cache is corrupt */
	assert(stripep != NULL);

	/*
	 * If the stripe is not a parent then nothing can be done, user
	 * must close the device and then fix the devices.
	 */
	if (!(MD_HAS_PARENT(stripep->common.parent))) {
		rcm_log_message(RCM_TRACE1,
		    "SVM: stripe is a top level device\n");
		return (NOTREDUNDANT);
	}

	pnp = metamnumname(&sp, stripep->common.parent, 0, ep);

	if (pnp == NULL) {
		/*
		 * Only NULL when the replicas are in an inconsistent state
		 * ie the device says it is the parent of X but X does not
		 * exist.
		 */
		rcm_log_message(RCM_TRACE1, "SVM: parent is not configured\n");
		return (NOTREDUNDANT);
	}

	/*
	 * Get the type of the parent and make sure that it is a mirror,
	 * if it is then need to find out the number of submirrors, and
	 * if it is not a mirror then this is not a REDUNDANT device.
	 */
	if ((miscname = metagetmiscname(pnp, ep)) == NULL) {
		/*
		 * Again something is wrong with the configuration.
		 */
		rcm_log_message(RCM_TRACE1,
		    "SVM: unable to find the type of %s\n", pnp->cname);
		meta_invalidate_name(pnp);
		return (NOTREDUNDANT);
	}

	/* Redundancy requires a mirror parent with more than 1 submirror */
	if (!(strcmp(miscname, MD_MIRROR) == 0 &&
	    check_mirror(sp, pnp, ep) == REDUNDANT)) {
		rcm_log_message(RCM_TRACE1,
		    "SVM: %s is a %s and not redundant\n",
		    pnp->cname, miscname);
		meta_invalidate_name(pnp);
		return (NOTREDUNDANT);
	}

	/* Done with the parent; drop libmeta's cached data for it */
	meta_invalidate_name(pnp);

	/* Search every row of the stripe for the component uname */
	for (row = 0; row < stripep->rows.rows_len; row++) {
		mrp = &stripep->rows.rows_val[row];

		/* now the components in the row */
		for (col = 0; col < mrp->comps.comps_len; col++) {
			mcp = &mrp->comps.comps_val[col];

			rcm_log_message(RCM_TRACE1,
			    "SVM: stripe comp %s check\n",
			    mcp->compnamep->bname);

			if (strcmp(mcp->compnamep->bname, uname) != 0)
				continue;

			rcm_log_message(RCM_TRACE1,
			    "SVM: component state: %s\n",
			    comp_state_to_name(mcp, NULL, 0));

			if (mcp->hsnamep != NULL) {
				/* device is broken and hotspared */
				rcm_log_message(RCM_TRACE1,
				    "SVM: stripe %s broken, hotspare active\n",
				    uname);
				return (REDUNDANT);
			}

			/*
			 * LAST_ERRED is a special case.  If the state of a
			 * component is CS_LAST_ERRED then this is the last
			 * copy of the data and we need to keep using it, even
			 * though we had errors.  Thus, we must block the DR
			 * request.  If you follow the documented procedure for
			 * fixing each component (fix devs in maintenance
			 * before last erred) then the mirror will
			 * automatically transition Last Erred components to
			 * the Erred state after which they can be DRed out.
			 */
			if (mcp->state == CS_ERRED) {
				/* device is broken */
				rcm_log_message(RCM_TRACE1,
				    "SVM: stripe %s is broken\n", uname);
				return (REDUNDANT);
			}

			/*
			 * Short circuit - if here the component has been
			 * found in the column so no further processing is
			 * required here.
			 */
			return (NOTREDUNDANT);
		}
	}

	/*
	 * Only get to this point if the device (uname) has not been
	 * found in the stripe. This means that there is something
	 * wrong with the device dependency list.
	 */
	rcm_log_message(RCM_TRACE1,
	    "SVM: component %s is not part of %s\n",
	    uname, np->bname);

	return (NOTINDEVICE);
}
2878 
2879 /*
2880  *	check_mirror()
2881  *
2882  *	Make sure that the mirror > 1 submirror.
2883  *
2884  *	Input:
2885  *		mdsetname_t *	the name of the set
2886  *		mdname_t *	the stripe that is being examined
2887  *	Return:
2888  *		int		REDUNDANT    - mirror > 1 submirrors
2889  *				NOTREDUNDANT - mirror has 1 submirror
2890  */
2891 static int
2892 check_mirror(mdsetname_t *sp, mdname_t *np, md_error_t *ep)
2893 {
2894 	uint_t		nsm = 0;	/* number of submirrors */
2895 	uint_t		smi = 0;	/* index into submirror array */
2896 	md_mirror_t	*mirrorp = NULL;
2897 
2898 	rcm_log_message(RCM_TRACE1, "SVM: mirror checking %s\n", np->bname);
2899 	mirrorp = meta_get_mirror(sp, np, ep);
2900 
2901 	/* mirrorp cannot be NULL, if it is then the RCM cache is corrupt */
2902 	assert(mirrorp != NULL);
2903 
2904 	/*
2905 	 * Need to check how many submirrors that the mirror has.
2906 	 */
2907 	for (smi = 0, nsm = 0; (smi < NMIRROR); ++smi) {
2908 		md_submirror_t	*mdsp = &mirrorp->submirrors[smi];
2909 		mdname_t	*submirnamep = mdsp->submirnamep;
2910 
2911 		/* Is this submirror being used ?  No, then continue */
2912 		if (submirnamep == NULL)
2913 			continue;
2914 		nsm++;
2915 	}
2916 
2917 	/*
2918 	 * If there is only one submirror then there is no redundancy
2919 	 * in the configuration and the user needs to take some other
2920 	 * action before using cfgadm on the device ie close the metadevice.
2921 	 */
2922 	if (nsm == 1) {
2923 		rcm_log_message(RCM_TRACE1,
2924 		    "SVM: only one submirror unable to allow action\n");
2925 		return (NOTREDUNDANT);
2926 	}
2927 
2928 	return (REDUNDANT);
2929 }
2930 
2931 /*
2932  *	check_device()
2933  *
2934  *	Check the current status of the underlying device.
2935  *
2936  *	Input:
2937  *		deventry_t *	the device that is being checked
2938  *	Return:
2939  *		int		REDUNDANT    - device is redundant and can be
2940  *					       removed
2941  *				NOTREDUNDANT - device cannot be removed
2942  *	Locking:
2943  *		None
2944  *
2945  * The check_device code path (the functions called by check_device) use
2946  * libmeta calls directly to determine if the specified device is
2947  * redundant or not.  The can lead to conflicts between data cached in
2948  * libmeta and data that is being cached by this rcm module.  Since the
2949  * rcm cache is our primary source of information here, we need to make
2950  * sure that we are not getting stale data from the libmeta caches.
2951  * We use meta_invalidate_name throughout this code path to clear the
2952  * cached data in libmeta in order to ensure that we are not using stale data.
2953  */
static int
check_device(deventry_t *deventry)
{
	mdsetname_t	*sp;
	md_error_t	error = mdnullerror;
	char		sname[BUFSIZ+1];	/* extracted diskset name */
	mdname_t	*np;
	deventry_t	*dependent;
	int		rval = NOTREDUNDANT;
	int		ret;

	dependent = deventry->dependent;

	rcm_log_message(RCM_TRACE1, "SVM: check_device(%s)\n",
	    deventry->devname);
	/*
	 * should not be null because the caller has already figured out
	 * there are dependent devices.
	 */
	assert(dependent != NULL);

	do {

		rcm_log_message(RCM_TRACE1, "SVM: check dependent: %s\n",
		    dependent->devname);

		/* skip dependents that have already been removed */
		if (dependent->flags & REMOVED) {
			dependent = dependent->next_dep;
			continue;
		}

		/*
		 * The device *should* be a metadevice and so need to see if
		 * it contains a setname.  Paths look like
		 * /dev/md/<set>/dsk/...; if no set component is present,
		 * fall back to MD_LOCAL_NAME (the local set).
		 */
		ret = sscanf(dependent->devname,
		    "/dev/md/%" VAL2STR(BUFSIZ) "[^/]/dsk/",
		    sname);

		if (ret != 1)
			(void) strcpy(sname, MD_LOCAL_NAME);

		if ((sp = metasetname(sname, &error)) == NULL) {
			/* give up on all remaining dependents */
			rcm_log_message(RCM_TRACE1,
			    "SVM: unable to get setname for \"%s\", error %s\n",
			    sname, mde_sperror(&error, ""));
			break;
		}

		rcm_log_message(RCM_TRACE1, "SVM: processing: %s\n",
		    dependent->devname);

		/*
		 * NOTE(review): np is not checked for NULL before being
		 * handed to the check_* routines - presumably metaname()
		 * cannot fail here because the device came from the RCM
		 * cache; confirm before relying on it.
		 */
		np = metaname(&sp, dependent->devname, META_DEVICE, &error);

		/* dispatch on the dependent's metadevice type */
		switch (dependent->devtype) {
		case SVM_TRANS:
			/*
			 * No code to check trans devices because ufs logging
			 * should be being used.
			 */
			rcm_log_message(RCM_TRACE1,
			    "SVM: Use UFS logging instead of trans devices\n");
			break;
		case SVM_SLICE:
		case SVM_STRIPE:
		case SVM_CONCAT:
			rval = check_stripe(sp, np, deventry->devname, &error);
			break;
		case SVM_MIRROR:
			/*
			 * No check here as this is performed by the one
			 * above when the submirror is checked.
			 */
			rcm_log_message(RCM_TRACE1,
			    "SVM: Mirror check is done by the stripe check\n");
			break;
		case SVM_RAID:
			/*
			 * Raid5 devices can be built on soft partitions or
			 * slices and so the check here is for the raid5
			 * device built on top of slices. Note, a raid5 cannot
			 * be built on a stripe/concat.
			 */
			rval = check_raid5(sp, np, deventry->devname, &error);
			break;
		case SVM_SOFTPART:
			/*
			 * Raid5 devices can be built on top of soft partitions
			 * and so they have to be checked.
			 */
			rval = check_softpart(sp, np, deventry->devname,
			    &error);
			break;
		default:
			/* unchecked types leave rval at its previous value */
			rcm_log_message(RCM_TRACE1,
			    "SVM: unknown devtype: %d\n", dependent->devtype);
			break;
		}

		/* drop libmeta's cached data so later checks see fresh state */
		meta_invalidate_name(np);

		/* one redundant dependent is enough to allow the operation */
		if (rval == REDUNDANT)
			break;
	} while ((dependent = dependent->next_dep) != NULL);

	rcm_log_message(RCM_TRACE1, "SVM: check_device return %d\n", rval);
	return (rval);
}
3062 
3063 /*
3064  *	svm_unregister_device
3065  *
3066  *	Unregister the device specified by the deventry
3067  *
3068  *	Input:
3069  *		rcm_handle_t *	information for RCM
3070  *		deventry_t *	description of the device to be
3071  *				unregistered
3072  *
3073  *	Return:
3074  *		int		0	- successfully unregistered
3075  *				!= 0	- failed to unregister
3076  *
3077  *	Locking:
3078  *		None
3079  *
3080  * If the deventry_t has a devicesname, we will first attempt to unregister
3081  * using that name.  If that fails then we'll attempt to unregister using
3082  * devname.  The reason for this strategy has to do with the way that
3083  * rcm_register_interest works.  If passed a /dev/ name,
3084  * rcm_register_interest uses realpath() to convert it to a /devices name.
3085  * Thus, we are more likely to succeed if we use devicesname first.
3086  */
3087 
3088 static int
3089 svm_unregister_device(rcm_handle_t *hd, deventry_t *d)
3090 {
3091 	int	deleted;
3092 
3093 	if (d->devicesname) {
3094 		rcm_log_message(RCM_TRACE1, "SVM: unregister_device %s (%s)\n",
3095 			d->devname, d->devicesname);
3096 	} else {
3097 		rcm_log_message(RCM_TRACE1, "SVM: unregister_device %s\n",
3098 			d->devname);
3099 	}
3100 	deleted = -1;
3101 	if (d->devicesname != NULL) {
3102 		/*
3103 		 * Try to unregister via the /devices entry first.  RCM
3104 		 * converts /dev/ entries to /devices entries before
3105 		 * storing them.  Thus, if this item has a /devices name
3106 		 * available, we should use it for unregistering.
3107 		 */
3108 		deleted = rcm_unregister_interest(hd,
3109 			d->devicesname, 0);
3110 	}
3111 	if (deleted != 0) {
3112 		/*
3113 		 * Either we did not have a /devices name or the attempt to
3114 		 * unregister using the /devices name failed.  Either way
3115 		 * we'll now try to unregister using the conventional name.
3116 		 */
3117 		deleted = rcm_unregister_interest(hd, d->devname, 0);
3118 	}
3119 	if (deleted != 0) {
3120 		rcm_log_message(RCM_TRACE1, "SVM: unregister_device failed "
3121 			"for %s\n", d->devname);
3122 	}
3123 	return (deleted);
3124 }
3125