xref: /titanic_41/usr/src/uts/common/io/lvm/hotspares/hotspares.c (revision 5aefb6555731130ca4fd295960123d71f2d21fe8)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/conf.h>
31 #include <sys/file.h>
32 #include <sys/user.h>
33 #include <sys/uio.h>
34 #include <sys/t_lock.h>
35 #include <sys/kmem.h>
36 #include <vm/page.h>
37 #include <sys/sysmacros.h>
38 #include <sys/types.h>
39 #include <sys/mkdev.h>
40 #include <sys/stat.h>
41 #include <sys/open.h>
42 #include <sys/modctl.h>
43 #include <sys/ddi.h>
44 #include <sys/sunddi.h>
45 #include <sys/debug.h>
46 
47 #include <sys/lvm/md_hotspares.h>
48 #include <sys/lvm/md_convert.h>
49 
50 #include <sys/sysevent/eventdefs.h>
51 #include <sys/sysevent/svm.h>
52 
53 md_ops_t		hotspares_md_ops;
54 #ifndef	lint
55 char			_depends_on[] = "drv/md";
56 md_ops_t		*md_interface_ops = &hotspares_md_ops;
57 #endif
58 
59 extern md_ops_t		**md_ops;
60 extern md_ops_t		*md_opslist;
61 extern md_set_t		md_set[];
62 
63 extern kmutex_t		md_mx;		/* used to md global stuff */
64 extern kcondvar_t	md_cv;		/* md_status events */
65 extern int		md_status;
66 
67 extern void		md_clear_hot_spare_interface();
68 
69 static void
70 set_hot_spare_state(hot_spare_t *hs, hotspare_states_t newstate)
71 {
72 	hs->hs_state = newstate;
73 	uniqtime32(&hs->hs_timestamp);
74 }
75 
76 static hot_spare_t *
77 lookup_hot_spare(set_t setno, mddb_recid_t hs_id, int must_exist)
78 {
79 	hot_spare_t *hs;
80 
81 	for (hs = (hot_spare_t *)md_set[setno].s_hs; hs; hs = hs->hs_next) {
82 		if (hs->hs_record_id == hs_id)
83 			return (hs);
84 	}
85 	if (must_exist)
86 		ASSERT(0);
87 
88 	return ((hot_spare_t *)NULL);
89 }
90 
91 static hot_spare_pool_t *
92 find_hot_spare_pool(set_t setno, int hsp_id)
93 {
94 	hot_spare_pool_t *hsp;
95 
96 	hsp = (hot_spare_pool_t *)md_set[setno].s_hsp;
97 	while (hsp != NULL) {
98 		if (hsp->hsp_self_id == hsp_id)
99 			return (hsp);
100 		hsp = hsp->hsp_next;
101 	}
102 
103 	return ((hot_spare_pool_t *)0);
104 }
105 
106 
107 static int
108 seths_create_hsp(set_hs_params_t *shs)
109 {
110 	hot_spare_pool_t	*hsp;
111 	mddb_recid_t		recid;
112 	set_t			setno;
113 	mddb_type_t		typ1;
114 
115 	setno = HSP_SET(shs->shs_hot_spare_pool);
116 
117 	/* Scan the hot spare pool list */
118 	hsp = find_hot_spare_pool(setno, shs->shs_hot_spare_pool);
119 	if (hsp != (hot_spare_pool_t *)0)
120 		return (0);
121 
122 	typ1 = (mddb_type_t)md_getshared_key(setno,
123 	    hotspares_md_ops.md_driver.md_drivername);
124 
125 	/* create a hot spare pool record */
126 	if (shs->shs_options & MD_CRO_64BIT) {
127 #if defined(_ILP32)
128 		return (mdhsperror(&shs->mde, MDE_HSP_UNIT_TOO_LARGE,
129 		    shs->shs_hot_spare_pool));
130 #else
131 		recid = mddb_createrec(sizeof (hot_spare_pool_ond_t), typ1,
132 			HSP_REC, MD_CRO_64BIT | MD_CRO_HOTSPARE_POOL, setno);
133 #endif
134 	} else {
135 		recid = mddb_createrec(sizeof (hot_spare_pool_ond_t), typ1,
136 			HSP_REC, MD_CRO_32BIT | MD_CRO_HOTSPARE_POOL, setno);
137 	}
138 
139 	if (recid < 0) {
140 		return (mdhsperror(&shs->mde, MDE_HSP_CREATE_FAILURE,
141 		    shs->shs_hot_spare_pool));
142 	}
143 
144 	/* get the record addr */
145 	hsp = (hot_spare_pool_t *)mddb_getrecaddr_resize(recid, sizeof (*hsp),
146 		HSP_ONDSK_STR_OFF);
147 
148 	hsp->hsp_self_id = shs->shs_hot_spare_pool;
149 	hsp->hsp_record_id = recid;
150 	hsp->hsp_next = (hot_spare_pool_t *)md_set[setno].s_hsp;
151 	hsp->hsp_refcount = 0;
152 	hsp->hsp_nhotspares = 0;
153 
154 	md_set[setno].s_hsp = (void *) hsp;
155 
156 	mddb_commitrec_wrapper(recid);
157 	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, SVM_TAG_HSP, setno,
158 	    md_expldev(hsp->hsp_self_id));
159 
160 	rw_enter(&hotspares_md_ops.md_link_rw.lock, RW_WRITER);
161 	hsp->hsp_link.ln_next = hotspares_md_ops.md_head;
162 	hsp->hsp_link.ln_setno = setno;
163 	hsp->hsp_link.ln_id = hsp->hsp_self_id;
164 	hotspares_md_ops.md_head = &hsp->hsp_link;
165 	rw_exit(&hotspares_md_ops.md_link_rw.lock);
166 
167 	return (0);
168 }
169 
170 
171 static int
172 seths_add(set_hs_params_t *shs)
173 {
174 	hot_spare_t		*hs;
175 	hot_spare_pool_t	*hsp;
176 	hot_spare_pool_t	*prev_hsp;
177 	hot_spare_pool_t	*new_hsp;
178 	hot_spare_pool_t	*old_hsp;
179 	mddb_recid_t		recid;
180 	mddb_recid_t		recids[5];
181 	size_t			new_size;
182 	int			i;
183 	int			delete_hsp = 0;
184 	int			irecid;
185 	set_t			setno;
186 	mddb_type_t		typ1;
187 	int			hsp_created = 0;
188 	mdkey_t			key_old;
189 	int			num_keys_old = 0;
190 
191 	/* Not much to do here in case of a dryrun */
192 	if (shs->shs_options & HS_OPT_DRYRUN) {
193 		return (0);
194 	}
195 
196 	/* create an empty hot spare pool */
197 	if (shs->shs_options & HS_OPT_POOL) {
198 		return (seths_create_hsp(shs));
199 	}
200 
201 	setno = HSP_SET(shs->shs_hot_spare_pool);
202 	typ1 = (mddb_type_t)md_getshared_key(setno,
203 	    hotspares_md_ops.md_driver.md_drivername);
204 
205 	/* Scan the hot spare list */
206 	hs = (hot_spare_t *)md_set[setno].s_hs;
207 	while (hs) {
208 		if (hs->hs_devnum == shs->shs_component_old) {
209 			break;
210 		}
211 		hs = hs->hs_next;
212 	}
213 
214 	if (hs == NULL) {
215 		/*
216 		 * Did not find match for device using devnum so use
217 		 * key associated with shs_component_old just
218 		 * in case there is a match but the match's dev is NODEV.
219 		 * If unable to find a unique key for shs_component_old
220 		 * then fail since namespace has multiple entries
221 		 * for this old component and we shouldn't allow
222 		 * an addition of a hotspare in this case.
223 		 */
224 		if (md_getkeyfromdev(setno, mddb_getsidenum(setno),
225 		    shs->shs_component_old, &key_old, &num_keys_old) != 0) {
226 			return (mddeverror(&shs->mde, MDE_NAME_SPACE,
227 			    shs->shs_component_old));
228 		}
229 
230 		/*
231 		 * If more than one key matches given old_dev - fail command
232 		 * since shouldn't add new hotspare if namespace has
233 		 * multiple entries.
234 		 */
235 		if (num_keys_old > 1) {
236 			return (mddeverror(&shs->mde, MDE_MULTNM,
237 			    shs->shs_component_old));
238 		}
239 		/*
240 		 * If there is no key for this entry then fail since
241 		 * a key for this entry should exist.
242 		 */
243 		if (num_keys_old == 0) {
244 			return (mddeverror(&shs->mde, MDE_INVAL_HS,
245 			    shs->shs_component_old));
246 		}
247 		/* Scan the hot spare list again */
248 		hs = (hot_spare_t *)md_set[setno].s_hs;
249 		while (hs) {
250 			/*
251 			 * Only need to compare keys when hs_devnum is NODEV.
252 			 */
253 			if ((hs->hs_devnum == NODEV64) &&
254 			    (hs->hs_key == key_old)) {
255 				break;
256 			}
257 			hs = hs->hs_next;
258 		}
259 	}
260 
261 	if (hs == NULL) {
262 		/* create a hot spare record */
263 		if (shs->shs_size_option & MD_CRO_64BIT) {
264 #if defined(_ILP32)
265 			return (mdhserror(&shs->mde, MDE_HS_UNIT_TOO_LARGE,
266 			    shs->shs_hot_spare_pool, shs->shs_component_old));
267 #else
268 			recid = mddb_createrec(HS_ONDSK_STR_SIZE, typ1, HS_REC,
269 				MD_CRO_64BIT | MD_CRO_HOTSPARE, setno);
270 #endif
271 		} else {
272 			recid = mddb_createrec(HS_ONDSK_STR_SIZE, typ1, HS_REC,
273 				MD_CRO_32BIT | MD_CRO_HOTSPARE, setno);
274 		}
275 
276 		if (recid < 0) {
277 			return (mdhserror(&shs->mde, MDE_HS_CREATE_FAILURE,
278 			    shs->shs_hot_spare_pool,
279 			    shs->shs_component_old));
280 		}
281 
282 		/* get the addr */
283 		hs = (hot_spare_t *)mddb_getrecaddr_resize(recid, sizeof (*hs),
284 			0);
285 
286 		hs->hs_record_id = recid;
287 
288 		hs->hs_devnum = shs->shs_component_old;
289 		hs->hs_key = shs->shs_key_old;
290 		hs->hs_start_blk = shs->shs_start_blk;
291 		hs->hs_has_label = shs->shs_has_label;
292 		hs->hs_number_blks = shs->shs_number_blks;
293 		set_hot_spare_state(hs, HSS_AVAILABLE);
294 		hs->hs_refcount = 0;
295 		hs->hs_next = (hot_spare_t *)md_set[setno].s_hs;
296 		md_set[setno].s_hs = (void *) hs;
297 	}
298 
299 	/* Scan the hot spare pool list */
300 	hsp = (hot_spare_pool_t *)md_set[setno].s_hsp;
301 	prev_hsp = (hot_spare_pool_t *)0;
302 	while (hsp) {
303 		if (hsp->hsp_self_id == shs->shs_hot_spare_pool) {
304 			break;
305 		}
306 		prev_hsp = hsp;
307 		hsp = hsp->hsp_next;
308 	}
309 
310 	if (hsp == NULL) {
311 		/* create a hot spare pool record */
312 		recid = mddb_createrec(sizeof (hot_spare_pool_ond_t),
313 		    typ1, HSP_REC, MD_CRO_32BIT | MD_CRO_HOTSPARE_POOL, setno);
314 
315 		if (recid < 0) {
316 			return (mdhsperror(&shs->mde, MDE_HSP_CREATE_FAILURE,
317 			    shs->shs_hot_spare_pool));
318 		}
319 
320 		/* get the record addr */
321 		hsp = (hot_spare_pool_t *)mddb_getrecaddr_resize(recid,
322 			sizeof (*hsp), HSP_ONDSK_STR_OFF);
323 
324 		hsp->hsp_self_id = shs->shs_hot_spare_pool;
325 		hsp->hsp_record_id = recid;
326 		hsp->hsp_next = (hot_spare_pool_t *)md_set[setno].s_hsp;
327 		hsp->hsp_refcount = 0;
328 		hsp->hsp_nhotspares = 0;
329 
330 		/* force prev_hsp to NULL, this will cause hsp to be linked */
331 		prev_hsp = (hot_spare_pool_t *)0;
332 
333 		rw_enter(&hotspares_md_ops.md_link_rw.lock, RW_WRITER);
334 		hsp->hsp_link.ln_next = hotspares_md_ops.md_head;
335 		hsp->hsp_link.ln_setno = setno;
336 		hsp->hsp_link.ln_id = hsp->hsp_self_id;
337 		hotspares_md_ops.md_head = &hsp->hsp_link;
338 		rw_exit(&hotspares_md_ops.md_link_rw.lock);
339 		hsp_created = 1;
340 	} else {
341 
342 		/*
343 		 * Make sure the hot spare is not already in the pool.
344 		 */
345 		for (i = 0; i < hsp->hsp_nhotspares; i++)
346 			if (hsp->hsp_hotspares[i] == hs->hs_record_id) {
347 				return (mdhserror(&shs->mde, MDE_HS_INUSE,
348 					shs->shs_hot_spare_pool,
349 					hs->hs_devnum));
350 			}
351 		/*
352 		 * Create a new hot spare pool record
353 		 * This gives us the one extra hs slot,
354 		 * because there is one slot in the
355 		 * hot_spare_pool struct
356 		 */
357 		new_size = sizeof (hot_spare_pool_ond_t) +
358 			(sizeof (mddb_recid_t) * hsp->hsp_nhotspares);
359 		recid = mddb_createrec(new_size, typ1, HSP_REC,
360 		    MD_CRO_32BIT | MD_CRO_HOTSPARE_POOL, setno);
361 
362 		if (recid < 0) {
363 			return (mdhsperror(&shs->mde, MDE_HSP_CREATE_FAILURE,
364 			    hsp->hsp_self_id));
365 		}
366 		new_size = sizeof (hot_spare_pool_t) +
367 			(sizeof (mddb_recid_t) * hsp->hsp_nhotspares);
368 
369 		/* get the record addr */
370 		new_hsp = (hot_spare_pool_t *)mddb_getrecaddr_resize(recid,
371 			new_size, HSP_ONDSK_STR_OFF);
372 
373 		/* copy the old record into the new one */
374 		bcopy((caddr_t)hsp, (caddr_t)new_hsp,
375 		    (size_t)((sizeof (hot_spare_pool_t) +
376 		    (sizeof (mddb_recid_t) * hsp->hsp_nhotspares)
377 		    - sizeof (mddb_recid_t))));
378 		new_hsp->hsp_record_id = recid;
379 
380 		md_rem_link(setno, hsp->hsp_self_id,
381 		    &hotspares_md_ops.md_link_rw.lock,
382 		    &hotspares_md_ops.md_head);
383 
384 		rw_enter(&hotspares_md_ops.md_link_rw.lock, RW_WRITER);
385 		new_hsp->hsp_link.ln_next = hotspares_md_ops.md_head;
386 		new_hsp->hsp_link.ln_setno = setno;
387 		new_hsp->hsp_link.ln_id = new_hsp->hsp_self_id;
388 		hotspares_md_ops.md_head = &new_hsp->hsp_link;
389 		rw_exit(&hotspares_md_ops.md_link_rw.lock);
390 
391 		/* mark the old hsp to be deleted */
392 		delete_hsp = 1;
393 		old_hsp = hsp;
394 		hsp = new_hsp;
395 	}
396 
397 	if (shs->shs_size_option & MD_CRO_64BIT) {
398 		hs->hs_revision = MD_64BIT_META_DEV;
399 	} else {
400 		hs->hs_revision = MD_32BIT_META_DEV;
401 	}
402 
403 	/* lock the db records */
404 	recids[0] = hs->hs_record_id;
405 	recids[1] = hsp->hsp_record_id;
406 	irecid = 2;
407 	if (delete_hsp)
408 		recids[irecid++] = old_hsp->hsp_record_id;
409 	recids[irecid] = 0;
410 
411 	/* increment the reference count */
412 	hs->hs_refcount++;
413 
414 	/* add the hs at the end of the hot spare pool */
415 	hsp->hsp_hotspares[hsp->hsp_nhotspares] = hs->hs_record_id;
416 	hsp->hsp_nhotspares++;
417 
418 	/*
419 	 * NOTE: We do not commit the previous hot spare pool record.
420 	 *	 There is no need, the link gets rebuilt at boot time.
421 	 */
422 	if (prev_hsp)
423 		prev_hsp->hsp_next = hsp;
424 	else
425 		md_set[setno].s_hsp = (void *) hsp;
426 
427 	if (delete_hsp)
428 		old_hsp->hsp_self_id = MD_HSP_NONE;
429 
430 	/* commit the db records */
431 	mddb_commitrecs_wrapper(recids);
432 
433 	if (delete_hsp) {
434 		/* delete the old hot spare pool record */
435 		mddb_deleterec_wrapper(old_hsp->hsp_record_id);
436 	}
437 
438 	if (hsp_created) {
439 		SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, SVM_TAG_HSP, setno,
440 		    md_expldev(hsp->hsp_self_id));
441 	}
442 	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_ADD, SVM_TAG_HSP, setno,
443 	    md_expldev(hsp->hsp_self_id));
444 
445 	return (0);
446 }
447 
448 
449 static int
450 seths_delete_hsp(set_hs_params_t *shs)
451 {
452 
453 	hot_spare_pool_t	*prev_hsp;
454 	hot_spare_pool_t	*hsp;
455 	set_t			setno;
456 	hsp_t			hspid;
457 
458 	setno = HSP_SET(shs->shs_hot_spare_pool);
459 
460 	/* Scan the hot spare pool list */
461 	prev_hsp = (hot_spare_pool_t *)0;
462 	hsp = (hot_spare_pool_t *)md_set[setno].s_hsp;
463 	while (hsp) {
464 		if (hsp->hsp_self_id == shs->shs_hot_spare_pool) {
465 			break;
466 		}
467 		prev_hsp = hsp;
468 		hsp = hsp->hsp_next;
469 	}
470 
471 	if (hsp == NULL) {
472 		return (mdhsperror(&shs->mde, MDE_INVAL_HSP,
473 		    shs->shs_hot_spare_pool));
474 	}
475 
476 	if (hsp->hsp_nhotspares != 0) {
477 		return (mdhsperror(&shs->mde, MDE_HSP_BUSY,
478 		    shs->shs_hot_spare_pool));
479 	}
480 
481 	if (hsp->hsp_refcount != 0) {
482 		return (mdhsperror(&shs->mde, MDE_HSP_REF,
483 		    shs->shs_hot_spare_pool));
484 	}
485 
486 	/* In case of a dryrun, we're done here */
487 	if (shs->shs_options & HS_OPT_DRYRUN) {
488 		return (0);
489 	}
490 	/*
491 	 * NOTE: We do not commit the previous hot spare pool record.
492 	 *	 There is no need, the link gets rebuilt at boot time.
493 	 */
494 	if (prev_hsp)
495 		prev_hsp->hsp_next = hsp->hsp_next;
496 	else
497 		md_set[setno].s_hsp = (void *) hsp->hsp_next;
498 
499 	hspid = hsp->hsp_self_id;
500 
501 	md_rem_link(setno, hsp->hsp_self_id,
502 	    &hotspares_md_ops.md_link_rw.lock,
503 	    &hotspares_md_ops.md_head);
504 
505 	mddb_deleterec_wrapper(hsp->hsp_record_id);
506 
507 	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, SVM_TAG_HSP, setno,
508 	    md_expldev(hspid));
509 	return (0);
510 }
511 
512 
513 static int
514 seths_delete(set_hs_params_t *shs)
515 {
516 	hot_spare_t		*hs;
517 	hot_spare_t		*prev_hs;
518 	hot_spare_pool_t	*hsp;
519 	mddb_recid_t		recids[4];
520 	int			i;
521 	set_t			setno;
522 	sv_dev_t		sv;
523 	int			delete_hs = 0;
524 	mdkey_t			key_old;
525 	int			num_keys_old = 0;
526 
527 	/* delete the hot spare pool */
528 	if (shs->shs_options & HS_OPT_POOL) {
529 		return (seths_delete_hsp(shs));
530 	}
531 
532 	setno = HSP_SET(shs->shs_hot_spare_pool);
533 
534 	/* Scan the hot spare list */
535 	hs = (hot_spare_t *)md_set[setno].s_hs;
536 	prev_hs = (hot_spare_t *)0;
537 	while (hs) {
538 		if (hs->hs_devnum == shs->shs_component_old) {
539 			break;
540 		}
541 		prev_hs = hs;
542 		hs = hs->hs_next;
543 	}
544 
545 	if (hs == NULL) {
546 		/*
547 		 * Unable to find device using devnum so use
548 		 * key associated with shs_component_old instead.
549 		 * If unable to find a unique key for shs_component_old
550 		 * then fail since namespace has multiple entries
551 		 * for this old component and we're unable to determine
552 		 * which key is the valid match for shs_component_old.
553 		 *
554 		 * Only need to compare keys when hs_devnum is NODEV.
555 		 */
556 		if (md_getkeyfromdev(setno, mddb_getsidenum(setno),
557 		    shs->shs_component_old, &key_old, &num_keys_old) != 0) {
558 			return (mddeverror(&shs->mde, MDE_NAME_SPACE,
559 			    shs->shs_component_old));
560 		}
561 
562 		/*
563 		 * If more than one key matches given old_dev - fail command
564 		 * since shouldn't add new hotspare if namespace has
565 		 * multiple entries.
566 		 */
567 		if (num_keys_old > 1) {
568 			return (mddeverror(&shs->mde, MDE_MULTNM,
569 			    shs->shs_component_old));
570 		}
571 		/*
572 		 * If there is no key for this entry then fail since
573 		 * a key for this entry should exist.
574 		 */
575 		if (num_keys_old == 0) {
576 			return (mddeverror(&shs->mde, MDE_INVAL_HS,
577 			    shs->shs_component_old));
578 		}
579 		/* Scan the hot spare list again */
580 		hs = (hot_spare_t *)md_set[setno].s_hs;
581 		prev_hs = (hot_spare_t *)0;
582 		while (hs) {
583 			/*
584 			 * Only need to compare keys when hs_devnum is NODEV.
585 			 */
586 			if ((hs->hs_devnum == NODEV64) &&
587 			    (hs->hs_key == key_old)) {
588 				break;
589 			}
590 			prev_hs = hs;
591 			hs = hs->hs_next;
592 		}
593 	}
594 
595 	if (hs == NULL) {
596 		return (mddeverror(&shs->mde, MDE_INVAL_HS,
597 		    shs->shs_component_old));
598 	}
599 
600 	/* Scan the hot spare pool list */
601 	hsp = find_hot_spare_pool(setno, shs->shs_hot_spare_pool);
602 	if (hsp == (hot_spare_pool_t *)0) {
603 		return (mdhsperror(&shs->mde, MDE_INVAL_HSP,
604 		    shs->shs_hot_spare_pool));
605 	}
606 
607 	/* check for force flag and state of hot spare */
608 	if (((shs->shs_options & HS_OPT_FORCE) == 0) &&
609 	    (hs->hs_state == HSS_RESERVED)) {
610 		return (mdhserror(&shs->mde, MDE_HS_RESVD,
611 		    shs->shs_hot_spare_pool, shs->shs_component_old));
612 	}
613 
614 	if (hsp->hsp_refcount && (hs->hs_state == HSS_RESERVED)) {
615 		return (mdhserror(&shs->mde, MDE_HS_RESVD,
616 		    shs->shs_hot_spare_pool, shs->shs_component_old));
617 	}
618 
619 	/*
620 	 * Make sure the device is in the pool.
621 	 */
622 	for (i = 0; i < hsp->hsp_nhotspares; i++) {
623 		if (hsp->hsp_hotspares[i] == hs->hs_record_id) {
624 			break;
625 		}
626 	}
627 
628 	if (i >= hsp->hsp_nhotspares) {
629 		return (mddeverror(&shs->mde, MDE_INVAL_HS,
630 		    hs->hs_devnum));
631 	}
632 
633 	/* In case of a dryrun, we're done here */
634 	if (shs->shs_options & HS_OPT_DRYRUN) {
635 		return (0);
636 	}
637 
638 	/* lock the db records */
639 	recids[0] = hs->hs_record_id;
640 	recids[1] = hsp->hsp_record_id;
641 	recids[2] = 0;
642 
643 	sv.setno = setno;
644 	sv.key = hs->hs_key;
645 
646 	hs->hs_refcount--;
647 	if (hs->hs_refcount == 0) {
648 		/*
649 		 * NOTE: We do not commit the previous hot spare record.
650 		 *	 There is no need, the link we get rebuilt at boot time.
651 		 */
652 		if (prev_hs) {
653 			prev_hs->hs_next = hs->hs_next;
654 		} else
655 			md_set[setno].s_hs = (void *) hs->hs_next;
656 
657 		/* mark the hot spare to be deleted */
658 		delete_hs = 1;
659 		recids[0] = hsp->hsp_record_id;
660 		recids[1] = 0;
661 	}
662 
663 	/* find the location of the hs in the hsp */
664 	for (i = 0; i < hsp->hsp_nhotspares; i++) {
665 		if (hsp->hsp_hotspares[i] == hs->hs_record_id)
666 			break;
667 	}
668 
669 	/* remove the hs from the hsp */
670 	for (i++; i < hsp->hsp_nhotspares; i++)
671 		hsp->hsp_hotspares[i - 1] = hsp->hsp_hotspares[i];
672 
673 	hsp->hsp_nhotspares--;
674 
675 	/* commit the db records */
676 	mddb_commitrecs_wrapper(recids);
677 
678 	if (delete_hs)
679 		mddb_deleterec_wrapper(hs->hs_record_id);
680 
681 	md_rem_names(&sv, 1);
682 
683 	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, SVM_TAG_HSP, setno,
684 	    md_expldev(hsp->hsp_self_id));
685 
686 	return (0);
687 }
688 
689 static int
690 seths_replace(set_hs_params_t *shs)
691 {
692 	hot_spare_t		*hs;
693 	hot_spare_t		*prev_hs;
694 	hot_spare_t		*new_hs;
695 	hot_spare_pool_t	*hsp;
696 	int			new_found = 0;
697 	mddb_recid_t		recid;
698 	mddb_recid_t		recids[5];
699 	int			i;
700 	sv_dev_t		sv;
701 	int			delete_hs = 0;
702 	set_t			setno;
703 	mddb_type_t		typ1;
704 	mdkey_t			key_old;
705 	int			num_keys_old = 0;
706 
707 	setno = HSP_SET(shs->shs_hot_spare_pool);
708 	typ1 = (mddb_type_t)md_getshared_key(setno,
709 	    hotspares_md_ops.md_driver.md_drivername);
710 
711 	/* Scan the hot spare list */
712 	hs = (hot_spare_t *)md_set[setno].s_hs;
713 	prev_hs = (hot_spare_t *)0;
714 	while (hs) {
715 		if (hs->hs_devnum == shs->shs_component_old) {
716 			break;
717 		}
718 		prev_hs = hs;
719 		hs = hs->hs_next;
720 	}
721 
722 	if (hs == NULL) {
723 		/*
724 		 * Unable to find device using devnum so use
725 		 * key associated with shs_component_old instead.
726 		 * If unable to find a unique key for shs_component_old
727 		 * then fail since namespace has multiple entries
728 		 * for this old component and we're unable to determine
729 		 * which key is the valid match for shs_component_old.
730 		 *
731 		 * Only need to compare keys when hs_devnum is NODEV.
732 		 */
733 		if (md_getkeyfromdev(setno, mddb_getsidenum(setno),
734 		    shs->shs_component_old, &key_old, &num_keys_old) != 0) {
735 			return (mddeverror(&shs->mde, MDE_NAME_SPACE,
736 			    shs->shs_component_old));
737 		}
738 
739 		/*
740 		 * If more than one key matches given old_dev - fail command
741 		 * since unable to determine which key is correct.
742 		 */
743 		if (num_keys_old > 1) {
744 			return (mddeverror(&shs->mde, MDE_MULTNM,
745 			    shs->shs_component_old));
746 		}
747 		/*
748 		 * If there is no key for this entry then fail since
749 		 * a key for this entry should exist.
750 		 */
751 		if (num_keys_old == 0) {
752 			return (mddeverror(&shs->mde, MDE_INVAL_HS,
753 			    shs->shs_component_old));
754 		}
755 		/* Scan the hot spare list again */
756 		hs = (hot_spare_t *)md_set[setno].s_hs;
757 		prev_hs = (hot_spare_t *)0;
758 		while (hs) {
759 			/*
760 			 * Only need to compare keys when hs_devnum is NODEV.
761 			 */
762 			if ((hs->hs_devnum == NODEV64) &&
763 			    (hs->hs_key == key_old)) {
764 				break;
765 			}
766 			prev_hs = hs;
767 			hs = hs->hs_next;
768 		}
769 	}
770 
771 	if (hs == NULL) {
772 		return (mddeverror(&shs->mde, MDE_INVAL_HS,
773 		    shs->shs_component_old));
774 	}
775 
776 	/* check the force flag and the state of the hot spare */
777 	if (((shs->shs_options & HS_OPT_FORCE) == 0) &&
778 	    (hs->hs_state == HSS_RESERVED)) {
779 		return (mdhserror(&shs->mde, MDE_HS_RESVD,
780 		    shs->shs_hot_spare_pool,
781 		    hs->hs_devnum));
782 	}
783 
784 	/* Scan the hot spare pool list */
785 	hsp = find_hot_spare_pool(setno, shs->shs_hot_spare_pool);
786 	if (hsp == (hot_spare_pool_t *)0) {
787 		return (mdhsperror(&shs->mde, MDE_INVAL_HSP,
788 		    shs->shs_hot_spare_pool));
789 	}
790 
791 	/*
792 	 * Make sure the old device is in the pool.
793 	 */
794 	for (i = 0; i < hsp->hsp_nhotspares; i++) {
795 		if (hsp->hsp_hotspares[i] == hs->hs_record_id) {
796 			break;
797 		}
798 	}
799 	if (i >= hsp->hsp_nhotspares) {
800 		return (mddeverror(&shs->mde, MDE_INVAL_HS,
801 		    hs->hs_devnum));
802 	}
803 
804 	/* Scan the hot spare list for the new hs */
805 	new_hs = (hot_spare_t *)md_set[setno].s_hs;
806 	new_found = 0;
807 	while (new_hs) {
808 		if (new_hs->hs_devnum == shs->shs_component_new) {
809 			new_found = 1;
810 			break;
811 		}
812 		new_hs = new_hs->hs_next;
813 	}
814 
815 	/*
816 	 * Make sure the new device is not already in the pool.
817 	 * We don't have to search the hs in this hsp, if the
818 	 * new hs was just created. Only if the hot spare was found.
819 	 */
820 	if (new_found) {
821 		for (i = 0; i < hsp->hsp_nhotspares; i++)
822 			if (hsp->hsp_hotspares[i] == new_hs->hs_record_id) {
823 				return (mdhserror(&shs->mde, MDE_HS_INUSE,
824 				    shs->shs_hot_spare_pool,
825 				    new_hs->hs_devnum));
826 			}
827 	}
828 
829 	/* In case of a dryrun, we're done here */
830 	if (shs->shs_options & HS_OPT_DRYRUN) {
831 		return (0);
832 	}
833 
834 	/*
835 	 * Create the new hotspare
836 	 */
837 	if (!new_found) {
838 		/* create a hot spare record */
839 		if (shs->shs_size_option & MD_CRO_64BIT) {
840 #if defined(_ILP32)
841 			return (mdhserror(&shs->mde, MDE_HS_UNIT_TOO_LARGE,
842 			    shs->shs_hot_spare_pool, shs->shs_component_new));
843 #else
844 			recid = mddb_createrec(HS_ONDSK_STR_SIZE, typ1, HS_REC,
845 				MD_CRO_64BIT | MD_CRO_HOTSPARE, setno);
846 #endif
847 		} else {
848 			recid = mddb_createrec(HS_ONDSK_STR_SIZE, typ1, HS_REC,
849 				MD_CRO_32BIT | MD_CRO_HOTSPARE, setno);
850 		}
851 
852 		if (recid < 0) {
853 			return (mdhserror(&shs->mde, MDE_HS_CREATE_FAILURE,
854 			    shs->shs_hot_spare_pool,
855 			    shs->shs_component_new));
856 		}
857 
858 		/* get the addr */
859 		new_hs = (hot_spare_t *)mddb_getrecaddr_resize(recid,
860 			sizeof (*new_hs), 0);
861 
862 		new_hs->hs_record_id = recid;
863 		new_hs->hs_devnum = shs->shs_component_new;
864 		new_hs->hs_key = shs->shs_key_new;
865 		new_hs->hs_start_blk = shs->shs_start_blk;
866 		new_hs->hs_has_label = shs->shs_has_label;
867 		new_hs->hs_number_blks = shs->shs_number_blks;
868 		set_hot_spare_state(new_hs, HSS_AVAILABLE);
869 		new_hs->hs_refcount = 0;
870 		new_hs->hs_isopen = 1;
871 	}
872 
873 	/* lock the db records */
874 	recids[0] = hs->hs_record_id;
875 	recids[1] = new_hs->hs_record_id;
876 	recids[2] = hsp->hsp_record_id;
877 	recids[3] = 0;
878 
879 	sv.setno = setno;
880 	sv.key = hs->hs_key;
881 
882 	hs->hs_refcount--;
883 	if (hs->hs_refcount == 0) {
884 		/*
885 		 * NOTE: We do not commit the previous hot spare record.
886 		 *	 There is no need, the link we get rebuilt at boot time.
887 		 */
888 		if (prev_hs) {
889 			prev_hs->hs_next = hs->hs_next;
890 		} else
891 			md_set[setno].s_hs = (void *) hs->hs_next;
892 
893 		/* mark hs to be deleted in the correct order */
894 		delete_hs = 1;
895 
896 		recids[0] = new_hs->hs_record_id;
897 		recids[1] = hsp->hsp_record_id;
898 		recids[2] = 0;
899 	}
900 
901 	/* link into the hs list */
902 	new_hs->hs_refcount++;
903 	if (!new_found) {
904 		/* do this AFTER the old dev is possibly removed */
905 		new_hs->hs_next = (hot_spare_t *)md_set[setno].s_hs;
906 		md_set[setno].s_hs = (void *) new_hs;
907 	}
908 
909 	/* find the location of the old hs in the hsp */
910 	for (i = 0; i < hsp->hsp_nhotspares; i++) {
911 		if (hsp->hsp_hotspares[i] == hs->hs_record_id) {
912 			hsp->hsp_hotspares[i] = new_hs->hs_record_id;
913 			break;
914 		}
915 	}
916 
917 	if (shs->shs_size_option & MD_CRO_64BIT) {
918 		new_hs->hs_revision = MD_64BIT_META_DEV;
919 	} else {
920 		new_hs->hs_revision = MD_32BIT_META_DEV;
921 	}
922 
923 	/* commit the db records */
924 	mddb_commitrecs_wrapper(recids);
925 
926 	if (delete_hs)
927 		mddb_deleterec_wrapper(hs->hs_record_id);
928 
929 	md_rem_names(&sv, 1);
930 
931 	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REPLACE, SVM_TAG_HSP, setno,
932 	    md_expldev(hsp->hsp_self_id));
933 	return (0);
934 }
935 
936 static int
937 seths_enable(set_hs_params_t *shs)
938 {
939 	hot_spare_t	*hs;
940 	mddb_recid_t	recids[2];
941 	set_t		setno = shs->md_driver.md_setno;
942 	mdkey_t		key_old;
943 	int		num_keys_old = 0;
944 
945 
946 	/*
947 	 * Find device by using key associated with shs_component_old.
948 	 * If unable to find a unique key for shs_component_old
949 	 * then fail since namespace has multiple entries
950 	 * for this old component and we're unable to determine
951 	 * which key is the valid match for shs_component_old.
952 	 * This failure keeps a hotspare from being enabled on a slice
953 	 * that may already be in use by another metadevice.
954 	 */
955 	if (md_getkeyfromdev(setno, mddb_getsidenum(setno),
956 	    shs->shs_component_old, &key_old, &num_keys_old) != 0) {
957 		return (mddeverror(&shs->mde, MDE_NAME_SPACE,
958 		    shs->shs_component_old));
959 	}
960 
961 	/*
962 	 * If more than one key matches given old_dev - fail command
963 	 * since unable to determine which key is correct.
964 	 */
965 	if (num_keys_old > 1) {
966 		return (mddeverror(&shs->mde, MDE_MULTNM,
967 		    shs->shs_component_old));
968 	}
969 	/*
970 	 * If there is no key for this entry then fail since
971 	 * a key for this entry should exist.
972 	 */
973 	if (num_keys_old == 0) {
974 		return (mddeverror(&shs->mde, MDE_INVAL_HS,
975 		    shs->shs_component_old));
976 	}
977 
978 	/* Scan the hot spare list for the hs */
979 	hs = (hot_spare_t *)md_set[setno].s_hs;
980 	while (hs) {
981 		/*
982 		 * Since component may or may not be currently in the system,
983 		 * use the keys to find a match (not the devt).
984 		 */
985 		if (hs->hs_key == key_old) {
986 			break;
987 		}
988 		hs = hs->hs_next;
989 	}
990 
991 	if (hs == NULL) {
992 		return (mddeverror(&shs->mde, MDE_INVAL_HS,
993 			shs->shs_component_old));
994 	}
995 
996 	/* make sure it's broken */
997 	if (hs->hs_state != HSS_BROKEN) {
998 		return (mddeverror(&shs->mde, MDE_FIX_INVAL_HS_STATE,
999 		    hs->hs_devnum));
1000 	}
1001 
1002 	/* In case of a dryrun, we're done here */
1003 	if (shs->shs_options & HS_OPT_DRYRUN) {
1004 		return (0);
1005 	}
1006 
1007 	/* fix it */
1008 	set_hot_spare_state(hs, HSS_AVAILABLE);
1009 	hs->hs_start_blk = shs->shs_start_blk;
1010 	hs->hs_has_label = shs->shs_has_label;
1011 	hs->hs_number_blks = shs->shs_number_blks;
1012 
1013 	/* commit the db records */
1014 	recids[0] = hs->hs_record_id;
1015 	recids[1] = 0;
1016 	mddb_commitrecs_wrapper(recids);
1017 	SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ENABLE, SVM_TAG_HS, setno,
1018 	    shs->shs_component_old);
1019 
1020 	return (0);
1021 }
1022 
1023 static int
1024 get_hs(
1025 	get_hs_params_t	*ghs
1026 )
1027 {
1028 	hot_spare_t	*hs;
1029 	set_t		setno = ghs->md_driver.md_setno;
1030 
1031 	mdclrerror(&ghs->mde);
1032 
1033 	/* Scan the hot spare list for the hs */
1034 	hs = (hot_spare_t *)md_set[setno].s_hs;
1035 	while (hs) {
1036 		if (hs->hs_key == ghs->ghs_key) {
1037 			break;
1038 		}
1039 		hs = hs->hs_next;
1040 	}
1041 
1042 	if (hs == NULL) {
1043 		return (mddeverror(&ghs->mde, MDE_INVAL_HS,
1044 		    ghs->ghs_devnum));
1045 	}
1046 
1047 	ghs->ghs_start_blk = hs->hs_start_blk;
1048 	ghs->ghs_number_blks = hs->hs_number_blks;
1049 	ghs->ghs_state = hs->hs_state;
1050 	ghs->ghs_timestamp = hs->hs_timestamp;
1051 	ghs->ghs_revision = hs->hs_revision;
1052 	return (0);
1053 }
1054 
1055 static void
1056 build_key_list(set_t setno, hot_spare_pool_t *hsp, mdkey_t *list)
1057 {
1058 	int	i;
1059 
1060 	for (i = 0; i < hsp->hsp_nhotspares; i++) {
1061 		hot_spare_t *hs;
1062 		hs = lookup_hot_spare(setno, hsp->hsp_hotspares[i], 1);
1063 		list[i] = hs->hs_key;
1064 	}
1065 }
1066 
1067 static int
1068 get_hsp(
1069 	void			*d,
1070 	int			mode
1071 )
1072 {
1073 	hot_spare_pool_t	*hsp;
1074 	get_hsp_t		*ghsp;
1075 	size_t			size;
1076 	set_t			setno;
1077 	int			err = 0;
1078 	md_i_get_t		*migp = (md_i_get_t *)d;
1079 
1080 
1081 	setno = migp->md_driver.md_setno;
1082 
1083 	mdclrerror(&migp->mde);
1084 
1085 	/* Scan the hot spare pool list */
1086 	hsp = find_hot_spare_pool(setno, migp->id);
1087 	if (hsp == NULL) {
1088 		return (mdhsperror(&migp->mde, MDE_INVAL_HSP,
1089 			migp->id));
1090 	}
1091 
1092 	size = (sizeof (ghsp->ghsp_hs_keys[0]) * (hsp->hsp_nhotspares - 1)) +
1093 	    sizeof (get_hsp_t);
1094 
1095 	if (migp->size == 0) {
1096 		migp->size = (int)size;
1097 		return (0);
1098 	}
1099 
1100 	if (migp->size < size)
1101 		return (EFAULT);
1102 
1103 	ghsp = kmem_alloc(size, KM_SLEEP);
1104 
1105 	ghsp->ghsp_id = hsp->hsp_self_id;
1106 	ghsp->ghsp_refcount = hsp->hsp_refcount;
1107 	ghsp->ghsp_nhotspares = hsp->hsp_nhotspares;
1108 	build_key_list(setno, hsp, ghsp->ghsp_hs_keys);
1109 	if (ddi_copyout(ghsp, (caddr_t)(uintptr_t)migp->mdp, size, mode))
1110 		err = EFAULT;
1111 	kmem_free(ghsp, size);
1112 	return (err);
1113 }
1114 
1115 static int
1116 set_hs(
1117 	set_hs_params_t	*shs
1118 )
1119 {
1120 	mdclrerror(&shs->mde);
1121 
1122 	if (md_get_setstatus(shs->md_driver.md_setno) & MD_SET_STALE)
1123 		return (mdmddberror(&shs->mde, MDE_DB_STALE, NODEV32,
1124 		    shs->md_driver.md_setno));
1125 
1126 	switch (shs->shs_cmd) {
1127 	case ADD_HOT_SPARE:
1128 		return (seths_add(shs));
1129 	case DELETE_HOT_SPARE:
1130 		return (seths_delete(shs));
1131 	case REPLACE_HOT_SPARE:
1132 		return (seths_replace(shs));
1133 	case FIX_HOT_SPARE:
1134 		return (seths_enable(shs));
1135 	default:
1136 		return (mderror(&shs->mde, MDE_INVAL_HSOP));
1137 	}
1138 }
1139 
1140 static void
1141 hotspares_poke_hotspares(void)
1142 {
1143 	intptr_t	(*poke_hs)();
1144 	int		i;
1145 
1146 	for (i = 0; i < MD_NOPS; i++) {
1147 		/* handle change */
1148 		poke_hs = md_get_named_service(NODEV64, i, "poke hotspares", 0);
1149 		if (poke_hs)
1150 			(void) (*poke_hs)();
1151 	}
1152 }
1153 
1154 
1155 /*ARGSUSED4*/
1156 static int
1157 hotspares_ioctl(
1158 	dev_t	dev,
1159 	int	cmd,
1160 	void	*data,
1161 	int	mode,
1162 	IOLOCK	*lockp
1163 )
1164 {
1165 	size_t	sz = 0;
1166 	void	*d = NULL;
1167 	int	err = 0;
1168 
1169 	/* single thread */
1170 	if (getminor(dev) != MD_ADM_MINOR)
1171 		return (ENOTTY);
1172 
1173 	/* We can only handle 32-bit clients for internal commands */
1174 	if ((mode & DATAMODEL_MASK) != DATAMODEL_ILP32) {
1175 		return (EINVAL);
1176 	}
1177 
1178 	mutex_enter(&md_mx);
1179 	while (md_status & MD_GBL_HS_LOCK)
1180 		cv_wait(&md_cv, &md_mx);
1181 	md_status |= MD_GBL_HS_LOCK;
1182 	mutex_exit(&md_mx);
1183 
1184 	/* dispatch ioctl */
1185 	switch (cmd) {
1186 
1187 	case MD_IOCSET_HS:	/* setup hot spares and pools */
1188 	{
1189 		if (! (mode & FWRITE)) {
1190 			err = EACCES;
1191 			break;
1192 		}
1193 
1194 		sz = sizeof (set_hs_params_t);
1195 		d = kmem_alloc(sz, KM_SLEEP);
1196 
1197 		if (ddi_copyin(data, d, sz, mode)) {
1198 			err = EFAULT;
1199 			break;
1200 		}
1201 
1202 		err = set_hs(d);
1203 		break;
1204 	}
1205 
1206 	case MD_IOCGET_HS:	/* get hot spare info */
1207 	{
1208 		if (! (mode & FREAD)) {
1209 			err = EACCES;
1210 			break;
1211 		}
1212 
1213 		sz = sizeof (get_hs_params_t);
1214 		d = kmem_alloc(sz, KM_SLEEP);
1215 
1216 		if (ddi_copyin(data, d, sz, mode)) {
1217 			err = EFAULT;
1218 			break;
1219 		}
1220 
1221 		err = get_hs(d);
1222 		break;
1223 	}
1224 
1225 	case MD_IOCGET:		/* get hot spare pool info */
1226 	{
1227 		if (! (mode & FREAD)) {
1228 			err = EACCES;
1229 			break;
1230 		}
1231 
1232 		sz = sizeof (md_i_get_t);
1233 		d = kmem_alloc(sz, KM_SLEEP);
1234 
1235 		if (ddi_copyin(data, d, sz, mode)) {
1236 			err = EFAULT;
1237 			break;
1238 		}
1239 
1240 		err = get_hsp(d, mode);
1241 		break;
1242 	}
1243 
1244 	default:
1245 		err = ENOTTY;
1246 	}
1247 
1248 	/*
1249 	 * copyout and free any args
1250 	 */
1251 	if (sz != 0) {
1252 		if (err == 0) {
1253 			if (ddi_copyout(d, data, sz, mode) != 0) {
1254 				err = EFAULT;
1255 			}
1256 		}
1257 		kmem_free(d, sz);
1258 	}
1259 
1260 	/* un single thread */
1261 	mutex_enter(&md_mx);
1262 	md_status &= ~MD_GBL_HS_LOCK;
1263 	cv_broadcast(&md_cv);
1264 	mutex_exit(&md_mx);
1265 
1266 	/* handle change */
1267 	hotspares_poke_hotspares();
1268 
1269 	/* return success */
1270 	return (err);
1271 }
1272 
1273 
1274 static void
1275 load_hotspare(set_t setno, mddb_recid_t recid)
1276 {
1277 	hot_spare_t	*hs;
1278 	mddb_de_ic_t	*dep;
1279 	mddb_rb32_t	*rbp;
1280 	size_t		newreqsize;
1281 	hot_spare_t	*b_hs;
1282 	hot_spare32_od_t *s_hs;
1283 
1284 	mddb_setrecprivate(recid, MD_PRV_GOTIT);
1285 
1286 	dep = mddb_getrecdep(recid);
1287 	dep->de_flags = MDDB_F_HOTSPARE;
1288 	rbp = dep->de_rb;
1289 	if (rbp->rb_revision == MDDB_REV_RB) {
1290 		/*
1291 		 * Needs to convert to internal 64 bit
1292 		 */
1293 		s_hs = (hot_spare32_od_t *)mddb_getrecaddr(recid);
1294 		newreqsize = sizeof (hot_spare_t);
1295 		b_hs = (hot_spare_t *)kmem_zalloc(newreqsize, KM_SLEEP);
1296 		hs_convert((caddr_t)s_hs, (caddr_t)b_hs, SMALL_2_BIG);
1297 		kmem_free(s_hs, dep->de_reqsize);
1298 		dep->de_rb_userdata = b_hs;
1299 		dep->de_reqsize = newreqsize;
1300 		hs = b_hs;
1301 	} else {
1302 		hs = (hot_spare_t *)mddb_getrecaddr_resize
1303 			(recid, sizeof (*hs), 0);
1304 	}
1305 
1306 
1307 #if defined(_ILP32)
1308 	if (hs->hs_revision == MD_64BIT_META_DEV) {
1309 		char	devname[MD_MAX_CTDLEN];
1310 
1311 		set_hot_spare_state(hs, HSS_BROKEN);
1312 		(void) md_devname(setno, hs->hs_devnum, devname,
1313 		    sizeof (devname));
1314 		cmn_err(CE_NOTE, "%s is unavailable because 64 bit hotspares "
1315 		    "are not accessible on a 32 bit kernel\n", devname);
1316 	}
1317 #endif
1318 
1319 	ASSERT(hs != NULL);
1320 
1321 	if (hs->hs_refcount == 0) {
1322 		mddb_setrecprivate(recid, MD_PRV_PENDDEL);
1323 		return;
1324 	}
1325 
1326 	hs->hs_next = (hot_spare_t *)md_set[setno].s_hs;
1327 	md_set[setno].s_hs = (void *)hs;
1328 
1329 	hs->hs_isopen = 0;
1330 
1331 	hs->hs_devnum = md_getdevnum(setno, mddb_getsidenum(setno),
1332 		hs->hs_key, MD_NOTRUST_DEVT);
1333 }
1334 
1335 
1336 static void
1337 load_hotsparepool(set_t setno, mddb_recid_t recid)
1338 {
1339 	hot_spare_pool_t *hsp;
1340 	hot_spare_pool_ond_t *hsp_ond;
1341 	size_t hsp_icsize;
1342 
1343 	mddb_setrecprivate(recid, MD_PRV_GOTIT);
1344 
1345 	hsp_ond = (hot_spare_pool_ond_t *)mddb_getrecaddr(recid);
1346 	ASSERT(hsp_ond != NULL);
1347 
1348 	if (hsp_ond->hsp_self_id == MD_HSP_NONE) {
1349 		mddb_setrecprivate(recid, MD_PRV_PENDDEL);
1350 		return;
1351 	}
1352 
1353 	hsp_icsize =  HSP_ONDSK_STR_OFF + mddb_getrecsize(recid);
1354 
1355 	hsp = (hot_spare_pool_t *)mddb_getrecaddr_resize(recid, hsp_icsize,
1356 		HSP_ONDSK_STR_OFF);
1357 	hsp->hsp_next = (hot_spare_pool_t *)md_set[setno].s_hsp;
1358 	md_set[setno].s_hsp = (void *) hsp;
1359 
1360 	rw_enter(&hotspares_md_ops.md_link_rw.lock, RW_WRITER);
1361 	hsp->hsp_link.ln_next = hotspares_md_ops.md_head;
1362 	hsp->hsp_link.ln_setno = setno;
1363 	hsp->hsp_link.ln_id = hsp->hsp_self_id;
1364 	hotspares_md_ops.md_head = &hsp->hsp_link;
1365 	rw_exit(&hotspares_md_ops.md_link_rw.lock);
1366 }
1367 
1368 static int
1369 hotspares_snarf(md_snarfcmd_t cmd, set_t setno)
1370 {
1371 	mddb_recid_t	recid;
1372 	int		gotsomething;
1373 	mddb_type_t	typ1;
1374 
1375 	if (cmd == MD_SNARF_CLEANUP)
1376 		return (0);
1377 
1378 	gotsomething = 0;
1379 
1380 	typ1 = (mddb_type_t)md_getshared_key(setno,
1381 	    hotspares_md_ops.md_driver.md_drivername);
1382 	recid = mddb_makerecid(setno, 0);
1383 	while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) {
1384 		if (mddb_getrecprivate(recid) & MD_PRV_GOTIT)
1385 			continue;
1386 
1387 		switch (mddb_getrectype2(recid)) {
1388 		case HSP_REC:
1389 			load_hotsparepool(setno, recid);
1390 			gotsomething = 1;
1391 			break;
1392 		case HS_REC:
1393 			load_hotspare(setno, recid);
1394 			gotsomething = 1;
1395 			break;
1396 		default:
1397 			ASSERT(0);
1398 		}
1399 	}
1400 
1401 	if (gotsomething)
1402 		return (gotsomething);
1403 
1404 	recid = mddb_makerecid(setno, 0);
1405 	while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0)
1406 		if (!(mddb_getrecprivate(recid) & MD_PRV_GOTIT))
1407 			mddb_setrecprivate(recid, MD_PRV_PENDDEL);
1408 
1409 	return (0);
1410 }
1411 
1412 static int
1413 hotspares_halt(md_haltcmd_t cmd, set_t setno)
1414 {
1415 	hot_spare_t		*hs, **p_hs;
1416 	hot_spare_pool_t	*hsp, **p_hsp;
1417 
1418 	if (cmd == MD_HALT_CLOSE)
1419 		return (0);
1420 
1421 	if (cmd == MD_HALT_OPEN)
1422 		return (0);
1423 
1424 	if (cmd == MD_HALT_CHECK)
1425 		return (0);
1426 
1427 	if (cmd == MD_HALT_UNLOAD)
1428 		return (0);
1429 
1430 	if (cmd != MD_HALT_DOIT)
1431 		return (1);
1432 	/*
1433 	 * Find all the hotspares for set "setno"
1434 	 *   and remove them from the hot_spare_list.
1435 	 */
1436 	p_hs = (hot_spare_t **)&md_set[setno].s_hs;
1437 	hs = (hot_spare_t *)md_set[setno].s_hs;
1438 	for (; hs != NULL; hs = *p_hs)
1439 		*p_hs = hs->hs_next;
1440 
1441 	/*
1442 	 * Find all the hotspare pools for set "setno"
1443 	 *   and remove them from the hot_spare_pools list.
1444 	 * Also remove from the get_next list.
1445 	 */
1446 	p_hsp = (hot_spare_pool_t **)&md_set[setno].s_hsp;
1447 	hsp = (hot_spare_pool_t *)md_set[setno].s_hsp;
1448 	for (; hsp != NULL; hsp = *p_hsp) {
1449 		md_rem_link(setno, hsp->hsp_self_id,
1450 		    &hotspares_md_ops.md_link_rw.lock,
1451 		    &hotspares_md_ops.md_head);
1452 		*p_hsp = hsp->hsp_next;
1453 	}
1454 
1455 	return (0);
1456 }
1457 
1458 static hot_spare_t *
1459 usable_hs(
1460 	set_t		setno,
1461 	mddb_recid_t	hs_id,
1462 	diskaddr_t	nblks,
1463 	int		labeled,
1464 	diskaddr_t	*start)
1465 {
1466 	hot_spare_t	*hs;
1467 
1468 	hs = lookup_hot_spare(setno, hs_id, 1);
1469 
1470 	if (hs->hs_state != HSS_AVAILABLE)
1471 		return ((hot_spare_t *)0);
1472 
1473 	if (labeled && hs->hs_has_label && (hs->hs_number_blks >= nblks)) {
1474 		*start = 0;
1475 		return (hs);
1476 	} else if ((hs->hs_number_blks - hs->hs_start_blk) >= nblks) {
1477 		*start = hs->hs_start_blk;
1478 		return (hs);
1479 	}
1480 	return ((hot_spare_t *)0);
1481 }
1482 
1483 static int
1484 reserve_a_hs(
1485 	set_t		setno,
1486 	mddb_recid_t	id,
1487 	uint64_t	size,
1488 	int		labeled,
1489 	mddb_recid_t	*hs_id,
1490 	mdkey_t		*key,
1491 	md_dev64_t	*dev,
1492 	diskaddr_t	*sblock)
1493 {
1494 	hot_spare_pool_t	*hsp;
1495 	hot_spare_t		*hs;
1496 	int			i;
1497 
1498 	*hs_id = 0;
1499 
1500 	hsp = find_hot_spare_pool(setno, id);
1501 	if (hsp == NULL)
1502 		return (-1);
1503 
1504 	for (i = 0; i < hsp->hsp_nhotspares; i++) {
1505 		hs = usable_hs(setno, hsp->hsp_hotspares[i],
1506 		    size, labeled, sblock);
1507 		if (hs == NULL)
1508 			continue;
1509 
1510 		set_hot_spare_state(hs, HSS_RESERVED);
1511 		*hs_id = hs->hs_record_id;
1512 		*key = hs->hs_key;
1513 		*dev = hs->hs_devnum;
1514 		/* NOTE: Mirror code commits the hs record */
1515 		return (0);
1516 	}
1517 
1518 	return (-1);
1519 }
1520 
1521 
1522 /* ARGSUSED3 */
1523 static int
1524 return_a_hs(
1525 	set_t			setno,
1526 	mddb_recid_t		id,
1527 	mddb_recid_t		*hs_id,
1528 	mdkey_t			key,
1529 	diskaddr_t		sblock,
1530 	uint64_t		size,
1531 	hotspare_states_t	new_state)
1532 {
1533 	hot_spare_pool_t	*hsp;
1534 	hot_spare_t		*hs;
1535 	int			i;
1536 
1537 	/*
1538 	 * NOTE: sblock/size are not currently being used.
1539 	 *	 That is because we always allocate the whole hs.
1540 	 *	 Later if we choose to allocate only what is needed
1541 	 *	 then the sblock/size can be used to determine
1542 	 *	 which part is being unreseved.
1543 	 */
1544 	*hs_id = 0;
1545 
1546 	hsp = find_hot_spare_pool(setno, id);
1547 	if (hsp == NULL)
1548 		return (-1);
1549 
1550 	for (i = 0; i < hsp->hsp_nhotspares; i++) {
1551 		hs = lookup_hot_spare(setno, hsp->hsp_hotspares[i], 1);
1552 		if (hs->hs_key != key)
1553 			continue;
1554 
1555 		set_hot_spare_state(hs, new_state);
1556 		*hs_id = hs->hs_record_id;
1557 		if (new_state == HSS_BROKEN) {
1558 			SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, SVM_TAG_HS,
1559 			    setno, hs->hs_devnum);
1560 		}
1561 		if (new_state == HSS_AVAILABLE) {
1562 			SE_NOTIFY(EC_SVM_STATE, ESC_SVM_HS_FREED, SVM_TAG_HS,
1563 			    setno, hs->hs_devnum);
1564 		}
1565 
1566 		/* NOTE: Mirror/Raid code commits the hs record */
1567 		return (0);
1568 	}
1569 
1570 	return (-1);
1571 }
1572 
1573 
1574 static int
1575 modify_hsp_ref(set_t setno, mddb_recid_t id, int incref,  mddb_recid_t *hsp_id)
1576 {
1577 	hot_spare_pool_t	*hsp;
1578 
1579 	*hsp_id = 0;
1580 
1581 	if (id  < 0)
1582 		return (0);
1583 
1584 	hsp = find_hot_spare_pool(setno, id);
1585 	if (hsp == NULL)
1586 		return (-1);
1587 
1588 	if (incref)
1589 		hsp->hsp_refcount++;
1590 	else
1591 		hsp->hsp_refcount--;
1592 
1593 	*hsp_id = hsp->hsp_record_id;
1594 
1595 	/* NOTE: Stripe code commits the hsp record */
1596 	return (0);
1597 }
1598 
1599 
1600 static int
1601 mkdev_for_a_hs(mddb_recid_t hs_id, md_dev64_t *dev)
1602 {
1603 	hot_spare_t	*hs;
1604 
1605 	hs = lookup_hot_spare(mddb_getsetnum(hs_id), hs_id, 0);
1606 	if (hs == NULL)
1607 		return (0);
1608 
1609 	*dev = hs->hs_devnum;
1610 	return (0);
1611 }
1612 
1613 static intptr_t
1614 hotspares_interface(
1615 	hs_cmds_t	cmd,
1616 	mddb_recid_t	id,
1617 	uint64_t	size,
1618 	int		bool,
1619 	mddb_recid_t	*hs_id,
1620 	mdkey_t		*key,
1621 	md_dev64_t	*dev,
1622 	diskaddr_t	*sblock)
1623 {
1624 	set_t	setno;
1625 	int	err = -1;
1626 
1627 	mutex_enter(&md_mx);
1628 	while (md_status & MD_GBL_HS_LOCK)
1629 		cv_wait(&md_cv, &md_mx);
1630 
1631 	/* If md_halt has been run do not continue */
1632 	if (md_status & (MD_GBL_HALTED | MD_GBL_DAEMONS_DIE)) {
1633 		mutex_exit(&md_mx);
1634 		return (ENXIO);
1635 	}
1636 
1637 	md_status |= MD_GBL_HS_LOCK;
1638 	mutex_exit(&md_mx);
1639 
1640 	setno = mddb_getsetnum(id);
1641 
1642 	switch (cmd) {
1643 	case HS_GET:
1644 		err = reserve_a_hs(setno, id, size, bool, hs_id,
1645 		    key, dev, sblock);
1646 		break;
1647 	case HS_FREE:
1648 		err = return_a_hs(setno, id, hs_id, *key, 0, 0, HSS_AVAILABLE);
1649 		hotspares_poke_hotspares();
1650 		break;
1651 	case HS_BAD:
1652 		err = return_a_hs(setno, id, hs_id, *key, 0, 0, HSS_BROKEN);
1653 		break;
1654 	case HSP_INCREF:
1655 		err = modify_hsp_ref(setno, id, 1, hs_id);
1656 		break;
1657 	case HSP_DECREF:
1658 		err = modify_hsp_ref(setno, id, 0, hs_id);
1659 		break;
1660 	case HS_MKDEV:
1661 		err = mkdev_for_a_hs(*hs_id, dev);
1662 		break;
1663 	}
1664 
1665 	mutex_enter(&md_mx);
1666 	md_status &= ~MD_GBL_HS_LOCK;
1667 	cv_broadcast(&md_cv);
1668 	mutex_exit(&md_mx);
1669 
1670 	return (err);
1671 }
1672 
1673 static void
1674 imp_hotsparepool(
1675 	set_t	setno,
1676 	mddb_recid_t	recid
1677 )
1678 {
1679 	hot_spare_pool_ond_t	*hsp_ond;
1680 	mddb_recid_t		*hsp_recid, *hs_recid;
1681 	int			i;
1682 	uint_t			*hsp_selfid;
1683 
1684 	mddb_setrecprivate(recid, MD_PRV_GOTIT);
1685 
1686 	hsp_ond = (hot_spare_pool_ond_t *)mddb_getrecaddr(recid);
1687 	hsp_recid = &(hsp_ond->hsp_record_id);
1688 	hsp_selfid = &(hsp_ond->hsp_self_id);
1689 	/*
1690 	 * Fixup the pool and hotspares
1691 	 */
1692 	*hsp_recid = MAKERECID(setno, DBID(*hsp_recid));
1693 	*hsp_selfid = MAKERECID(setno, DBID(*hsp_selfid));
1694 
1695 	for (i = 0; i < hsp_ond->hsp_nhotspares; i++) {
1696 		hs_recid = &(hsp_ond->hsp_hotspares[i]);
1697 		*hs_recid = MAKERECID(setno, DBID(*hs_recid));
1698 	}
1699 }
1700 
1701 static void
1702 imp_hotspare(
1703 	set_t	setno,
1704 	mddb_recid_t	recid
1705 )
1706 {
1707 	mddb_de_ic_t	*dep;
1708 	mddb_rb32_t	*rbp;
1709 	hot_spare_t	*hs64;
1710 	hot_spare32_od_t	*hs32;
1711 	mddb_recid_t	*hs_recid;
1712 
1713 	mddb_setrecprivate(recid, MD_PRV_GOTIT);
1714 
1715 	dep = mddb_getrecdep(recid);
1716 	rbp = dep->de_rb;
1717 	if (rbp->rb_revision == MDDB_REV_RB) {
1718 		/*
1719 		 * 32 bit hotspare
1720 		 */
1721 		hs32 = (hot_spare32_od_t *)mddb_getrecaddr(recid);
1722 		hs_recid = &(hs32->hs_record_id);
1723 	} else {
1724 		hs64 = (hot_spare_t *)mddb_getrecaddr(recid);
1725 		hs_recid = &(hs64->hs_record_id);
1726 	}
1727 
1728 	/*
1729 	 * Fixup the setno
1730 	 */
1731 	*hs_recid = MAKERECID(setno, DBID(*hs_recid));
1732 }
1733 
1734 static int
1735 hotspares_imp_set(
1736 	set_t	setno
1737 )
1738 {
1739 	mddb_recid_t	recid;
1740 	int		gotsomething;
1741 	mddb_type_t	typ1;
1742 
1743 
1744 	gotsomething = 0;
1745 
1746 	typ1 = (mddb_type_t)md_getshared_key(setno,
1747 	    hotspares_md_ops.md_driver.md_drivername);
1748 	recid = mddb_makerecid(setno, 0);
1749 	while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) {
1750 		if (mddb_getrecprivate(recid) & MD_PRV_GOTIT)
1751 			continue;
1752 
1753 		switch (mddb_getrectype2(recid)) {
1754 		case HSP_REC:
1755 			imp_hotsparepool(setno, recid);
1756 			gotsomething = 1;
1757 			break;
1758 		case HS_REC:
1759 			imp_hotspare(setno, recid);
1760 			gotsomething = 1;
1761 			break;
1762 		default:
1763 			ASSERT(0);
1764 		}
1765 	}
1766 
1767 	return (gotsomething);
1768 }
1769 
1770 static md_named_services_t hotspares_named_services[] = {
1771 	{hotspares_interface,	"hot spare interface"},
1772 	{NULL,			0}
1773 };
1774 
1775 md_ops_t hotspares_md_ops = {
1776 	NULL,			/* open */
1777 	NULL,			/* close */
1778 	NULL,			/* strategy */
1779 	NULL,			/* print */
1780 	NULL,			/* dump */
1781 	NULL,			/* read */
1782 	NULL,			/* write */
1783 	hotspares_ioctl,	/* hotspares_ioctl, */
1784 	hotspares_snarf,	/* hotspares_snarf */
1785 	hotspares_halt,		/* halt */
1786 	NULL,			/* aread */
1787 	NULL,			/* awrite */
1788 	hotspares_imp_set,	/* import set */
1789 	hotspares_named_services /* named_services */
1790 };
1791 
/*
 * fini_uninit:
 * -----------
 * Cleanup hook named in the MD_PLUGIN_MISC_MODULE() invocation of this
 * file.  Takes no arguments and returns nothing.
 */
static void
fini_uninit(void)
{
	/* prevent access to services that may have been imported */
	md_clear_hot_spare_interface();
}
1798 
1799 /* define the module linkage */
1800 MD_PLUGIN_MISC_MODULE("hot spares module %I%", md_noop, fini_uninit())
1801