xref: /titanic_41/usr/src/uts/common/io/lvm/hotspares/hotspares.c (revision 6dfee4834394825da35b977ca71cdc965bc7b6a4)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <sys/param.h>
30 #include <sys/systm.h>
31 #include <sys/conf.h>
32 #include <sys/file.h>
33 #include <sys/user.h>
34 #include <sys/uio.h>
35 #include <sys/t_lock.h>
36 #include <sys/kmem.h>
37 #include <vm/page.h>
38 #include <sys/sysmacros.h>
39 #include <sys/types.h>
40 #include <sys/mkdev.h>
41 #include <sys/stat.h>
42 #include <sys/open.h>
43 #include <sys/modctl.h>
44 #include <sys/ddi.h>
45 #include <sys/sunddi.h>
46 #include <sys/debug.h>
47 
48 #include <sys/lvm/md_hotspares.h>
49 #include <sys/lvm/md_convert.h>
50 
51 #include <sys/sysevent/eventdefs.h>
52 #include <sys/sysevent/svm.h>
53 
/*
 * Operations vector for this module.  The code below uses its
 * md_driver.md_drivername, md_link_rw.lock and md_head members.
 */
md_ops_t		hotspares_md_ops;
#ifndef	lint
/* Names the md driver this module depends on. */
static char		_depends_on[] = "drv/md";
/* Points at this module's operations vector. */
md_ops_t		*md_interface_ops = &hotspares_md_ops;
#endif

/* The following symbols are defined outside this file. */
extern md_ops_t		**md_ops;
extern md_ops_t		*md_opslist;
extern md_set_t		md_set[];	/* per-set state, indexed by setno */

extern kmutex_t		md_mx;		/* used to lock md global stuff */
extern kcondvar_t	md_cv;		/* md_status events */
extern int		md_status;

extern void		md_clear_hot_spare_interface();
69 
70 static void
71 set_hot_spare_state(hot_spare_t *hs, hotspare_states_t newstate)
72 {
73 	hs->hs_state = newstate;
74 	uniqtime32(&hs->hs_timestamp);
75 }
76 
77 static hot_spare_t *
78 lookup_hot_spare(set_t setno, mddb_recid_t hs_id, int must_exist)
79 {
80 	hot_spare_t *hs;
81 
82 	for (hs = (hot_spare_t *)md_set[setno].s_hs; hs; hs = hs->hs_next) {
83 		if (hs->hs_record_id == hs_id)
84 			return (hs);
85 	}
86 	if (must_exist)
87 		ASSERT(0);
88 
89 	return ((hot_spare_t *)NULL);
90 }
91 
92 static hot_spare_pool_t *
93 find_hot_spare_pool(set_t setno, int hsp_id)
94 {
95 	hot_spare_pool_t *hsp;
96 
97 	hsp = (hot_spare_pool_t *)md_set[setno].s_hsp;
98 	while (hsp != NULL) {
99 		if (hsp->hsp_self_id == hsp_id)
100 			return (hsp);
101 		hsp = hsp->hsp_next;
102 	}
103 
104 	return ((hot_spare_pool_t *)0);
105 }
106 
107 
108 static int
109 seths_create_hsp(set_hs_params_t *shs)
110 {
111 	hot_spare_pool_t	*hsp;
112 	mddb_recid_t		recid;
113 	set_t			setno;
114 	mddb_type_t		typ1;
115 
116 	setno = HSP_SET(shs->shs_hot_spare_pool);
117 
118 	/* Scan the hot spare pool list */
119 	hsp = find_hot_spare_pool(setno, shs->shs_hot_spare_pool);
120 	if (hsp != (hot_spare_pool_t *)0)
121 		return (0);
122 
123 	typ1 = (mddb_type_t)md_getshared_key(setno,
124 	    hotspares_md_ops.md_driver.md_drivername);
125 
126 	/* create a hot spare pool record */
127 	if (shs->shs_options & MD_CRO_64BIT) {
128 #if defined(_ILP32)
129 		return (mdhsperror(&shs->mde, MDE_HSP_UNIT_TOO_LARGE,
130 		    shs->shs_hot_spare_pool));
131 #else
132 		recid = mddb_createrec(sizeof (hot_spare_pool_ond_t), typ1,
133 			HSP_REC, MD_CRO_64BIT | MD_CRO_HOTSPARE_POOL, setno);
134 #endif
135 	} else {
136 		recid = mddb_createrec(sizeof (hot_spare_pool_ond_t), typ1,
137 			HSP_REC, MD_CRO_32BIT | MD_CRO_HOTSPARE_POOL, setno);
138 	}
139 
140 	if (recid < 0) {
141 		return (mdhsperror(&shs->mde, MDE_HSP_CREATE_FAILURE,
142 		    shs->shs_hot_spare_pool));
143 	}
144 
145 	/* get the record addr */
146 	hsp = (hot_spare_pool_t *)mddb_getrecaddr_resize(recid, sizeof (*hsp),
147 		HSP_ONDSK_STR_OFF);
148 
149 	hsp->hsp_self_id = shs->shs_hot_spare_pool;
150 	hsp->hsp_record_id = recid;
151 	hsp->hsp_next = (hot_spare_pool_t *)md_set[setno].s_hsp;
152 	hsp->hsp_refcount = 0;
153 	hsp->hsp_nhotspares = 0;
154 
155 	md_set[setno].s_hsp = (void *) hsp;
156 
157 	mddb_commitrec_wrapper(recid);
158 	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, SVM_TAG_HSP, setno,
159 	    md_expldev(hsp->hsp_self_id));
160 
161 	rw_enter(&hotspares_md_ops.md_link_rw.lock, RW_WRITER);
162 	hsp->hsp_link.ln_next = hotspares_md_ops.md_head;
163 	hsp->hsp_link.ln_setno = setno;
164 	hsp->hsp_link.ln_id = hsp->hsp_self_id;
165 	hotspares_md_ops.md_head = &hsp->hsp_link;
166 	rw_exit(&hotspares_md_ops.md_link_rw.lock);
167 
168 	return (0);
169 }
170 
171 
/*
 * Add a hot spare to a hot spare pool.
 *
 * With HS_OPT_DRYRUN set nothing is done.  With HS_OPT_POOL set only
 * an empty pool is created (see seths_create_hsp()).  Otherwise:
 *
 *   - the hot spare for shs_component_old is looked up (first by
 *     device number, then by namespace key for entries whose device
 *     number is NODEV64) and a new hot spare record is created if
 *     none is found;
 *   - the pool is looked up; if it does not exist a new pool record
 *     is created, otherwise a larger replacement pool record is
 *     created, the old contents are copied into it and the old
 *     record is deleted after the commit;
 *   - the hot spare's record id is appended to the pool and the
 *     affected records are committed.
 *
 * Returns 0 on success, or the result of the md*error() routine that
 * recorded the failure in shs->mde.
 */
static int
seths_add(set_hs_params_t *shs)
{
	hot_spare_t		*hs;
	hot_spare_pool_t	*hsp;
	hot_spare_pool_t	*prev_hsp;
	hot_spare_pool_t	*new_hsp;
	hot_spare_pool_t	*old_hsp;	/* only valid when delete_hsp */
	mddb_recid_t		recid;
	mddb_recid_t		recids[5];	/* zero-terminated commit list */
	size_t			new_size;
	int			i;
	int			delete_hsp = 0;
	int			irecid;
	set_t			setno;
	mddb_type_t		typ1;
	int			hsp_created = 0;
	mdkey_t			key_old;
	int			num_keys_old = 0;

	/* Not much to do here in case of a dryrun */
	if (shs->shs_options & HS_OPT_DRYRUN) {
		return (0);
	}

	/* create an empty hot spare pool */
	if (shs->shs_options & HS_OPT_POOL) {
		return (seths_create_hsp(shs));
	}

	setno = HSP_SET(shs->shs_hot_spare_pool);
	typ1 = (mddb_type_t)md_getshared_key(setno,
	    hotspares_md_ops.md_driver.md_drivername);

	/* Scan the hot spare list */
	hs = (hot_spare_t *)md_set[setno].s_hs;
	while (hs) {
		if (hs->hs_devnum == shs->shs_component_old) {
			break;
		}
		hs = hs->hs_next;
	}

	if (hs == NULL) {
		/*
		 * Did not find match for device using devnum so use
		 * key associated with shs_component_old just
		 * in case there is a match but the match's dev is NODEV.
		 * If unable to find a unique key for shs_component_old
		 * then fail since namespace has multiple entries
		 * for this old component and we shouldn't allow
		 * an addition of a hotspare in this case.
		 */
		if (md_getkeyfromdev(setno, mddb_getsidenum(setno),
		    shs->shs_component_old, &key_old, &num_keys_old) != 0) {
			return (mddeverror(&shs->mde, MDE_NAME_SPACE,
			    shs->shs_component_old));
		}

		/*
		 * If more than one key matches given old_dev - fail command
		 * since shouldn't add new hotspare if namespace has
		 * multiple entries.
		 */
		if (num_keys_old > 1) {
			return (mddeverror(&shs->mde, MDE_MULTNM,
			    shs->shs_component_old));
		}
		/*
		 * If there is no key for this entry then fail since
		 * a key for this entry should exist.
		 */
		if (num_keys_old == 0) {
			return (mddeverror(&shs->mde, MDE_INVAL_HS,
			    shs->shs_component_old));
		}
		/* Scan the hot spare list again */
		hs = (hot_spare_t *)md_set[setno].s_hs;
		while (hs) {
			/*
			 * Only need to compare keys when hs_devnum is NODEV.
			 */
			if ((hs->hs_devnum == NODEV64) &&
			    (hs->hs_key == key_old)) {
				break;
			}
			hs = hs->hs_next;
		}
	}

	/* Neither scan found the hot spare: it has to be created */
	if (hs == NULL) {
		/* create a hot spare record */
		if (shs->shs_size_option & MD_CRO_64BIT) {
#if defined(_ILP32)
			return (mdhserror(&shs->mde, MDE_HS_UNIT_TOO_LARGE,
			    shs->shs_hot_spare_pool, shs->shs_component_old));
#else
			recid = mddb_createrec(HS_ONDSK_STR_SIZE, typ1, HS_REC,
				MD_CRO_64BIT | MD_CRO_HOTSPARE, setno);
#endif
		} else {
			recid = mddb_createrec(HS_ONDSK_STR_SIZE, typ1, HS_REC,
				MD_CRO_32BIT | MD_CRO_HOTSPARE, setno);
		}

		if (recid < 0) {
			return (mdhserror(&shs->mde, MDE_HS_CREATE_FAILURE,
			    shs->shs_hot_spare_pool,
			    shs->shs_component_old));
		}

		/* get the addr */
		hs = (hot_spare_t *)mddb_getrecaddr_resize(recid, sizeof (*hs),
			0);

		hs->hs_record_id = recid;

		/* describe the component as supplied by the caller */
		hs->hs_devnum = shs->shs_component_old;
		hs->hs_key = shs->shs_key_old;
		hs->hs_start_blk = shs->shs_start_blk;
		hs->hs_has_label = shs->shs_has_label;
		hs->hs_number_blks = shs->shs_number_blks;
		set_hot_spare_state(hs, HSS_AVAILABLE);
		hs->hs_refcount = 0;
		/* link the new hot spare at the head of the set's list */
		hs->hs_next = (hot_spare_t *)md_set[setno].s_hs;
		md_set[setno].s_hs = (void *) hs;
	}

	/* Scan the hot spare pool list */
	hsp = (hot_spare_pool_t *)md_set[setno].s_hsp;
	prev_hsp = (hot_spare_pool_t *)0;
	while (hsp) {
		if (hsp->hsp_self_id == shs->shs_hot_spare_pool) {
			break;
		}
		prev_hsp = hsp;
		hsp = hsp->hsp_next;
	}

	if (hsp == NULL) {
		/*
		 * create a hot spare pool record
		 *
		 * NOTE(review): if this fails, a hot spare record created
		 * above stays linked on the set's list without having been
		 * committed -- confirm whether cleanup is needed here.
		 */
		recid = mddb_createrec(sizeof (hot_spare_pool_ond_t),
		    typ1, HSP_REC, MD_CRO_32BIT | MD_CRO_HOTSPARE_POOL, setno);

		if (recid < 0) {
			return (mdhsperror(&shs->mde, MDE_HSP_CREATE_FAILURE,
			    shs->shs_hot_spare_pool));
		}

		/* get the record addr */
		hsp = (hot_spare_pool_t *)mddb_getrecaddr_resize(recid,
			sizeof (*hsp), HSP_ONDSK_STR_OFF);

		hsp->hsp_self_id = shs->shs_hot_spare_pool;
		hsp->hsp_record_id = recid;
		hsp->hsp_next = (hot_spare_pool_t *)md_set[setno].s_hsp;
		hsp->hsp_refcount = 0;
		hsp->hsp_nhotspares = 0;

		/* force prev_hsp to NULL, this will cause hsp to be linked */
		prev_hsp = (hot_spare_pool_t *)0;

		/* publish the new pool on the module's link list */
		rw_enter(&hotspares_md_ops.md_link_rw.lock, RW_WRITER);
		hsp->hsp_link.ln_next = hotspares_md_ops.md_head;
		hsp->hsp_link.ln_setno = setno;
		hsp->hsp_link.ln_id = hsp->hsp_self_id;
		hotspares_md_ops.md_head = &hsp->hsp_link;
		rw_exit(&hotspares_md_ops.md_link_rw.lock);
		hsp_created = 1;
	} else {

		/*
		 * Make sure the hot spare is not already in the pool.
		 */
		for (i = 0; i < hsp->hsp_nhotspares; i++)
			if (hsp->hsp_hotspares[i] == hs->hs_record_id) {
				return (mdhserror(&shs->mde, MDE_HS_INUSE,
					shs->shs_hot_spare_pool,
					hs->hs_devnum));
			}
		/*
		 * Create a new hot spare pool record
		 * This gives us the one extra hs slot,
		 * because there is one slot in the
		 * hot_spare_pool struct
		 */
		new_size = sizeof (hot_spare_pool_ond_t) +
			(sizeof (mddb_recid_t) * hsp->hsp_nhotspares);
		recid = mddb_createrec(new_size, typ1, HSP_REC,
		    MD_CRO_32BIT | MD_CRO_HOTSPARE_POOL, setno);

		if (recid < 0) {
			return (mdhsperror(&shs->mde, MDE_HSP_CREATE_FAILURE,
			    hsp->hsp_self_id));
		}
		/* same sizing rule, this time for the in-core structure */
		new_size = sizeof (hot_spare_pool_t) +
			(sizeof (mddb_recid_t) * hsp->hsp_nhotspares);

		/* get the record addr */
		new_hsp = (hot_spare_pool_t *)mddb_getrecaddr_resize(recid,
			new_size, HSP_ONDSK_STR_OFF);

		/*
		 * copy the old record into the new one: the structure
		 * (which already includes one slot) plus the existing
		 * slots, less the one slot counted twice
		 */
		bcopy((caddr_t)hsp, (caddr_t)new_hsp,
		    (size_t)((sizeof (hot_spare_pool_t) +
		    (sizeof (mddb_recid_t) * hsp->hsp_nhotspares)
		    - sizeof (mddb_recid_t))));
		new_hsp->hsp_record_id = recid;

		/* swap the old pool for the new one on the link list */
		md_rem_link(setno, hsp->hsp_self_id,
		    &hotspares_md_ops.md_link_rw.lock,
		    &hotspares_md_ops.md_head);

		rw_enter(&hotspares_md_ops.md_link_rw.lock, RW_WRITER);
		new_hsp->hsp_link.ln_next = hotspares_md_ops.md_head;
		new_hsp->hsp_link.ln_setno = setno;
		new_hsp->hsp_link.ln_id = new_hsp->hsp_self_id;
		hotspares_md_ops.md_head = &new_hsp->hsp_link;
		rw_exit(&hotspares_md_ops.md_link_rw.lock);

		/* mark the old hsp to be deleted */
		delete_hsp = 1;
		old_hsp = hsp;
		hsp = new_hsp;
	}

	/* record which record format the hot spare uses */
	if (shs->shs_size_option & MD_CRO_64BIT) {
		hs->hs_revision = MD_64BIT_META_DEV;
	} else {
		hs->hs_revision = MD_32BIT_META_DEV;
	}

	/* lock the db records */
	recids[0] = hs->hs_record_id;
	recids[1] = hsp->hsp_record_id;
	irecid = 2;
	if (delete_hsp)
		recids[irecid++] = old_hsp->hsp_record_id;
	recids[irecid] = 0;

	/* increment the reference count */
	hs->hs_refcount++;

	/* add the hs at the end of the hot spare pool */
	hsp->hsp_hotspares[hsp->hsp_nhotspares] = hs->hs_record_id;
	hsp->hsp_nhotspares++;

	/*
	 * NOTE: We do not commit the previous hot spare pool record.
	 *	 There is no need, the link gets rebuilt at boot time.
	 */
	if (prev_hsp)
		prev_hsp->hsp_next = hsp;
	else
		md_set[setno].s_hsp = (void *) hsp;

	/* the superseded pool record no longer names a pool */
	if (delete_hsp)
		old_hsp->hsp_self_id = MD_HSP_NONE;

	/* commit the db records */
	mddb_commitrecs_wrapper(recids);

	if (delete_hsp) {
		/* delete the old hot spare pool record */
		mddb_deleterec_wrapper(old_hsp->hsp_record_id);
	}

	/* a freshly created pool is announced before the add itself */
	if (hsp_created) {
		SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, SVM_TAG_HSP, setno,
		    md_expldev(hsp->hsp_self_id));
	}
	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_ADD, SVM_TAG_HSP, setno,
	    md_expldev(hsp->hsp_self_id));

	return (0);
}
448 
449 
450 static int
451 seths_delete_hsp(set_hs_params_t *shs)
452 {
453 
454 	hot_spare_pool_t	*prev_hsp;
455 	hot_spare_pool_t	*hsp;
456 	set_t			setno;
457 	hsp_t			hspid;
458 
459 	setno = HSP_SET(shs->shs_hot_spare_pool);
460 
461 	/* Scan the hot spare pool list */
462 	prev_hsp = (hot_spare_pool_t *)0;
463 	hsp = (hot_spare_pool_t *)md_set[setno].s_hsp;
464 	while (hsp) {
465 		if (hsp->hsp_self_id == shs->shs_hot_spare_pool) {
466 			break;
467 		}
468 		prev_hsp = hsp;
469 		hsp = hsp->hsp_next;
470 	}
471 
472 	if (hsp == NULL) {
473 		return (mdhsperror(&shs->mde, MDE_INVAL_HSP,
474 		    shs->shs_hot_spare_pool));
475 	}
476 
477 	if (hsp->hsp_nhotspares != 0) {
478 		return (mdhsperror(&shs->mde, MDE_HSP_BUSY,
479 		    shs->shs_hot_spare_pool));
480 	}
481 
482 	if (hsp->hsp_refcount != 0) {
483 		return (mdhsperror(&shs->mde, MDE_HSP_REF,
484 		    shs->shs_hot_spare_pool));
485 	}
486 
487 	/* In case of a dryrun, we're done here */
488 	if (shs->shs_options & HS_OPT_DRYRUN) {
489 		return (0);
490 	}
491 	/*
492 	 * NOTE: We do not commit the previous hot spare pool record.
493 	 *	 There is no need, the link gets rebuilt at boot time.
494 	 */
495 	if (prev_hsp)
496 		prev_hsp->hsp_next = hsp->hsp_next;
497 	else
498 		md_set[setno].s_hsp = (void *) hsp->hsp_next;
499 
500 	hspid = hsp->hsp_self_id;
501 
502 	md_rem_link(setno, hsp->hsp_self_id,
503 	    &hotspares_md_ops.md_link_rw.lock,
504 	    &hotspares_md_ops.md_head);
505 
506 	mddb_deleterec_wrapper(hsp->hsp_record_id);
507 
508 	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, SVM_TAG_HSP, setno,
509 	    md_expldev(hspid));
510 	return (0);
511 }
512 
513 
514 static int
515 seths_delete(set_hs_params_t *shs)
516 {
517 	hot_spare_t		*hs;
518 	hot_spare_t		*prev_hs;
519 	hot_spare_pool_t	*hsp;
520 	mddb_recid_t		recids[4];
521 	int			i;
522 	set_t			setno;
523 	sv_dev_t		sv;
524 	int			delete_hs = 0;
525 	mdkey_t			key_old;
526 	int			num_keys_old = 0;
527 
528 	/* delete the hot spare pool */
529 	if (shs->shs_options & HS_OPT_POOL) {
530 		return (seths_delete_hsp(shs));
531 	}
532 
533 	setno = HSP_SET(shs->shs_hot_spare_pool);
534 
535 	/* Scan the hot spare list */
536 	hs = (hot_spare_t *)md_set[setno].s_hs;
537 	prev_hs = (hot_spare_t *)0;
538 	while (hs) {
539 		if (hs->hs_devnum == shs->shs_component_old) {
540 			break;
541 		}
542 		prev_hs = hs;
543 		hs = hs->hs_next;
544 	}
545 
546 	if (hs == NULL) {
547 		/*
548 		 * Unable to find device using devnum so use
549 		 * key associated with shs_component_old instead.
550 		 * If unable to find a unique key for shs_component_old
551 		 * then fail since namespace has multiple entries
552 		 * for this old component and we're unable to determine
553 		 * which key is the valid match for shs_component_old.
554 		 *
555 		 * Only need to compare keys when hs_devnum is NODEV.
556 		 */
557 		if (md_getkeyfromdev(setno, mddb_getsidenum(setno),
558 		    shs->shs_component_old, &key_old, &num_keys_old) != 0) {
559 			return (mddeverror(&shs->mde, MDE_NAME_SPACE,
560 			    shs->shs_component_old));
561 		}
562 
563 		/*
564 		 * If more than one key matches given old_dev - fail command
565 		 * since shouldn't add new hotspare if namespace has
566 		 * multiple entries.
567 		 */
568 		if (num_keys_old > 1) {
569 			return (mddeverror(&shs->mde, MDE_MULTNM,
570 			    shs->shs_component_old));
571 		}
572 		/*
573 		 * If there is no key for this entry then fail since
574 		 * a key for this entry should exist.
575 		 */
576 		if (num_keys_old == 0) {
577 			return (mddeverror(&shs->mde, MDE_INVAL_HS,
578 			    shs->shs_component_old));
579 		}
580 		/* Scan the hot spare list again */
581 		hs = (hot_spare_t *)md_set[setno].s_hs;
582 		prev_hs = (hot_spare_t *)0;
583 		while (hs) {
584 			/*
585 			 * Only need to compare keys when hs_devnum is NODEV.
586 			 */
587 			if ((hs->hs_devnum == NODEV64) &&
588 			    (hs->hs_key == key_old)) {
589 				break;
590 			}
591 			prev_hs = hs;
592 			hs = hs->hs_next;
593 		}
594 	}
595 
596 	if (hs == NULL) {
597 		return (mddeverror(&shs->mde, MDE_INVAL_HS,
598 		    shs->shs_component_old));
599 	}
600 
601 	/* Scan the hot spare pool list */
602 	hsp = find_hot_spare_pool(setno, shs->shs_hot_spare_pool);
603 	if (hsp == (hot_spare_pool_t *)0) {
604 		return (mdhsperror(&shs->mde, MDE_INVAL_HSP,
605 		    shs->shs_hot_spare_pool));
606 	}
607 
608 	/* check for force flag and state of hot spare */
609 	if (((shs->shs_options & HS_OPT_FORCE) == 0) &&
610 	    (hs->hs_state == HSS_RESERVED)) {
611 		return (mdhserror(&shs->mde, MDE_HS_RESVD,
612 		    shs->shs_hot_spare_pool, shs->shs_component_old));
613 	}
614 
615 	if (hsp->hsp_refcount && (hs->hs_state == HSS_RESERVED)) {
616 		return (mdhserror(&shs->mde, MDE_HS_RESVD,
617 		    shs->shs_hot_spare_pool, shs->shs_component_old));
618 	}
619 
620 	/*
621 	 * Make sure the device is in the pool.
622 	 */
623 	for (i = 0; i < hsp->hsp_nhotspares; i++) {
624 		if (hsp->hsp_hotspares[i] == hs->hs_record_id) {
625 			break;
626 		}
627 	}
628 
629 	if (i >= hsp->hsp_nhotspares) {
630 		return (mddeverror(&shs->mde, MDE_INVAL_HS,
631 		    hs->hs_devnum));
632 	}
633 
634 	/* In case of a dryrun, we're done here */
635 	if (shs->shs_options & HS_OPT_DRYRUN) {
636 		return (0);
637 	}
638 
639 	/* lock the db records */
640 	recids[0] = hs->hs_record_id;
641 	recids[1] = hsp->hsp_record_id;
642 	recids[2] = 0;
643 
644 	sv.setno = setno;
645 	sv.key = hs->hs_key;
646 
647 	hs->hs_refcount--;
648 	if (hs->hs_refcount == 0) {
649 		/*
650 		 * NOTE: We do not commit the previous hot spare record.
651 		 *	 There is no need, the link we get rebuilt at boot time.
652 		 */
653 		if (prev_hs) {
654 			prev_hs->hs_next = hs->hs_next;
655 		} else
656 			md_set[setno].s_hs = (void *) hs->hs_next;
657 
658 		/* mark the hot spare to be deleted */
659 		delete_hs = 1;
660 		recids[0] = hsp->hsp_record_id;
661 		recids[1] = 0;
662 	}
663 
664 	/* find the location of the hs in the hsp */
665 	for (i = 0; i < hsp->hsp_nhotspares; i++) {
666 		if (hsp->hsp_hotspares[i] == hs->hs_record_id)
667 			break;
668 	}
669 
670 	/* remove the hs from the hsp */
671 	for (i++; i < hsp->hsp_nhotspares; i++)
672 		hsp->hsp_hotspares[i - 1] = hsp->hsp_hotspares[i];
673 
674 	hsp->hsp_nhotspares--;
675 
676 	/* commit the db records */
677 	mddb_commitrecs_wrapper(recids);
678 
679 	if (delete_hs)
680 		mddb_deleterec_wrapper(hs->hs_record_id);
681 
682 	md_rem_names(&sv, 1);
683 
684 	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, SVM_TAG_HSP, setno,
685 	    md_expldev(hsp->hsp_self_id));
686 
687 	return (0);
688 }
689 
/*
 * Replace one hot spare in a pool with another.
 *
 * The old hot spare (shs_component_old) is located by device number
 * or, failing that, by namespace key; it must be a member of the
 * pool and, unless HS_OPT_FORCE is given, must not be RESERVED.  The
 * new hot spare (shs_component_new) is looked up by device number
 * and created if it does not exist; it must not already be in the
 * pool.  The pool slot of the old hot spare is then overwritten with
 * the new hot spare's record id.  When the old hot spare loses its
 * last reference it is unlinked from the set's list and its record
 * deleted.  With HS_OPT_DRYRUN set only the checks are performed.
 *
 * Returns 0 on success, or the result of the md*error() routine that
 * recorded the failure in shs->mde.
 */
static int
seths_replace(set_hs_params_t *shs)
{
	hot_spare_t		*hs;
	hot_spare_t		*prev_hs;
	hot_spare_t		*new_hs;
	hot_spare_pool_t	*hsp;
	int			new_found = 0;
	mddb_recid_t		recid;
	mddb_recid_t		recids[5];	/* zero-terminated commit list */
	int			i;
	sv_dev_t		sv;
	int			delete_hs = 0;
	set_t			setno;
	mddb_type_t		typ1;
	mdkey_t			key_old;
	int			num_keys_old = 0;

	setno = HSP_SET(shs->shs_hot_spare_pool);
	typ1 = (mddb_type_t)md_getshared_key(setno,
	    hotspares_md_ops.md_driver.md_drivername);

	/* Scan the hot spare list */
	hs = (hot_spare_t *)md_set[setno].s_hs;
	prev_hs = (hot_spare_t *)0;
	while (hs) {
		if (hs->hs_devnum == shs->shs_component_old) {
			break;
		}
		prev_hs = hs;
		hs = hs->hs_next;
	}

	if (hs == NULL) {
		/*
		 * Unable to find device using devnum so use
		 * key associated with shs_component_old instead.
		 * If unable to find a unique key for shs_component_old
		 * then fail since namespace has multiple entries
		 * for this old component and we're unable to determine
		 * which key is the valid match for shs_component_old.
		 *
		 * Only need to compare keys when hs_devnum is NODEV.
		 */
		if (md_getkeyfromdev(setno, mddb_getsidenum(setno),
		    shs->shs_component_old, &key_old, &num_keys_old) != 0) {
			return (mddeverror(&shs->mde, MDE_NAME_SPACE,
			    shs->shs_component_old));
		}

		/*
		 * If more than one key matches given old_dev - fail command
		 * since unable to determine which key is correct.
		 */
		if (num_keys_old > 1) {
			return (mddeverror(&shs->mde, MDE_MULTNM,
			    shs->shs_component_old));
		}
		/*
		 * If there is no key for this entry then fail since
		 * a key for this entry should exist.
		 */
		if (num_keys_old == 0) {
			return (mddeverror(&shs->mde, MDE_INVAL_HS,
			    shs->shs_component_old));
		}
		/* Scan the hot spare list again */
		hs = (hot_spare_t *)md_set[setno].s_hs;
		prev_hs = (hot_spare_t *)0;
		while (hs) {
			/*
			 * Only need to compare keys when hs_devnum is NODEV.
			 */
			if ((hs->hs_devnum == NODEV64) &&
			    (hs->hs_key == key_old)) {
				break;
			}
			prev_hs = hs;
			hs = hs->hs_next;
		}
	}

	/* neither scan found the old hot spare */
	if (hs == NULL) {
		return (mddeverror(&shs->mde, MDE_INVAL_HS,
		    shs->shs_component_old));
	}

	/* check the force flag and the state of the hot spare */
	if (((shs->shs_options & HS_OPT_FORCE) == 0) &&
	    (hs->hs_state == HSS_RESERVED)) {
		return (mdhserror(&shs->mde, MDE_HS_RESVD,
		    shs->shs_hot_spare_pool,
		    hs->hs_devnum));
	}

	/* Scan the hot spare pool list */
	hsp = find_hot_spare_pool(setno, shs->shs_hot_spare_pool);
	if (hsp == (hot_spare_pool_t *)0) {
		return (mdhsperror(&shs->mde, MDE_INVAL_HSP,
		    shs->shs_hot_spare_pool));
	}

	/*
	 * Make sure the old device is in the pool.
	 */
	for (i = 0; i < hsp->hsp_nhotspares; i++) {
		if (hsp->hsp_hotspares[i] == hs->hs_record_id) {
			break;
		}
	}
	if (i >= hsp->hsp_nhotspares) {
		return (mddeverror(&shs->mde, MDE_INVAL_HS,
		    hs->hs_devnum));
	}

	/*
	 * Scan the hot spare list for the new hs.  Unlike the old hot
	 * spare, the new one is matched on device number only.
	 */
	new_hs = (hot_spare_t *)md_set[setno].s_hs;
	new_found = 0;
	while (new_hs) {
		if (new_hs->hs_devnum == shs->shs_component_new) {
			new_found = 1;
			break;
		}
		new_hs = new_hs->hs_next;
	}

	/*
	 * Make sure the new device is not already in the pool.
	 * This only has to be checked when the new hot spare was found
	 * on the list; one that still has to be created cannot be a
	 * member of any pool yet.
	 */
	if (new_found) {
		for (i = 0; i < hsp->hsp_nhotspares; i++)
			if (hsp->hsp_hotspares[i] == new_hs->hs_record_id) {
				return (mdhserror(&shs->mde, MDE_HS_INUSE,
				    shs->shs_hot_spare_pool,
				    new_hs->hs_devnum));
			}
	}

	/* In case of a dryrun, we're done here */
	if (shs->shs_options & HS_OPT_DRYRUN) {
		return (0);
	}

	/*
	 * Create the new hotspare
	 */
	if (!new_found) {
		/* create a hot spare record */
		if (shs->shs_size_option & MD_CRO_64BIT) {
#if defined(_ILP32)
			return (mdhserror(&shs->mde, MDE_HS_UNIT_TOO_LARGE,
			    shs->shs_hot_spare_pool, shs->shs_component_new));
#else
			recid = mddb_createrec(HS_ONDSK_STR_SIZE, typ1, HS_REC,
				MD_CRO_64BIT | MD_CRO_HOTSPARE, setno);
#endif
		} else {
			recid = mddb_createrec(HS_ONDSK_STR_SIZE, typ1, HS_REC,
				MD_CRO_32BIT | MD_CRO_HOTSPARE, setno);
		}

		if (recid < 0) {
			return (mdhserror(&shs->mde, MDE_HS_CREATE_FAILURE,
			    shs->shs_hot_spare_pool,
			    shs->shs_component_new));
		}

		/* get the addr */
		new_hs = (hot_spare_t *)mddb_getrecaddr_resize(recid,
			sizeof (*new_hs), 0);

		/* describe the new component as supplied by the caller */
		new_hs->hs_record_id = recid;
		new_hs->hs_devnum = shs->shs_component_new;
		new_hs->hs_key = shs->shs_key_new;
		new_hs->hs_start_blk = shs->shs_start_blk;
		new_hs->hs_has_label = shs->shs_has_label;
		new_hs->hs_number_blks = shs->shs_number_blks;
		set_hot_spare_state(new_hs, HSS_AVAILABLE);
		new_hs->hs_refcount = 0;
		/*
		 * NOTE(review): seths_add() does not set hs_isopen when it
		 * creates a hot spare record -- confirm this is intended.
		 */
		new_hs->hs_isopen = 1;
	}

	/* lock the db records */
	recids[0] = hs->hs_record_id;
	recids[1] = new_hs->hs_record_id;
	recids[2] = hsp->hsp_record_id;
	recids[3] = 0;

	/* remember the old hot spare's name for md_rem_names() below */
	sv.setno = setno;
	sv.key = hs->hs_key;

	hs->hs_refcount--;
	if (hs->hs_refcount == 0) {
		/*
		 * NOTE: We do not commit the previous hot spare record.
		 *	 There is no need, the link gets rebuilt at boot time.
		 */
		if (prev_hs) {
			prev_hs->hs_next = hs->hs_next;
		} else
			md_set[setno].s_hs = (void *) hs->hs_next;

		/* mark hs to be deleted in the correct order */
		delete_hs = 1;

		/* the old record is deleted below, so leave it out */
		recids[0] = new_hs->hs_record_id;
		recids[1] = hsp->hsp_record_id;
		recids[2] = 0;
	}

	/* link into the hs list */
	new_hs->hs_refcount++;
	if (!new_found) {
		/* do this AFTER the old dev is possibly removed */
		new_hs->hs_next = (hot_spare_t *)md_set[setno].s_hs;
		md_set[setno].s_hs = (void *) new_hs;
	}

	/* find the old hs in the hsp and put the new hs in its slot */
	for (i = 0; i < hsp->hsp_nhotspares; i++) {
		if (hsp->hsp_hotspares[i] == hs->hs_record_id) {
			hsp->hsp_hotspares[i] = new_hs->hs_record_id;
			break;
		}
	}

	/* record which record format the new hot spare uses */
	if (shs->shs_size_option & MD_CRO_64BIT) {
		new_hs->hs_revision = MD_64BIT_META_DEV;
	} else {
		new_hs->hs_revision = MD_32BIT_META_DEV;
	}

	/* commit the db records */
	mddb_commitrecs_wrapper(recids);

	if (delete_hs)
		mddb_deleterec_wrapper(hs->hs_record_id);

	md_rem_names(&sv, 1);

	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REPLACE, SVM_TAG_HSP, setno,
	    md_expldev(hsp->hsp_self_id));
	return (0);
}
936 
937 static int
938 seths_enable(set_hs_params_t *shs)
939 {
940 	hot_spare_t	*hs;
941 	mddb_recid_t	recids[2];
942 	set_t		setno = shs->md_driver.md_setno;
943 	mdkey_t		key_old;
944 	int		num_keys_old = 0;
945 
946 
947 	/*
948 	 * Find device by using key associated with shs_component_old.
949 	 * If unable to find a unique key for shs_component_old
950 	 * then fail since namespace has multiple entries
951 	 * for this old component and we're unable to determine
952 	 * which key is the valid match for shs_component_old.
953 	 * This failure keeps a hotspare from being enabled on a slice
954 	 * that may already be in use by another metadevice.
955 	 */
956 	if (md_getkeyfromdev(setno, mddb_getsidenum(setno),
957 	    shs->shs_component_old, &key_old, &num_keys_old) != 0) {
958 		return (mddeverror(&shs->mde, MDE_NAME_SPACE,
959 		    shs->shs_component_old));
960 	}
961 
962 	/*
963 	 * If more than one key matches given old_dev - fail command
964 	 * since unable to determine which key is correct.
965 	 */
966 	if (num_keys_old > 1) {
967 		return (mddeverror(&shs->mde, MDE_MULTNM,
968 		    shs->shs_component_old));
969 	}
970 	/*
971 	 * If there is no key for this entry then fail since
972 	 * a key for this entry should exist.
973 	 */
974 	if (num_keys_old == 0) {
975 		return (mddeverror(&shs->mde, MDE_INVAL_HS,
976 		    shs->shs_component_old));
977 	}
978 
979 	/* Scan the hot spare list for the hs */
980 	hs = (hot_spare_t *)md_set[setno].s_hs;
981 	while (hs) {
982 		/*
983 		 * Since component may or may not be currently in the system,
984 		 * use the keys to find a match (not the devt).
985 		 */
986 		if (hs->hs_key == key_old) {
987 			break;
988 		}
989 		hs = hs->hs_next;
990 	}
991 
992 	if (hs == NULL) {
993 		return (mddeverror(&shs->mde, MDE_INVAL_HS,
994 			shs->shs_component_old));
995 	}
996 
997 	/* make sure it's broken */
998 	if (hs->hs_state != HSS_BROKEN) {
999 		return (mddeverror(&shs->mde, MDE_FIX_INVAL_HS_STATE,
1000 		    hs->hs_devnum));
1001 	}
1002 
1003 	/* In case of a dryrun, we're done here */
1004 	if (shs->shs_options & HS_OPT_DRYRUN) {
1005 		return (0);
1006 	}
1007 
1008 	/* fix it */
1009 	set_hot_spare_state(hs, HSS_AVAILABLE);
1010 	hs->hs_start_blk = shs->shs_start_blk;
1011 	hs->hs_has_label = shs->shs_has_label;
1012 	hs->hs_number_blks = shs->shs_number_blks;
1013 
1014 	/* commit the db records */
1015 	recids[0] = hs->hs_record_id;
1016 	recids[1] = 0;
1017 	mddb_commitrecs_wrapper(recids);
1018 	SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ENABLE, SVM_TAG_HS, setno,
1019 	    shs->shs_component_old);
1020 
1021 	return (0);
1022 }
1023 
1024 static int
1025 get_hs(
1026 	get_hs_params_t	*ghs
1027 )
1028 {
1029 	hot_spare_t	*hs;
1030 	set_t		setno = ghs->md_driver.md_setno;
1031 
1032 	mdclrerror(&ghs->mde);
1033 
1034 	/* Scan the hot spare list for the hs */
1035 	hs = (hot_spare_t *)md_set[setno].s_hs;
1036 	while (hs) {
1037 		if (hs->hs_key == ghs->ghs_key) {
1038 			break;
1039 		}
1040 		hs = hs->hs_next;
1041 	}
1042 
1043 	if (hs == NULL) {
1044 		return (mddeverror(&ghs->mde, MDE_INVAL_HS,
1045 		    ghs->ghs_devnum));
1046 	}
1047 
1048 	ghs->ghs_start_blk = hs->hs_start_blk;
1049 	ghs->ghs_number_blks = hs->hs_number_blks;
1050 	ghs->ghs_state = hs->hs_state;
1051 	ghs->ghs_timestamp = hs->hs_timestamp;
1052 	ghs->ghs_revision = hs->hs_revision;
1053 	return (0);
1054 }
1055 
1056 static void
1057 build_key_list(set_t setno, hot_spare_pool_t *hsp, mdkey_t *list)
1058 {
1059 	int	i;
1060 
1061 	for (i = 0; i < hsp->hsp_nhotspares; i++) {
1062 		hot_spare_t *hs;
1063 		hs = lookup_hot_spare(setno, hsp->hsp_hotspares[i], 1);
1064 		list[i] = hs->hs_key;
1065 	}
1066 }
1067 
1068 static int
1069 get_hsp(
1070 	void			*d,
1071 	int			mode
1072 )
1073 {
1074 	hot_spare_pool_t	*hsp;
1075 	get_hsp_t		*ghsp;
1076 	size_t			size;
1077 	set_t			setno;
1078 	int			err = 0;
1079 	md_i_get_t		*migp = (md_i_get_t *)d;
1080 
1081 
1082 	setno = migp->md_driver.md_setno;
1083 
1084 	mdclrerror(&migp->mde);
1085 
1086 	/* Scan the hot spare pool list */
1087 	hsp = find_hot_spare_pool(setno, migp->id);
1088 	if (hsp == NULL) {
1089 		return (mdhsperror(&migp->mde, MDE_INVAL_HSP,
1090 			migp->id));
1091 	}
1092 
1093 	size = (sizeof (ghsp->ghsp_hs_keys[0]) * (hsp->hsp_nhotspares - 1)) +
1094 	    sizeof (get_hsp_t);
1095 
1096 	if (migp->size == 0) {
1097 		migp->size = (int)size;
1098 		return (0);
1099 	}
1100 
1101 	if (migp->size < size)
1102 		return (EFAULT);
1103 
1104 	ghsp = kmem_alloc(size, KM_SLEEP);
1105 
1106 	ghsp->ghsp_id = hsp->hsp_self_id;
1107 	ghsp->ghsp_refcount = hsp->hsp_refcount;
1108 	ghsp->ghsp_nhotspares = hsp->hsp_nhotspares;
1109 	build_key_list(setno, hsp, ghsp->ghsp_hs_keys);
1110 	if (ddi_copyout(ghsp, (caddr_t)(uintptr_t)migp->mdp, size, mode))
1111 		err = EFAULT;
1112 	kmem_free(ghsp, size);
1113 	return (err);
1114 }
1115 
1116 static int
1117 set_hs(
1118 	set_hs_params_t	*shs
1119 )
1120 {
1121 	mdclrerror(&shs->mde);
1122 
1123 	if (md_get_setstatus(shs->md_driver.md_setno) & MD_SET_STALE)
1124 		return (mdmddberror(&shs->mde, MDE_DB_STALE, NODEV32,
1125 		    shs->md_driver.md_setno));
1126 
1127 	switch (shs->shs_cmd) {
1128 	case ADD_HOT_SPARE:
1129 		return (seths_add(shs));
1130 	case DELETE_HOT_SPARE:
1131 		return (seths_delete(shs));
1132 	case REPLACE_HOT_SPARE:
1133 		return (seths_replace(shs));
1134 	case FIX_HOT_SPARE:
1135 		return (seths_enable(shs));
1136 	default:
1137 		return (mderror(&shs->mde, MDE_INVAL_HSOP));
1138 	}
1139 }
1140 
1141 static void
1142 hotspares_poke_hotspares(void)
1143 {
1144 	intptr_t	(*poke_hs)();
1145 	int		i;
1146 
1147 	for (i = 0; i < MD_NOPS; i++) {
1148 		/* handle change */
1149 		poke_hs = md_get_named_service(NODEV64, i, "poke hotspares", 0);
1150 		if (poke_hs)
1151 			(void) (*poke_hs)();
1152 	}
1153 }
1154 
1155 
1156 /*ARGSUSED4*/
1157 static int
1158 hotspares_ioctl(
1159 	dev_t	dev,
1160 	int	cmd,
1161 	void	*data,
1162 	int	mode,
1163 	IOLOCK	*lockp
1164 )
1165 {
1166 	size_t	sz = 0;
1167 	void	*d = NULL;
1168 	int	err = 0;
1169 
1170 	/* single thread */
1171 	if (getminor(dev) != MD_ADM_MINOR)
1172 		return (ENOTTY);
1173 
1174 	/* We can only handle 32-bit clients for internal commands */
1175 	if ((mode & DATAMODEL_MASK) != DATAMODEL_ILP32) {
1176 		return (EINVAL);
1177 	}
1178 
1179 	mutex_enter(&md_mx);
1180 	while (md_status & MD_GBL_HS_LOCK)
1181 		cv_wait(&md_cv, &md_mx);
1182 	md_status |= MD_GBL_HS_LOCK;
1183 	mutex_exit(&md_mx);
1184 
1185 	/* dispatch ioctl */
1186 	switch (cmd) {
1187 
1188 	case MD_IOCSET_HS:	/* setup hot spares and pools */
1189 	{
1190 		if (! (mode & FWRITE)) {
1191 			err = EACCES;
1192 			break;
1193 		}
1194 
1195 		sz = sizeof (set_hs_params_t);
1196 		d = kmem_alloc(sz, KM_SLEEP);
1197 
1198 		if (ddi_copyin(data, d, sz, mode)) {
1199 			err = EFAULT;
1200 			break;
1201 		}
1202 
1203 		err = set_hs(d);
1204 		break;
1205 	}
1206 
1207 	case MD_IOCGET_HS:	/* get hot spare info */
1208 	{
1209 		if (! (mode & FREAD)) {
1210 			err = EACCES;
1211 			break;
1212 		}
1213 
1214 		sz = sizeof (get_hs_params_t);
1215 		d = kmem_alloc(sz, KM_SLEEP);
1216 
1217 		if (ddi_copyin(data, d, sz, mode)) {
1218 			err = EFAULT;
1219 			break;
1220 		}
1221 
1222 		err = get_hs(d);
1223 		break;
1224 	}
1225 
1226 	case MD_IOCGET:		/* get hot spare pool info */
1227 	{
1228 		if (! (mode & FREAD)) {
1229 			err = EACCES;
1230 			break;
1231 		}
1232 
1233 		sz = sizeof (md_i_get_t);
1234 		d = kmem_alloc(sz, KM_SLEEP);
1235 
1236 		if (ddi_copyin(data, d, sz, mode)) {
1237 			err = EFAULT;
1238 			break;
1239 		}
1240 
1241 		err = get_hsp(d, mode);
1242 		break;
1243 	}
1244 
1245 	default:
1246 		err = ENOTTY;
1247 	}
1248 
1249 	/*
1250 	 * copyout and free any args
1251 	 */
1252 	if (sz != 0) {
1253 		if (err == 0) {
1254 			if (ddi_copyout(d, data, sz, mode) != 0) {
1255 				err = EFAULT;
1256 			}
1257 		}
1258 		kmem_free(d, sz);
1259 	}
1260 
1261 	/* un single thread */
1262 	mutex_enter(&md_mx);
1263 	md_status &= ~MD_GBL_HS_LOCK;
1264 	cv_broadcast(&md_cv);
1265 	mutex_exit(&md_mx);
1266 
1267 	/* handle change */
1268 	hotspares_poke_hotspares();
1269 
1270 	/* return success */
1271 	return (err);
1272 }
1273 
1274 
1275 static void
1276 load_hotspare(set_t setno, mddb_recid_t recid)
1277 {
1278 	hot_spare_t	*hs;
1279 	mddb_de_ic_t	*dep;
1280 	mddb_rb32_t	*rbp;
1281 	size_t		newreqsize;
1282 	hot_spare_t	*b_hs;
1283 	hot_spare32_od_t *s_hs;
1284 
1285 	mddb_setrecprivate(recid, MD_PRV_GOTIT);
1286 
1287 	dep = mddb_getrecdep(recid);
1288 	dep->de_flags = MDDB_F_HOTSPARE;
1289 	rbp = dep->de_rb;
1290 	if (rbp->rb_revision == MDDB_REV_RB) {
1291 		/*
1292 		 * Needs to convert to internal 64 bit
1293 		 */
1294 		s_hs = (hot_spare32_od_t *)mddb_getrecaddr(recid);
1295 		newreqsize = sizeof (hot_spare_t);
1296 		b_hs = (hot_spare_t *)kmem_zalloc(newreqsize, KM_SLEEP);
1297 		hs_convert((caddr_t)s_hs, (caddr_t)b_hs, SMALL_2_BIG);
1298 		kmem_free(s_hs, dep->de_reqsize);
1299 		dep->de_rb_userdata = b_hs;
1300 		dep->de_reqsize = newreqsize;
1301 		hs = b_hs;
1302 	} else {
1303 		hs = (hot_spare_t *)mddb_getrecaddr_resize
1304 			(recid, sizeof (*hs), 0);
1305 	}
1306 
1307 
1308 #if defined(_ILP32)
1309 	if (hs->hs_revision == MD_64BIT_META_DEV) {
1310 		char	devname[MD_MAX_CTDLEN];
1311 
1312 		set_hot_spare_state(hs, HSS_BROKEN);
1313 		(void) md_devname(setno, hs->hs_devnum, devname,
1314 		    sizeof (devname));
1315 		cmn_err(CE_NOTE, "%s is unavailable because 64 bit hotspares "
1316 		    "are not accessible on a 32 bit kernel\n", devname);
1317 	}
1318 #endif
1319 
1320 	ASSERT(hs != NULL);
1321 
1322 	if (hs->hs_refcount == 0) {
1323 		mddb_setrecprivate(recid, MD_PRV_PENDDEL);
1324 		return;
1325 	}
1326 
1327 	hs->hs_next = (hot_spare_t *)md_set[setno].s_hs;
1328 	md_set[setno].s_hs = (void *)hs;
1329 
1330 	hs->hs_isopen = 0;
1331 
1332 	hs->hs_devnum = md_getdevnum(setno, mddb_getsidenum(setno),
1333 		hs->hs_key, MD_NOTRUST_DEVT);
1334 }
1335 
1336 
1337 static void
1338 load_hotsparepool(set_t setno, mddb_recid_t recid)
1339 {
1340 	hot_spare_pool_t *hsp;
1341 	hot_spare_pool_ond_t *hsp_ond;
1342 	size_t hsp_icsize;
1343 
1344 	mddb_setrecprivate(recid, MD_PRV_GOTIT);
1345 
1346 	hsp_ond = (hot_spare_pool_ond_t *)mddb_getrecaddr(recid);
1347 	ASSERT(hsp_ond != NULL);
1348 
1349 	if (hsp_ond->hsp_self_id == MD_HSP_NONE) {
1350 		mddb_setrecprivate(recid, MD_PRV_PENDDEL);
1351 		return;
1352 	}
1353 
1354 	hsp_icsize =  HSP_ONDSK_STR_OFF + mddb_getrecsize(recid);
1355 
1356 	hsp = (hot_spare_pool_t *)mddb_getrecaddr_resize(recid, hsp_icsize,
1357 		HSP_ONDSK_STR_OFF);
1358 	hsp->hsp_next = (hot_spare_pool_t *)md_set[setno].s_hsp;
1359 	md_set[setno].s_hsp = (void *) hsp;
1360 
1361 	rw_enter(&hotspares_md_ops.md_link_rw.lock, RW_WRITER);
1362 	hsp->hsp_link.ln_next = hotspares_md_ops.md_head;
1363 	hsp->hsp_link.ln_setno = setno;
1364 	hsp->hsp_link.ln_id = hsp->hsp_self_id;
1365 	hotspares_md_ops.md_head = &hsp->hsp_link;
1366 	rw_exit(&hotspares_md_ops.md_link_rw.lock);
1367 }
1368 
1369 static int
1370 hotspares_snarf(md_snarfcmd_t cmd, set_t setno)
1371 {
1372 	mddb_recid_t	recid;
1373 	int		gotsomething;
1374 	mddb_type_t	typ1;
1375 
1376 	if (cmd == MD_SNARF_CLEANUP)
1377 		return (0);
1378 
1379 	gotsomething = 0;
1380 
1381 	typ1 = (mddb_type_t)md_getshared_key(setno,
1382 	    hotspares_md_ops.md_driver.md_drivername);
1383 	recid = mddb_makerecid(setno, 0);
1384 	while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) {
1385 		if (mddb_getrecprivate(recid) & MD_PRV_GOTIT)
1386 			continue;
1387 
1388 		switch (mddb_getrectype2(recid)) {
1389 		case HSP_REC:
1390 			load_hotsparepool(setno, recid);
1391 			gotsomething = 1;
1392 			break;
1393 		case HS_REC:
1394 			load_hotspare(setno, recid);
1395 			gotsomething = 1;
1396 			break;
1397 		default:
1398 			ASSERT(0);
1399 		}
1400 	}
1401 
1402 	if (gotsomething)
1403 		return (gotsomething);
1404 
1405 	recid = mddb_makerecid(setno, 0);
1406 	while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0)
1407 		if (!(mddb_getrecprivate(recid) & MD_PRV_GOTIT))
1408 			mddb_setrecprivate(recid, MD_PRV_PENDDEL);
1409 
1410 	return (0);
1411 }
1412 
1413 static int
1414 hotspares_halt(md_haltcmd_t cmd, set_t setno)
1415 {
1416 	hot_spare_t		*hs, **p_hs;
1417 	hot_spare_pool_t	*hsp, **p_hsp;
1418 
1419 	if (cmd == MD_HALT_CLOSE)
1420 		return (0);
1421 
1422 	if (cmd == MD_HALT_OPEN)
1423 		return (0);
1424 
1425 	if (cmd == MD_HALT_CHECK)
1426 		return (0);
1427 
1428 	if (cmd == MD_HALT_UNLOAD)
1429 		return (0);
1430 
1431 	if (cmd != MD_HALT_DOIT)
1432 		return (1);
1433 	/*
1434 	 * Find all the hotspares for set "setno"
1435 	 *   and remove them from the hot_spare_list.
1436 	 */
1437 	p_hs = (hot_spare_t **)&md_set[setno].s_hs;
1438 	hs = (hot_spare_t *)md_set[setno].s_hs;
1439 	for (; hs != NULL; hs = *p_hs)
1440 		*p_hs = hs->hs_next;
1441 
1442 	/*
1443 	 * Find all the hotspare pools for set "setno"
1444 	 *   and remove them from the hot_spare_pools list.
1445 	 * Also remove from the get_next list.
1446 	 */
1447 	p_hsp = (hot_spare_pool_t **)&md_set[setno].s_hsp;
1448 	hsp = (hot_spare_pool_t *)md_set[setno].s_hsp;
1449 	for (; hsp != NULL; hsp = *p_hsp) {
1450 		md_rem_link(setno, hsp->hsp_self_id,
1451 		    &hotspares_md_ops.md_link_rw.lock,
1452 		    &hotspares_md_ops.md_head);
1453 		*p_hsp = hsp->hsp_next;
1454 	}
1455 
1456 	return (0);
1457 }
1458 
1459 static hot_spare_t *
1460 usable_hs(
1461 	set_t		setno,
1462 	mddb_recid_t	hs_id,
1463 	diskaddr_t	nblks,
1464 	int		labeled,
1465 	diskaddr_t	*start)
1466 {
1467 	hot_spare_t	*hs;
1468 
1469 	hs = lookup_hot_spare(setno, hs_id, 1);
1470 
1471 	if (hs->hs_state != HSS_AVAILABLE)
1472 		return ((hot_spare_t *)0);
1473 
1474 	if (labeled && hs->hs_has_label && (hs->hs_number_blks >= nblks)) {
1475 		*start = 0;
1476 		return (hs);
1477 	} else if ((hs->hs_number_blks - hs->hs_start_blk) >= nblks) {
1478 		*start = hs->hs_start_blk;
1479 		return (hs);
1480 	}
1481 	return ((hot_spare_t *)0);
1482 }
1483 
1484 static int
1485 reserve_a_hs(
1486 	set_t		setno,
1487 	mddb_recid_t	id,
1488 	uint64_t	size,
1489 	int		labeled,
1490 	mddb_recid_t	*hs_id,
1491 	mdkey_t		*key,
1492 	md_dev64_t	*dev,
1493 	diskaddr_t	*sblock)
1494 {
1495 	hot_spare_pool_t	*hsp;
1496 	hot_spare_t		*hs;
1497 	int			i;
1498 
1499 	*hs_id = 0;
1500 
1501 	hsp = find_hot_spare_pool(setno, id);
1502 	if (hsp == NULL)
1503 		return (-1);
1504 
1505 	for (i = 0; i < hsp->hsp_nhotspares; i++) {
1506 		hs = usable_hs(setno, hsp->hsp_hotspares[i],
1507 		    size, labeled, sblock);
1508 		if (hs == NULL)
1509 			continue;
1510 
1511 		set_hot_spare_state(hs, HSS_RESERVED);
1512 		*hs_id = hs->hs_record_id;
1513 		*key = hs->hs_key;
1514 		*dev = hs->hs_devnum;
1515 		/* NOTE: Mirror code commits the hs record */
1516 		return (0);
1517 	}
1518 
1519 	return (-1);
1520 }
1521 
1522 
1523 /* ARGSUSED3 */
1524 static int
1525 return_a_hs(
1526 	set_t			setno,
1527 	mddb_recid_t		id,
1528 	mddb_recid_t		*hs_id,
1529 	mdkey_t			key,
1530 	diskaddr_t		sblock,
1531 	uint64_t		size,
1532 	hotspare_states_t	new_state)
1533 {
1534 	hot_spare_pool_t	*hsp;
1535 	hot_spare_t		*hs;
1536 	int			i;
1537 
1538 	/*
1539 	 * NOTE: sblock/size are not currently being used.
1540 	 *	 That is because we always allocate the whole hs.
1541 	 *	 Later if we choose to allocate only what is needed
1542 	 *	 then the sblock/size can be used to determine
1543 	 *	 which part is being unreseved.
1544 	 */
1545 	*hs_id = 0;
1546 
1547 	hsp = find_hot_spare_pool(setno, id);
1548 	if (hsp == NULL)
1549 		return (-1);
1550 
1551 	for (i = 0; i < hsp->hsp_nhotspares; i++) {
1552 		hs = lookup_hot_spare(setno, hsp->hsp_hotspares[i], 1);
1553 		if (hs->hs_key != key)
1554 			continue;
1555 
1556 		set_hot_spare_state(hs, new_state);
1557 		*hs_id = hs->hs_record_id;
1558 		if (new_state == HSS_BROKEN) {
1559 			SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, SVM_TAG_HS,
1560 			    setno, hs->hs_devnum);
1561 		}
1562 		if (new_state == HSS_AVAILABLE) {
1563 			SE_NOTIFY(EC_SVM_STATE, ESC_SVM_HS_FREED, SVM_TAG_HS,
1564 			    setno, hs->hs_devnum);
1565 		}
1566 
1567 		/* NOTE: Mirror/Raid code commits the hs record */
1568 		return (0);
1569 	}
1570 
1571 	return (-1);
1572 }
1573 
1574 
1575 static int
1576 modify_hsp_ref(set_t setno, mddb_recid_t id, int incref,  mddb_recid_t *hsp_id)
1577 {
1578 	hot_spare_pool_t	*hsp;
1579 
1580 	*hsp_id = 0;
1581 
1582 	if (id  < 0)
1583 		return (0);
1584 
1585 	hsp = find_hot_spare_pool(setno, id);
1586 	if (hsp == NULL)
1587 		return (-1);
1588 
1589 	if (incref)
1590 		hsp->hsp_refcount++;
1591 	else
1592 		hsp->hsp_refcount--;
1593 
1594 	*hsp_id = hsp->hsp_record_id;
1595 
1596 	/* NOTE: Stripe code commits the hsp record */
1597 	return (0);
1598 }
1599 
1600 
1601 static int
1602 mkdev_for_a_hs(mddb_recid_t hs_id, md_dev64_t *dev)
1603 {
1604 	hot_spare_t	*hs;
1605 
1606 	hs = lookup_hot_spare(mddb_getsetnum(hs_id), hs_id, 0);
1607 	if (hs == NULL)
1608 		return (0);
1609 
1610 	*dev = hs->hs_devnum;
1611 	return (0);
1612 }
1613 
1614 static intptr_t
1615 hotspares_interface(
1616 	hs_cmds_t	cmd,
1617 	mddb_recid_t	id,
1618 	uint64_t	size,
1619 	int		bool,
1620 	mddb_recid_t	*hs_id,
1621 	mdkey_t		*key,
1622 	md_dev64_t	*dev,
1623 	diskaddr_t	*sblock)
1624 {
1625 	set_t	setno;
1626 	int	err = -1;
1627 
1628 	mutex_enter(&md_mx);
1629 	while (md_status & MD_GBL_HS_LOCK)
1630 		cv_wait(&md_cv, &md_mx);
1631 
1632 	/* If md_halt has been run do not continue */
1633 	if (md_status & (MD_GBL_HALTED | MD_GBL_DAEMONS_DIE)) {
1634 		mutex_exit(&md_mx);
1635 		return (ENXIO);
1636 	}
1637 
1638 	md_status |= MD_GBL_HS_LOCK;
1639 	mutex_exit(&md_mx);
1640 
1641 	setno = mddb_getsetnum(id);
1642 
1643 	switch (cmd) {
1644 	case HS_GET:
1645 		err = reserve_a_hs(setno, id, size, bool, hs_id,
1646 		    key, dev, sblock);
1647 		break;
1648 	case HS_FREE:
1649 		err = return_a_hs(setno, id, hs_id, *key, 0, 0, HSS_AVAILABLE);
1650 		hotspares_poke_hotspares();
1651 		break;
1652 	case HS_BAD:
1653 		err = return_a_hs(setno, id, hs_id, *key, 0, 0, HSS_BROKEN);
1654 		break;
1655 	case HSP_INCREF:
1656 		err = modify_hsp_ref(setno, id, 1, hs_id);
1657 		break;
1658 	case HSP_DECREF:
1659 		err = modify_hsp_ref(setno, id, 0, hs_id);
1660 		break;
1661 	case HS_MKDEV:
1662 		err = mkdev_for_a_hs(*hs_id, dev);
1663 		break;
1664 	}
1665 
1666 	mutex_enter(&md_mx);
1667 	md_status &= ~MD_GBL_HS_LOCK;
1668 	cv_broadcast(&md_cv);
1669 	mutex_exit(&md_mx);
1670 
1671 	return (err);
1672 }
1673 
1674 static void
1675 imp_hotsparepool(
1676 	set_t	setno,
1677 	mddb_recid_t	recid
1678 )
1679 {
1680 	hot_spare_pool_ond_t	*hsp_ond;
1681 	mddb_recid_t		*hsp_recid, *hs_recid;
1682 	int			i;
1683 	uint_t			*hsp_selfid;
1684 
1685 	mddb_setrecprivate(recid, MD_PRV_GOTIT);
1686 
1687 	hsp_ond = (hot_spare_pool_ond_t *)mddb_getrecaddr(recid);
1688 	hsp_recid = &(hsp_ond->hsp_record_id);
1689 	hsp_selfid = &(hsp_ond->hsp_self_id);
1690 	/*
1691 	 * Fixup the pool and hotspares
1692 	 */
1693 	*hsp_recid = MAKERECID(setno, DBID(*hsp_recid));
1694 	*hsp_selfid = MAKERECID(setno, DBID(*hsp_selfid));
1695 
1696 	for (i = 0; i < hsp_ond->hsp_nhotspares; i++) {
1697 		hs_recid = &(hsp_ond->hsp_hotspares[i]);
1698 		*hs_recid = MAKERECID(setno, DBID(*hs_recid));
1699 	}
1700 }
1701 
1702 static void
1703 imp_hotspare(
1704 	set_t	setno,
1705 	mddb_recid_t	recid
1706 )
1707 {
1708 	mddb_de_ic_t	*dep;
1709 	mddb_rb32_t	*rbp;
1710 	hot_spare_t	*hs64;
1711 	hot_spare32_od_t	*hs32;
1712 	mddb_recid_t	*hs_recid;
1713 
1714 	mddb_setrecprivate(recid, MD_PRV_GOTIT);
1715 
1716 	dep = mddb_getrecdep(recid);
1717 	rbp = dep->de_rb;
1718 	if (rbp->rb_revision == MDDB_REV_RB) {
1719 		/*
1720 		 * 32 bit hotspare
1721 		 */
1722 		hs32 = (hot_spare32_od_t *)mddb_getrecaddr(recid);
1723 		hs_recid = &(hs32->hs_record_id);
1724 	} else {
1725 		hs64 = (hot_spare_t *)mddb_getrecaddr(recid);
1726 		hs_recid = &(hs64->hs_record_id);
1727 	}
1728 
1729 	/*
1730 	 * Fixup the setno
1731 	 */
1732 	*hs_recid = MAKERECID(setno, DBID(*hs_recid));
1733 }
1734 
1735 static int
1736 hotspares_imp_set(
1737 	set_t	setno
1738 )
1739 {
1740 	mddb_recid_t	recid;
1741 	int		gotsomething;
1742 	mddb_type_t	typ1;
1743 
1744 
1745 	gotsomething = 0;
1746 
1747 	typ1 = (mddb_type_t)md_getshared_key(setno,
1748 	    hotspares_md_ops.md_driver.md_drivername);
1749 	recid = mddb_makerecid(setno, 0);
1750 	while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) {
1751 		if (mddb_getrecprivate(recid) & MD_PRV_GOTIT)
1752 			continue;
1753 
1754 		switch (mddb_getrectype2(recid)) {
1755 		case HSP_REC:
1756 			imp_hotsparepool(setno, recid);
1757 			gotsomething = 1;
1758 			break;
1759 		case HS_REC:
1760 			imp_hotspare(setno, recid);
1761 			gotsomething = 1;
1762 			break;
1763 		default:
1764 			ASSERT(0);
1765 		}
1766 	}
1767 
1768 	return (gotsomething);
1769 }
1770 
/*
 * Named services offered by this module.  Each entry pairs a function
 * with the name it is published under; the table ends with a NULL
 * function entry.  It is installed as the last member of
 * hotspares_md_ops.
 */
static md_named_services_t hotspares_named_services[] = {
	{hotspares_interface,	"hot spare interface"},
	{NULL,			0}
};
1775 
/*
 * Operations vector of the hot spares module.  The initializer is
 * positional; the comment on each line names the slot being filled.
 * Slots set to NULL are not implemented by this module.
 */
md_ops_t hotspares_md_ops = {
	NULL,			/* open */
	NULL,			/* close */
	NULL,			/* strategy */
	NULL,			/* print */
	NULL,			/* dump */
	NULL,			/* read */
	NULL,			/* write */
	hotspares_ioctl,	/* ioctl */
	hotspares_snarf,	/* snarf */
	hotspares_halt,		/* halt */
	NULL,			/* aread */
	NULL,			/* awrite */
	hotspares_imp_set,	/* import set */
	hotspares_named_services /* named_services */
};
1792 
/*
 * fini_uninit:
 * Clean-up step handed to MD_PLUGIN_MISC_MODULE() as a call
 * expression.  Takes no arguments and returns nothing.
 */
static void
fini_uninit(void)
{
	/* prevent access to services that may have been imported */
	md_clear_hot_spare_interface();
}
1799 
/*
 * define the module linkage
 *
 * Arguments: the module description string, md_noop, and a call to
 * fini_uninit().  NOTE(review): the last two are presumably the
 * load-time and unload-time hooks -- confirm against the definition
 * of MD_PLUGIN_MISC_MODULE.
 */
MD_PLUGIN_MISC_MODULE("hot spares module %I%", md_noop, fini_uninit())
1802