xref: /titanic_41/usr/src/uts/common/io/lvm/hotspares/hotspares.c (revision fbe82215144da71ed02c3a920667472cc567fafd)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2007 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  * Copyright (c) 2011 Bayard G. Bell. All rights reserved.
25  */
26 
27 #include <sys/param.h>
28 #include <sys/systm.h>
29 #include <sys/conf.h>
30 #include <sys/file.h>
31 #include <sys/user.h>
32 #include <sys/uio.h>
33 #include <sys/t_lock.h>
34 #include <sys/kmem.h>
35 #include <vm/page.h>
36 #include <sys/sysmacros.h>
37 #include <sys/types.h>
38 #include <sys/mkdev.h>
39 #include <sys/stat.h>
40 #include <sys/open.h>
41 #include <sys/modctl.h>
42 #include <sys/ddi.h>
43 #include <sys/sunddi.h>
44 #include <sys/debug.h>
45 
46 #include <sys/lvm/md_hotspares.h>
47 #include <sys/lvm/md_convert.h>
48 
49 #include <sys/sysevent/eventdefs.h>
50 #include <sys/sysevent/svm.h>
51 
/*
 * This module's md_ops_t instance.  The code below uses its md_driver
 * (driver name), md_link_rw (lock for the link list) and md_head (head of
 * the list of hot spare pool links) members.
 */
md_ops_t		hotspares_md_ops;
#ifndef	lint
md_ops_t		*md_interface_ops = &hotspares_md_ops;
#endif

/* Defined elsewhere in md. */
extern md_ops_t		**md_ops;
extern md_ops_t		*md_opslist;
/* Per-set state; s_hs and s_hsp head the hot spare and pool lists. */
extern md_set_t		md_set[];

extern kmutex_t		md_mx;		/* used for md global stuff */
extern kcondvar_t	md_cv;		/* md_status events */
extern int		md_status;

extern void		md_clear_hot_spare_interface();
66 
/*
 * Store a new state in a hot spare and refresh its timestamp.
 *
 *	hs		hot spare to update
 *	newstate	value stored in hs->hs_state
 *
 * hs->hs_timestamp is filled in by uniqtime32() after the state is stored.
 * Nothing is committed to the database here; that is left to the caller.
 */
static void
set_hot_spare_state(hot_spare_t *hs, hotspare_states_t newstate)
{
	hs->hs_state = newstate;
	uniqtime32(&hs->hs_timestamp);
}
73 
74 static hot_spare_t *
75 lookup_hot_spare(set_t setno, mddb_recid_t hs_id, int must_exist)
76 {
77 	hot_spare_t *hs;
78 
79 	for (hs = (hot_spare_t *)md_set[setno].s_hs; hs; hs = hs->hs_next) {
80 		if (hs->hs_record_id == hs_id)
81 			return (hs);
82 	}
83 	if (must_exist)
84 		ASSERT(0);
85 
86 	return ((hot_spare_t *)NULL);
87 }
88 
89 
90 static int
91 seths_create_hsp(set_hs_params_t *shs)
92 {
93 	hot_spare_pool_t	*hsp;
94 	mddb_recid_t		recid;
95 	set_t			setno;
96 	mddb_type_t		typ1;
97 
98 	setno = HSP_SET(shs->shs_hot_spare_pool);
99 
100 	/* Scan the hot spare pool list */
101 	hsp = find_hot_spare_pool(setno, shs->shs_hot_spare_pool);
102 	if (hsp != (hot_spare_pool_t *)0)
103 		return (0);
104 
105 	typ1 = (mddb_type_t)md_getshared_key(setno,
106 	    hotspares_md_ops.md_driver.md_drivername);
107 
108 	/* create a hot spare pool record */
109 	if (shs->shs_options & MD_CRO_64BIT) {
110 #if defined(_ILP32)
111 		return (mdhsperror(&shs->mde, MDE_HSP_UNIT_TOO_LARGE,
112 		    shs->shs_hot_spare_pool));
113 #else
114 		recid = mddb_createrec(sizeof (hot_spare_pool_ond_t), typ1,
115 		    HSP_REC, MD_CRO_64BIT | MD_CRO_HOTSPARE_POOL | MD_CRO_FN,
116 		    setno);
117 #endif
118 	} else {
119 		recid = mddb_createrec(sizeof (hot_spare_pool_ond_t), typ1,
120 		    HSP_REC, MD_CRO_32BIT | MD_CRO_HOTSPARE_POOL | MD_CRO_FN,
121 		    setno);
122 	}
123 
124 	if (recid < 0) {
125 		return (mdhsperror(&shs->mde, MDE_HSP_CREATE_FAILURE,
126 		    shs->shs_hot_spare_pool));
127 	}
128 
129 	/* get the record addr */
130 	hsp = (hot_spare_pool_t *)mddb_getrecaddr_resize(recid, sizeof (*hsp),
131 		HSP_ONDSK_STR_OFF);
132 
133 	hsp->hsp_self_id = shs->shs_hot_spare_pool;
134 	hsp->hsp_record_id = recid;
135 	hsp->hsp_next = (hot_spare_pool_t *)md_set[setno].s_hsp;
136 	hsp->hsp_refcount = 0;
137 	hsp->hsp_nhotspares = 0;
138 	hsp->hsp_revision |= MD_FN_META_DEV;
139 
140 	md_set[setno].s_hsp = (void *) hsp;
141 
142 	mddb_commitrec_wrapper(recid);
143 	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, SVM_TAG_HSP, setno,
144 	    md_expldev(hsp->hsp_self_id));
145 
146 	rw_enter(&hotspares_md_ops.md_link_rw.lock, RW_WRITER);
147 	hsp->hsp_link.ln_next = hotspares_md_ops.md_head;
148 	hsp->hsp_link.ln_setno = setno;
149 	hsp->hsp_link.ln_id = hsp->hsp_self_id;
150 	hotspares_md_ops.md_head = &hsp->hsp_link;
151 	rw_exit(&hotspares_md_ops.md_link_rw.lock);
152 
153 	return (0);
154 }
155 
156 
157 static int
158 seths_add(set_hs_params_t *shs)
159 {
160 	hot_spare_t		*hs;
161 	hot_spare_pool_t	*hsp;
162 	hot_spare_pool_t	*prev_hsp;
163 	hot_spare_pool_t	*new_hsp;
164 	hot_spare_pool_t	*old_hsp;
165 	md_create_rec_option_t	options;
166 	mddb_recid_t		recid;
167 	mddb_recid_t		recids[5];
168 	size_t			new_size;
169 	int			i;
170 	int			delete_hsp = 0;
171 	int			irecid;
172 	set_t			setno;
173 	mddb_type_t		typ1;
174 	int			hsp_created = 0;
175 	mdkey_t			key_old;
176 	int			num_keys_old = 0;
177 
178 	/* Not much to do here in case of a dryrun */
179 	if (shs->shs_options & HS_OPT_DRYRUN) {
180 		return (0);
181 	}
182 
183 	/* create an empty hot spare pool */
184 	if (shs->shs_options & HS_OPT_POOL) {
185 		return (seths_create_hsp(shs));
186 	}
187 
188 	setno = HSP_SET(shs->shs_hot_spare_pool);
189 	typ1 = (mddb_type_t)md_getshared_key(setno,
190 	    hotspares_md_ops.md_driver.md_drivername);
191 
192 	/* Scan the hot spare list */
193 	hs = (hot_spare_t *)md_set[setno].s_hs;
194 	while (hs) {
195 		if (hs->hs_devnum == shs->shs_component_old) {
196 			break;
197 		}
198 		hs = hs->hs_next;
199 	}
200 
201 	if (hs == NULL) {
202 		/*
203 		 * Did not find match for device using devnum so use
204 		 * key associated with shs_component_old just
205 		 * in case there is a match but the match's dev is NODEV.
206 		 * If unable to find a unique key for shs_component_old
207 		 * then fail since namespace has multiple entries
208 		 * for this old component and we shouldn't allow
209 		 * an addition of a hotspare in this case.
210 		 */
211 		if (md_getkeyfromdev(setno, mddb_getsidenum(setno),
212 		    shs->shs_component_old, &key_old, &num_keys_old) != 0) {
213 			return (mddeverror(&shs->mde, MDE_NAME_SPACE,
214 			    shs->shs_component_old));
215 		}
216 
217 		/*
218 		 * If more than one key matches given old_dev - fail command
219 		 * since shouldn't add new hotspare if namespace has
220 		 * multiple entries.
221 		 */
222 		if (num_keys_old > 1) {
223 			return (mddeverror(&shs->mde, MDE_MULTNM,
224 			    shs->shs_component_old));
225 		}
226 		/*
227 		 * If there is no key for this entry then fail since
228 		 * a key for this entry should exist.
229 		 */
230 		if (num_keys_old == 0) {
231 			return (mddeverror(&shs->mde, MDE_INVAL_HS,
232 			    shs->shs_component_old));
233 		}
234 		/* Scan the hot spare list again */
235 		hs = (hot_spare_t *)md_set[setno].s_hs;
236 		while (hs) {
237 			/*
238 			 * Only need to compare keys when hs_devnum is NODEV.
239 			 */
240 			if ((hs->hs_devnum == NODEV64) &&
241 			    (hs->hs_key == key_old)) {
242 				break;
243 			}
244 			hs = hs->hs_next;
245 		}
246 	}
247 
248 	if (hs == NULL) {
249 		/* create a hot spare record */
250 		if (shs->shs_size_option & MD_CRO_64BIT) {
251 #if defined(_ILP32)
252 			return (mdhserror(&shs->mde, MDE_HS_UNIT_TOO_LARGE,
253 			    shs->shs_hot_spare_pool, shs->shs_component_old));
254 #else
255 			recid = mddb_createrec(HS_ONDSK_STR_SIZE, typ1, HS_REC,
256 				MD_CRO_64BIT | MD_CRO_HOTSPARE, setno);
257 #endif
258 		} else {
259 			recid = mddb_createrec(HS_ONDSK_STR_SIZE, typ1, HS_REC,
260 				MD_CRO_32BIT | MD_CRO_HOTSPARE, setno);
261 		}
262 
263 		if (recid < 0) {
264 			return (mdhserror(&shs->mde, MDE_HS_CREATE_FAILURE,
265 			    shs->shs_hot_spare_pool,
266 			    shs->shs_component_old));
267 		}
268 
269 		/* get the addr */
270 		hs = (hot_spare_t *)mddb_getrecaddr_resize(recid, sizeof (*hs),
271 			0);
272 
273 		hs->hs_record_id = recid;
274 
275 		hs->hs_devnum = shs->shs_component_old;
276 		hs->hs_key = shs->shs_key_old;
277 		hs->hs_start_blk = shs->shs_start_blk;
278 		hs->hs_has_label = shs->shs_has_label;
279 		hs->hs_number_blks = shs->shs_number_blks;
280 		set_hot_spare_state(hs, HSS_AVAILABLE);
281 		hs->hs_refcount = 0;
282 		hs->hs_next = (hot_spare_t *)md_set[setno].s_hs;
283 		md_set[setno].s_hs = (void *) hs;
284 	}
285 
286 	/* Scan the hot spare pool list */
287 	hsp = (hot_spare_pool_t *)md_set[setno].s_hsp;
288 	prev_hsp = (hot_spare_pool_t *)0;
289 	while (hsp) {
290 		if (hsp->hsp_self_id == shs->shs_hot_spare_pool) {
291 			break;
292 		}
293 		prev_hsp = hsp;
294 		hsp = hsp->hsp_next;
295 	}
296 
297 	if (hsp == NULL) {
298 		/* create a hot spare pool record */
299 		recid = mddb_createrec(sizeof (hot_spare_pool_ond_t),
300 		    typ1, HSP_REC,
301 		    MD_CRO_32BIT | MD_CRO_HOTSPARE_POOL | MD_CRO_FN, setno);
302 
303 		if (recid < 0) {
304 			return (mdhsperror(&shs->mde, MDE_HSP_CREATE_FAILURE,
305 			    shs->shs_hot_spare_pool));
306 		}
307 
308 		/* get the record addr */
309 		hsp = (hot_spare_pool_t *)mddb_getrecaddr_resize(recid,
310 			sizeof (*hsp), HSP_ONDSK_STR_OFF);
311 
312 		hsp->hsp_self_id = shs->shs_hot_spare_pool;
313 		hsp->hsp_record_id = recid;
314 		hsp->hsp_next = (hot_spare_pool_t *)md_set[setno].s_hsp;
315 		hsp->hsp_refcount = 0;
316 		hsp->hsp_nhotspares = 0;
317 		hsp->hsp_revision |= MD_FN_META_DEV;
318 
319 		/* force prev_hsp to NULL, this will cause hsp to be linked */
320 		prev_hsp = (hot_spare_pool_t *)0;
321 
322 		rw_enter(&hotspares_md_ops.md_link_rw.lock, RW_WRITER);
323 		hsp->hsp_link.ln_next = hotspares_md_ops.md_head;
324 		hsp->hsp_link.ln_setno = setno;
325 		hsp->hsp_link.ln_id = hsp->hsp_self_id;
326 		hotspares_md_ops.md_head = &hsp->hsp_link;
327 		rw_exit(&hotspares_md_ops.md_link_rw.lock);
328 		hsp_created = 1;
329 	} else {
330 
331 		/*
332 		 * Make sure the hot spare is not already in the pool.
333 		 */
334 		for (i = 0; i < hsp->hsp_nhotspares; i++)
335 			if (hsp->hsp_hotspares[i] == hs->hs_record_id) {
336 				return (mdhserror(&shs->mde, MDE_HS_INUSE,
337 					shs->shs_hot_spare_pool,
338 					hs->hs_devnum));
339 			}
340 		/*
341 		 * Create a new hot spare pool record
342 		 * This gives us the one extra hs slot,
343 		 * because there is one slot in the
344 		 * hot_spare_pool struct
345 		 */
346 		new_size = sizeof (hot_spare_pool_ond_t) +
347 			(sizeof (mddb_recid_t) * hsp->hsp_nhotspares);
348 
349 		/*
350 		 * The Friendly Name status of the new HSP should duplicate
351 		 * the status of the existing one.
352 		 */
353 		if (hsp->hsp_revision & MD_FN_META_DEV) {
354 			options =
355 				MD_CRO_32BIT | MD_CRO_HOTSPARE_POOL | MD_CRO_FN;
356 		} else {
357 			options = MD_CRO_32BIT | MD_CRO_HOTSPARE_POOL;
358 		}
359 		recid = mddb_createrec(new_size, typ1, HSP_REC, options, setno);
360 
361 		if (recid < 0) {
362 			return (mdhsperror(&shs->mde, MDE_HSP_CREATE_FAILURE,
363 			    hsp->hsp_self_id));
364 		}
365 		new_size = sizeof (hot_spare_pool_t) +
366 			(sizeof (mddb_recid_t) * hsp->hsp_nhotspares);
367 
368 		/* get the record addr */
369 		new_hsp = (hot_spare_pool_t *)mddb_getrecaddr_resize(recid,
370 			new_size, HSP_ONDSK_STR_OFF);
371 
372 		/* copy the old record into the new one */
373 		bcopy((caddr_t)hsp, (caddr_t)new_hsp,
374 		    (size_t)((sizeof (hot_spare_pool_t) +
375 		    (sizeof (mddb_recid_t) * hsp->hsp_nhotspares)
376 		    - sizeof (mddb_recid_t))));
377 		new_hsp->hsp_record_id = recid;
378 
379 		md_rem_link(setno, hsp->hsp_self_id,
380 		    &hotspares_md_ops.md_link_rw.lock,
381 		    &hotspares_md_ops.md_head);
382 
383 		rw_enter(&hotspares_md_ops.md_link_rw.lock, RW_WRITER);
384 		new_hsp->hsp_link.ln_next = hotspares_md_ops.md_head;
385 		new_hsp->hsp_link.ln_setno = setno;
386 		new_hsp->hsp_link.ln_id = new_hsp->hsp_self_id;
387 		hotspares_md_ops.md_head = &new_hsp->hsp_link;
388 		rw_exit(&hotspares_md_ops.md_link_rw.lock);
389 
390 		/* mark the old hsp to be deleted */
391 		delete_hsp = 1;
392 		old_hsp = hsp;
393 		hsp = new_hsp;
394 	}
395 
396 	if (shs->shs_size_option & MD_CRO_64BIT) {
397 		hs->hs_revision |= MD_64BIT_META_DEV;
398 	} else {
399 		hs->hs_revision &= ~MD_64BIT_META_DEV;
400 	}
401 
402 	/* lock the db records */
403 	recids[0] = hs->hs_record_id;
404 	recids[1] = hsp->hsp_record_id;
405 	irecid = 2;
406 	if (delete_hsp)
407 		recids[irecid++] = old_hsp->hsp_record_id;
408 	recids[irecid] = 0;
409 
410 	/* increment the reference count */
411 	hs->hs_refcount++;
412 
413 	/* add the hs at the end of the hot spare pool */
414 	hsp->hsp_hotspares[hsp->hsp_nhotspares] = hs->hs_record_id;
415 	hsp->hsp_nhotspares++;
416 
417 	/*
418 	 * NOTE: We do not commit the previous hot spare pool record.
419 	 *	 There is no need, the link gets rebuilt at boot time.
420 	 */
421 	if (prev_hsp)
422 		prev_hsp->hsp_next = hsp;
423 	else
424 		md_set[setno].s_hsp = (void *) hsp;
425 
426 	if (delete_hsp)
427 		old_hsp->hsp_self_id = MD_HSP_NONE;
428 
429 	/* commit the db records */
430 	mddb_commitrecs_wrapper(recids);
431 
432 	if (delete_hsp) {
433 		/* delete the old hot spare pool record */
434 		mddb_deleterec_wrapper(old_hsp->hsp_record_id);
435 	}
436 
437 	if (hsp_created) {
438 		SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, SVM_TAG_HSP, setno,
439 		    md_expldev(hsp->hsp_self_id));
440 	}
441 	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_ADD, SVM_TAG_HSP, setno,
442 	    md_expldev(hsp->hsp_self_id));
443 
444 	return (0);
445 }
446 
447 
448 static int
449 seths_delete_hsp(set_hs_params_t *shs)
450 {
451 
452 	hot_spare_pool_t	*prev_hsp;
453 	hot_spare_pool_t	*hsp;
454 	set_t			setno;
455 	hsp_t			hspid;
456 
457 	setno = HSP_SET(shs->shs_hot_spare_pool);
458 
459 	/* Scan the hot spare pool list */
460 	prev_hsp = (hot_spare_pool_t *)0;
461 	hsp = (hot_spare_pool_t *)md_set[setno].s_hsp;
462 	while (hsp) {
463 		if (hsp->hsp_self_id == shs->shs_hot_spare_pool) {
464 			break;
465 		}
466 		prev_hsp = hsp;
467 		hsp = hsp->hsp_next;
468 	}
469 
470 	if (hsp == NULL) {
471 		return (mdhsperror(&shs->mde, MDE_INVAL_HSP,
472 		    shs->shs_hot_spare_pool));
473 	}
474 
475 	if (hsp->hsp_nhotspares != 0) {
476 		return (mdhsperror(&shs->mde, MDE_HSP_BUSY,
477 		    shs->shs_hot_spare_pool));
478 	}
479 
480 	if (hsp->hsp_refcount != 0) {
481 		return (mdhsperror(&shs->mde, MDE_HSP_REF,
482 		    shs->shs_hot_spare_pool));
483 	}
484 
485 	/* In case of a dryrun, we're done here */
486 	if (shs->shs_options & HS_OPT_DRYRUN) {
487 		return (0);
488 	}
489 	/*
490 	 * NOTE: We do not commit the previous hot spare pool record.
491 	 *	 There is no need, the link gets rebuilt at boot time.
492 	 */
493 	if (prev_hsp)
494 		prev_hsp->hsp_next = hsp->hsp_next;
495 	else
496 		md_set[setno].s_hsp = (void *) hsp->hsp_next;
497 
498 	hspid = hsp->hsp_self_id;
499 
500 	md_rem_link(setno, hsp->hsp_self_id,
501 	    &hotspares_md_ops.md_link_rw.lock,
502 	    &hotspares_md_ops.md_head);
503 
504 	mddb_deleterec_wrapper(hsp->hsp_record_id);
505 
506 	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, SVM_TAG_HSP, setno,
507 	    md_expldev(hspid));
508 	return (0);
509 }
510 
511 
512 static int
513 seths_delete(set_hs_params_t *shs)
514 {
515 	hot_spare_t		*hs;
516 	hot_spare_t		*prev_hs;
517 	hot_spare_pool_t	*hsp;
518 	mddb_recid_t		recids[4];
519 	int			i;
520 	set_t			setno;
521 	sv_dev_t		sv;
522 	int			delete_hs = 0;
523 	mdkey_t			key_old;
524 	int			num_keys_old = 0;
525 
526 	/* delete the hot spare pool */
527 	if (shs->shs_options & HS_OPT_POOL) {
528 		return (seths_delete_hsp(shs));
529 	}
530 
531 	setno = HSP_SET(shs->shs_hot_spare_pool);
532 
533 	/* Scan the hot spare list */
534 	hs = (hot_spare_t *)md_set[setno].s_hs;
535 	prev_hs = (hot_spare_t *)0;
536 	while (hs) {
537 		if (hs->hs_devnum == shs->shs_component_old) {
538 			break;
539 		}
540 		prev_hs = hs;
541 		hs = hs->hs_next;
542 	}
543 
544 	if (hs == NULL) {
545 		/*
546 		 * Unable to find device using devnum so use
547 		 * key associated with shs_component_old instead.
548 		 * If unable to find a unique key for shs_component_old
549 		 * then fail since namespace has multiple entries
550 		 * for this old component and we're unable to determine
551 		 * which key is the valid match for shs_component_old.
552 		 *
553 		 * Only need to compare keys when hs_devnum is NODEV.
554 		 */
555 		if (md_getkeyfromdev(setno, mddb_getsidenum(setno),
556 		    shs->shs_component_old, &key_old, &num_keys_old) != 0) {
557 			return (mddeverror(&shs->mde, MDE_NAME_SPACE,
558 			    shs->shs_component_old));
559 		}
560 
561 		/*
562 		 * If more than one key matches given old_dev - fail command
563 		 * since shouldn't add new hotspare if namespace has
564 		 * multiple entries.
565 		 */
566 		if (num_keys_old > 1) {
567 			return (mddeverror(&shs->mde, MDE_MULTNM,
568 			    shs->shs_component_old));
569 		}
570 		/*
571 		 * If there is no key for this entry then fail since
572 		 * a key for this entry should exist.
573 		 */
574 		if (num_keys_old == 0) {
575 			return (mddeverror(&shs->mde, MDE_INVAL_HS,
576 			    shs->shs_component_old));
577 		}
578 		/* Scan the hot spare list again */
579 		hs = (hot_spare_t *)md_set[setno].s_hs;
580 		prev_hs = (hot_spare_t *)0;
581 		while (hs) {
582 			/*
583 			 * Only need to compare keys when hs_devnum is NODEV.
584 			 */
585 			if ((hs->hs_devnum == NODEV64) &&
586 			    (hs->hs_key == key_old)) {
587 				break;
588 			}
589 			prev_hs = hs;
590 			hs = hs->hs_next;
591 		}
592 	}
593 
594 	if (hs == NULL) {
595 		return (mddeverror(&shs->mde, MDE_INVAL_HS,
596 		    shs->shs_component_old));
597 	}
598 
599 	/* Scan the hot spare pool list */
600 	hsp = find_hot_spare_pool(setno, shs->shs_hot_spare_pool);
601 	if (hsp == (hot_spare_pool_t *)0) {
602 		return (mdhsperror(&shs->mde, MDE_INVAL_HSP,
603 		    shs->shs_hot_spare_pool));
604 	}
605 
606 	/* check for force flag and state of hot spare */
607 	if (((shs->shs_options & HS_OPT_FORCE) == 0) &&
608 	    (hs->hs_state == HSS_RESERVED)) {
609 		return (mdhserror(&shs->mde, MDE_HS_RESVD,
610 		    shs->shs_hot_spare_pool, shs->shs_component_old));
611 	}
612 
613 	if (hsp->hsp_refcount && (hs->hs_state == HSS_RESERVED)) {
614 		return (mdhserror(&shs->mde, MDE_HS_RESVD,
615 		    shs->shs_hot_spare_pool, shs->shs_component_old));
616 	}
617 
618 	/*
619 	 * Make sure the device is in the pool.
620 	 */
621 	for (i = 0; i < hsp->hsp_nhotspares; i++) {
622 		if (hsp->hsp_hotspares[i] == hs->hs_record_id) {
623 			break;
624 		}
625 	}
626 
627 	if (i >= hsp->hsp_nhotspares) {
628 		return (mddeverror(&shs->mde, MDE_INVAL_HS,
629 		    hs->hs_devnum));
630 	}
631 
632 	/* In case of a dryrun, we're done here */
633 	if (shs->shs_options & HS_OPT_DRYRUN) {
634 		return (0);
635 	}
636 
637 	/* lock the db records */
638 	recids[0] = hs->hs_record_id;
639 	recids[1] = hsp->hsp_record_id;
640 	recids[2] = 0;
641 
642 	sv.setno = setno;
643 	sv.key = hs->hs_key;
644 
645 	hs->hs_refcount--;
646 	if (hs->hs_refcount == 0) {
647 		/*
648 		 * NOTE: We do not commit the previous hot spare record.
649 		 *	 There is no need, the link we get rebuilt at boot time.
650 		 */
651 		if (prev_hs) {
652 			prev_hs->hs_next = hs->hs_next;
653 		} else
654 			md_set[setno].s_hs = (void *) hs->hs_next;
655 
656 		/* mark the hot spare to be deleted */
657 		delete_hs = 1;
658 		recids[0] = hsp->hsp_record_id;
659 		recids[1] = 0;
660 	}
661 
662 	/* find the location of the hs in the hsp */
663 	for (i = 0; i < hsp->hsp_nhotspares; i++) {
664 		if (hsp->hsp_hotspares[i] == hs->hs_record_id)
665 			break;
666 	}
667 
668 	/* remove the hs from the hsp */
669 	for (i++; i < hsp->hsp_nhotspares; i++)
670 		hsp->hsp_hotspares[i - 1] = hsp->hsp_hotspares[i];
671 
672 	hsp->hsp_nhotspares--;
673 
674 	/* commit the db records */
675 	mddb_commitrecs_wrapper(recids);
676 
677 	if (delete_hs)
678 		mddb_deleterec_wrapper(hs->hs_record_id);
679 
680 	md_rem_names(&sv, 1);
681 
682 	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, SVM_TAG_HSP, setno,
683 	    md_expldev(hsp->hsp_self_id));
684 
685 	return (0);
686 }
687 
/*
 * REPLACE_HOT_SPARE: within the pool shs->shs_hot_spare_pool, replace the
 * hot spare shs->shs_component_old with shs->shs_component_new.
 *
 * The old hot spare is looked up by devnum and, failing that, by its
 * namespace key (for entries whose devnum is NODEV64).  The new hot spare
 * is reused if it is already on the set's hot spare list, otherwise a
 * record is created for it.  The old hot spare's slot in the pool is
 * overwritten with the new record id; if the old hot spare's reference
 * count drops to zero it is unlinked and its record deleted.
 *
 * With HS_OPT_DRYRUN only the validation is performed.
 *
 * Returns 0 on success, otherwise the value produced by the md*error()
 * routine that recorded the failure in shs->mde.
 */
static int
seths_replace(set_hs_params_t *shs)
{
	hot_spare_t		*hs;
	hot_spare_t		*prev_hs;
	hot_spare_t		*new_hs;
	hot_spare_pool_t	*hsp;
	int			new_found = 0;
	mddb_recid_t		recid;
	mddb_recid_t		recids[5];
	int			i;
	sv_dev_t		sv;
	int			delete_hs = 0;
	set_t			setno;
	mddb_type_t		typ1;
	mdkey_t			key_old;
	int			num_keys_old = 0;

	setno = HSP_SET(shs->shs_hot_spare_pool);
	typ1 = (mddb_type_t)md_getshared_key(setno,
	    hotspares_md_ops.md_driver.md_drivername);

	/* Scan the hot spare list; prev_hs trails hs for a later unlink */
	hs = (hot_spare_t *)md_set[setno].s_hs;
	prev_hs = (hot_spare_t *)0;
	while (hs) {
		if (hs->hs_devnum == shs->shs_component_old) {
			break;
		}
		prev_hs = hs;
		hs = hs->hs_next;
	}

	if (hs == NULL) {
		/*
		 * Unable to find device using devnum so use
		 * key associated with shs_component_old instead.
		 * If unable to find a unique key for shs_component_old
		 * then fail since namespace has multiple entries
		 * for this old component and we're unable to determine
		 * which key is the valid match for shs_component_old.
		 *
		 * Only need to compare keys when hs_devnum is NODEV.
		 */
		if (md_getkeyfromdev(setno, mddb_getsidenum(setno),
		    shs->shs_component_old, &key_old, &num_keys_old) != 0) {
			return (mddeverror(&shs->mde, MDE_NAME_SPACE,
			    shs->shs_component_old));
		}

		/*
		 * If more than one key matches given old_dev - fail command
		 * since unable to determine which key is correct.
		 */
		if (num_keys_old > 1) {
			return (mddeverror(&shs->mde, MDE_MULTNM,
			    shs->shs_component_old));
		}
		/*
		 * If there is no key for this entry then fail since
		 * a key for this entry should exist.
		 */
		if (num_keys_old == 0) {
			return (mddeverror(&shs->mde, MDE_INVAL_HS,
			    shs->shs_component_old));
		}
		/* Scan the hot spare list again */
		hs = (hot_spare_t *)md_set[setno].s_hs;
		prev_hs = (hot_spare_t *)0;
		while (hs) {
			/*
			 * Only need to compare keys when hs_devnum is NODEV.
			 */
			if ((hs->hs_devnum == NODEV64) &&
			    (hs->hs_key == key_old)) {
				break;
			}
			prev_hs = hs;
			hs = hs->hs_next;
		}
	}

	/* neither the devnum nor the key matched a hot spare */
	if (hs == NULL) {
		return (mddeverror(&shs->mde, MDE_INVAL_HS,
		    shs->shs_component_old));
	}

	/* check the force flag and the state of the hot spare */
	if (((shs->shs_options & HS_OPT_FORCE) == 0) &&
	    (hs->hs_state == HSS_RESERVED)) {
		return (mdhserror(&shs->mde, MDE_HS_RESVD,
		    shs->shs_hot_spare_pool,
		    hs->hs_devnum));
	}

	/* Scan the hot spare pool list */
	hsp = find_hot_spare_pool(setno, shs->shs_hot_spare_pool);
	if (hsp == (hot_spare_pool_t *)0) {
		return (mdhsperror(&shs->mde, MDE_INVAL_HSP,
		    shs->shs_hot_spare_pool));
	}

	/*
	 * Make sure the old device is in the pool.
	 */
	for (i = 0; i < hsp->hsp_nhotspares; i++) {
		if (hsp->hsp_hotspares[i] == hs->hs_record_id) {
			break;
		}
	}
	if (i >= hsp->hsp_nhotspares) {
		return (mddeverror(&shs->mde, MDE_INVAL_HS,
		    hs->hs_devnum));
	}

	/* Scan the hot spare list for the new hs (matched by devnum only) */
	new_hs = (hot_spare_t *)md_set[setno].s_hs;
	new_found = 0;
	while (new_hs) {
		if (new_hs->hs_devnum == shs->shs_component_new) {
			new_found = 1;
			break;
		}
		new_hs = new_hs->hs_next;
	}

	/*
	 * Make sure the new device is not already in the pool.
	 * We don't have to search the hs in this hsp, if the
	 * new hs was just created. Only if the hot spare was found.
	 */
	if (new_found) {
		for (i = 0; i < hsp->hsp_nhotspares; i++)
			if (hsp->hsp_hotspares[i] == new_hs->hs_record_id) {
				return (mdhserror(&shs->mde, MDE_HS_INUSE,
				    shs->shs_hot_spare_pool,
				    new_hs->hs_devnum));
			}
	}

	/* In case of a dryrun, we're done here */
	if (shs->shs_options & HS_OPT_DRYRUN) {
		return (0);
	}

	/*
	 * Create the new hotspare
	 */
	if (!new_found) {
		/* create a hot spare record */
		if (shs->shs_size_option & MD_CRO_64BIT) {
#if defined(_ILP32)
			return (mdhserror(&shs->mde, MDE_HS_UNIT_TOO_LARGE,
			    shs->shs_hot_spare_pool, shs->shs_component_new));
#else
			recid = mddb_createrec(HS_ONDSK_STR_SIZE, typ1, HS_REC,
				MD_CRO_64BIT | MD_CRO_HOTSPARE, setno);
#endif
		} else {
			recid = mddb_createrec(HS_ONDSK_STR_SIZE, typ1, HS_REC,
				MD_CRO_32BIT | MD_CRO_HOTSPARE, setno);
		}

		if (recid < 0) {
			return (mdhserror(&shs->mde, MDE_HS_CREATE_FAILURE,
			    shs->shs_hot_spare_pool,
			    shs->shs_component_new));
		}

		/* get the addr */
		new_hs = (hot_spare_t *)mddb_getrecaddr_resize(recid,
			sizeof (*new_hs), 0);

		new_hs->hs_record_id = recid;
		new_hs->hs_devnum = shs->shs_component_new;
		new_hs->hs_key = shs->shs_key_new;
		new_hs->hs_start_blk = shs->shs_start_blk;
		new_hs->hs_has_label = shs->shs_has_label;
		new_hs->hs_number_blks = shs->shs_number_blks;
		set_hot_spare_state(new_hs, HSS_AVAILABLE);
		new_hs->hs_refcount = 0;
		/*
		 * NOTE(review): hs_isopen is set for a hot spare created
		 * here, but seths_add() does not set it for the records it
		 * creates - confirm which is intended.
		 */
		new_hs->hs_isopen = 1;
	}

	/* lock the db records */
	recids[0] = hs->hs_record_id;
	recids[1] = new_hs->hs_record_id;
	recids[2] = hsp->hsp_record_id;
	recids[3] = 0;

	/* capture the old hot spare's name key before it may be deleted */
	sv.setno = setno;
	sv.key = hs->hs_key;

	hs->hs_refcount--;
	if (hs->hs_refcount == 0) {
		/*
		 * NOTE: We do not commit the previous hot spare record.
		 *	 There is no need, the link gets rebuilt at boot time.
		 */
		if (prev_hs) {
			prev_hs->hs_next = hs->hs_next;
		} else
			md_set[setno].s_hs = (void *) hs->hs_next;

		/* mark hs to be deleted in the correct order */
		delete_hs = 1;

		/* the old record is deleted below, so leave it out here */
		recids[0] = new_hs->hs_record_id;
		recids[1] = hsp->hsp_record_id;
		recids[2] = 0;
	}

	/* link into the hs list */
	new_hs->hs_refcount++;
	if (!new_found) {
		/* do this AFTER the old dev is possibly removed */
		new_hs->hs_next = (hot_spare_t *)md_set[setno].s_hs;
		md_set[setno].s_hs = (void *) new_hs;
	}

	/* find the old hs in the hsp and put the new hs in its slot */
	for (i = 0; i < hsp->hsp_nhotspares; i++) {
		if (hsp->hsp_hotspares[i] == hs->hs_record_id) {
			hsp->hsp_hotspares[i] = new_hs->hs_record_id;
			break;
		}
	}

	/* record the requested size option in the new hs's revision */
	if (shs->shs_size_option & MD_CRO_64BIT) {
		new_hs->hs_revision |= MD_64BIT_META_DEV;
	} else {
		new_hs->hs_revision &= ~MD_64BIT_META_DEV;
	}

	/* commit the db records */
	mddb_commitrecs_wrapper(recids);

	if (delete_hs)
		mddb_deleterec_wrapper(hs->hs_record_id);

	md_rem_names(&sv, 1);

	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REPLACE, SVM_TAG_HSP, setno,
	    md_expldev(hsp->hsp_self_id));
	return (0);
}
934 
935 static int
936 seths_enable(set_hs_params_t *shs)
937 {
938 	hot_spare_t	*hs;
939 	mddb_recid_t	recids[2];
940 	set_t		setno = shs->md_driver.md_setno;
941 	mdkey_t		key_old;
942 	int		num_keys_old = 0;
943 
944 
945 	/*
946 	 * Find device by using key associated with shs_component_old.
947 	 * If unable to find a unique key for shs_component_old
948 	 * then fail since namespace has multiple entries
949 	 * for this old component and we're unable to determine
950 	 * which key is the valid match for shs_component_old.
951 	 * This failure keeps a hotspare from being enabled on a slice
952 	 * that may already be in use by another metadevice.
953 	 */
954 	if (md_getkeyfromdev(setno, mddb_getsidenum(setno),
955 	    shs->shs_component_old, &key_old, &num_keys_old) != 0) {
956 		return (mddeverror(&shs->mde, MDE_NAME_SPACE,
957 		    shs->shs_component_old));
958 	}
959 
960 	/*
961 	 * If more than one key matches given old_dev - fail command
962 	 * since unable to determine which key is correct.
963 	 */
964 	if (num_keys_old > 1) {
965 		return (mddeverror(&shs->mde, MDE_MULTNM,
966 		    shs->shs_component_old));
967 	}
968 	/*
969 	 * If there is no key for this entry then fail since
970 	 * a key for this entry should exist.
971 	 */
972 	if (num_keys_old == 0) {
973 		return (mddeverror(&shs->mde, MDE_INVAL_HS,
974 		    shs->shs_component_old));
975 	}
976 
977 	/* Scan the hot spare list for the hs */
978 	hs = (hot_spare_t *)md_set[setno].s_hs;
979 	while (hs) {
980 		/*
981 		 * Since component may or may not be currently in the system,
982 		 * use the keys to find a match (not the devt).
983 		 */
984 		if (hs->hs_key == key_old) {
985 			break;
986 		}
987 		hs = hs->hs_next;
988 	}
989 
990 	if (hs == NULL) {
991 		return (mddeverror(&shs->mde, MDE_INVAL_HS,
992 			shs->shs_component_old));
993 	}
994 
995 	/* make sure it's broken */
996 	if (hs->hs_state != HSS_BROKEN) {
997 		return (mddeverror(&shs->mde, MDE_FIX_INVAL_HS_STATE,
998 		    hs->hs_devnum));
999 	}
1000 
1001 	/* In case of a dryrun, we're done here */
1002 	if (shs->shs_options & HS_OPT_DRYRUN) {
1003 		return (0);
1004 	}
1005 
1006 	/* fix it */
1007 	set_hot_spare_state(hs, HSS_AVAILABLE);
1008 	hs->hs_start_blk = shs->shs_start_blk;
1009 	hs->hs_has_label = shs->shs_has_label;
1010 	hs->hs_number_blks = shs->shs_number_blks;
1011 
1012 	/* commit the db records */
1013 	recids[0] = hs->hs_record_id;
1014 	recids[1] = 0;
1015 	mddb_commitrecs_wrapper(recids);
1016 	SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ENABLE, SVM_TAG_HS, setno,
1017 	    shs->shs_component_old);
1018 
1019 	return (0);
1020 }
1021 
1022 static int
1023 get_hs(
1024 	get_hs_params_t	*ghs
1025 )
1026 {
1027 	hot_spare_t	*hs;
1028 	set_t		setno = ghs->md_driver.md_setno;
1029 
1030 	mdclrerror(&ghs->mde);
1031 
1032 	/* Scan the hot spare list for the hs */
1033 	hs = (hot_spare_t *)md_set[setno].s_hs;
1034 	while (hs) {
1035 		if (hs->hs_key == ghs->ghs_key) {
1036 			break;
1037 		}
1038 		hs = hs->hs_next;
1039 	}
1040 
1041 	if (hs == NULL) {
1042 		return (mddeverror(&ghs->mde, MDE_INVAL_HS,
1043 		    ghs->ghs_devnum));
1044 	}
1045 
1046 	ghs->ghs_start_blk = hs->hs_start_blk;
1047 	ghs->ghs_number_blks = hs->hs_number_blks;
1048 	ghs->ghs_state = hs->hs_state;
1049 	ghs->ghs_timestamp = hs->hs_timestamp;
1050 	ghs->ghs_revision = hs->hs_revision;
1051 	return (0);
1052 }
1053 
1054 static void
1055 build_key_list(set_t setno, hot_spare_pool_t *hsp, mdkey_t *list)
1056 {
1057 	int	i;
1058 
1059 	for (i = 0; i < hsp->hsp_nhotspares; i++) {
1060 		hot_spare_t *hs;
1061 		hs = lookup_hot_spare(setno, hsp->hsp_hotspares[i], 1);
1062 		list[i] = hs->hs_key;
1063 	}
1064 }
1065 
1066 static int
1067 get_hsp(
1068 	void			*d,
1069 	int			mode
1070 )
1071 {
1072 	hot_spare_pool_t	*hsp;
1073 	get_hsp_t		*ghsp;
1074 	size_t			size;
1075 	set_t			setno;
1076 	int			err = 0;
1077 	md_i_get_t		*migp = (md_i_get_t *)d;
1078 
1079 
1080 	setno = migp->md_driver.md_setno;
1081 
1082 	mdclrerror(&migp->mde);
1083 
1084 	/* Scan the hot spare pool list */
1085 	hsp = find_hot_spare_pool(setno, migp->id);
1086 	if (hsp == NULL) {
1087 		return (mdhsperror(&migp->mde, MDE_INVAL_HSP,
1088 			migp->id));
1089 	}
1090 
1091 	size = (sizeof (ghsp->ghsp_hs_keys[0]) * (hsp->hsp_nhotspares - 1)) +
1092 	    sizeof (get_hsp_t);
1093 
1094 	if (migp->size == 0) {
1095 		migp->size = (int)size;
1096 		return (0);
1097 	}
1098 
1099 	if (migp->size < size)
1100 		return (EFAULT);
1101 
1102 	ghsp = kmem_alloc(size, KM_SLEEP);
1103 
1104 	ghsp->ghsp_id = hsp->hsp_self_id;
1105 	ghsp->ghsp_refcount = hsp->hsp_refcount;
1106 	ghsp->ghsp_nhotspares = hsp->hsp_nhotspares;
1107 	build_key_list(setno, hsp, ghsp->ghsp_hs_keys);
1108 	if (ddi_copyout(ghsp, (caddr_t)(uintptr_t)migp->mdp, size, mode))
1109 		err = EFAULT;
1110 	kmem_free(ghsp, size);
1111 	return (err);
1112 }
1113 
1114 static int
1115 set_hs(
1116 	set_hs_params_t	*shs
1117 )
1118 {
1119 	mdclrerror(&shs->mde);
1120 
1121 	if (md_get_setstatus(shs->md_driver.md_setno) & MD_SET_STALE)
1122 		return (mdmddberror(&shs->mde, MDE_DB_STALE, NODEV32,
1123 		    shs->md_driver.md_setno));
1124 
1125 	switch (shs->shs_cmd) {
1126 	case ADD_HOT_SPARE:
1127 		return (seths_add(shs));
1128 	case DELETE_HOT_SPARE:
1129 		return (seths_delete(shs));
1130 	case REPLACE_HOT_SPARE:
1131 		return (seths_replace(shs));
1132 	case FIX_HOT_SPARE:
1133 		return (seths_enable(shs));
1134 	default:
1135 		return (mderror(&shs->mde, MDE_INVAL_HSOP));
1136 	}
1137 }
1138 
1139 static void
1140 hotspares_poke_hotspares(void)
1141 {
1142 	intptr_t	(*poke_hs)();
1143 	int		i;
1144 
1145 	for (i = 0; i < MD_NOPS; i++) {
1146 		/* handle change */
1147 		poke_hs = md_get_named_service(NODEV64, i, "poke hotspares", 0);
1148 		if (poke_hs)
1149 			(void) (*poke_hs)();
1150 	}
1151 }
1152 
1153 
1154 /*ARGSUSED4*/
1155 static int
1156 hotspares_ioctl(
1157 	dev_t	dev,
1158 	int	cmd,
1159 	void	*data,
1160 	int	mode,
1161 	IOLOCK	*lockp
1162 )
1163 {
1164 	size_t	sz = 0;
1165 	void	*d = NULL;
1166 	int	err = 0;
1167 
1168 	/* single thread */
1169 	if (getminor(dev) != MD_ADM_MINOR)
1170 		return (ENOTTY);
1171 
1172 	/* We can only handle 32-bit clients for internal commands */
1173 	if ((mode & DATAMODEL_MASK) != DATAMODEL_ILP32) {
1174 		return (EINVAL);
1175 	}
1176 
1177 	mutex_enter(&md_mx);
1178 	while (md_status & MD_GBL_HS_LOCK)
1179 		cv_wait(&md_cv, &md_mx);
1180 	md_status |= MD_GBL_HS_LOCK;
1181 	mutex_exit(&md_mx);
1182 
1183 	/* dispatch ioctl */
1184 	switch (cmd) {
1185 
1186 	case MD_IOCSET_HS:	/* setup hot spares and pools */
1187 	{
1188 		if (! (mode & FWRITE)) {
1189 			err = EACCES;
1190 			break;
1191 		}
1192 
1193 		sz = sizeof (set_hs_params_t);
1194 		d = kmem_alloc(sz, KM_SLEEP);
1195 
1196 		if (ddi_copyin(data, d, sz, mode)) {
1197 			err = EFAULT;
1198 			break;
1199 		}
1200 
1201 		err = set_hs(d);
1202 		break;
1203 	}
1204 
1205 	case MD_IOCGET_HS:	/* get hot spare info */
1206 	{
1207 		if (! (mode & FREAD)) {
1208 			err = EACCES;
1209 			break;
1210 		}
1211 
1212 		sz = sizeof (get_hs_params_t);
1213 		d = kmem_alloc(sz, KM_SLEEP);
1214 
1215 		if (ddi_copyin(data, d, sz, mode)) {
1216 			err = EFAULT;
1217 			break;
1218 		}
1219 
1220 		err = get_hs(d);
1221 		break;
1222 	}
1223 
1224 	case MD_IOCGET:		/* get hot spare pool info */
1225 	{
1226 		if (! (mode & FREAD)) {
1227 			err = EACCES;
1228 			break;
1229 		}
1230 
1231 		sz = sizeof (md_i_get_t);
1232 		d = kmem_alloc(sz, KM_SLEEP);
1233 
1234 		if (ddi_copyin(data, d, sz, mode)) {
1235 			err = EFAULT;
1236 			break;
1237 		}
1238 
1239 		err = get_hsp(d, mode);
1240 		break;
1241 	}
1242 
1243 	default:
1244 		err = ENOTTY;
1245 	}
1246 
1247 	/*
1248 	 * copyout and free any args
1249 	 */
1250 	if (sz != 0) {
1251 		if (err == 0) {
1252 			if (ddi_copyout(d, data, sz, mode) != 0) {
1253 				err = EFAULT;
1254 			}
1255 		}
1256 		kmem_free(d, sz);
1257 	}
1258 
1259 	/* un single thread */
1260 	mutex_enter(&md_mx);
1261 	md_status &= ~MD_GBL_HS_LOCK;
1262 	cv_broadcast(&md_cv);
1263 	mutex_exit(&md_mx);
1264 
1265 	/* handle change */
1266 	hotspares_poke_hotspares();
1267 
1268 	/* return success */
1269 	return (err);
1270 }
1271 
1272 
1273 static void
1274 load_hotspare(set_t setno, mddb_recid_t recid)
1275 {
1276 	hot_spare_t	*hs;
1277 	mddb_de_ic_t	*dep;
1278 	mddb_rb32_t	*rbp;
1279 	size_t		newreqsize;
1280 	hot_spare_t	*b_hs;
1281 	hot_spare32_od_t *s_hs;
1282 
1283 	mddb_setrecprivate(recid, MD_PRV_GOTIT);
1284 
1285 	dep = mddb_getrecdep(recid);
1286 	dep->de_flags = MDDB_F_HOTSPARE;
1287 	rbp = dep->de_rb;
1288 	switch (rbp->rb_revision) {
1289 	case MDDB_REV_RB:
1290 	case MDDB_REV_RBFN:
1291 		/*
1292 		 * Needs to convert to internal 64 bit
1293 		 */
1294 		s_hs = (hot_spare32_od_t *)mddb_getrecaddr(recid);
1295 		newreqsize = sizeof (hot_spare_t);
1296 		b_hs = (hot_spare_t *)kmem_zalloc(newreqsize, KM_SLEEP);
1297 		hs_convert((caddr_t)s_hs, (caddr_t)b_hs, SMALL_2_BIG);
1298 		kmem_free(s_hs, dep->de_reqsize);
1299 		dep->de_rb_userdata = b_hs;
1300 		dep->de_reqsize = newreqsize;
1301 		hs = b_hs;
1302 		break;
1303 	case MDDB_REV_RB64:
1304 	case MDDB_REV_RB64FN:
1305 		hs = (hot_spare_t *)mddb_getrecaddr_resize
1306 			(recid, sizeof (*hs), 0);
1307 		break;
1308 	}
1309 	MDDB_NOTE_FN(rbp->rb_revision, hs->hs_revision);
1310 
1311 #if defined(_ILP32)
1312 	if (hs->hs_revision & MD_64BIT_META_DEV) {
1313 		char	devname[MD_MAX_CTDLEN];
1314 
1315 		set_hot_spare_state(hs, HSS_BROKEN);
1316 		(void) md_devname(setno, hs->hs_devnum, devname,
1317 		    sizeof (devname));
1318 		cmn_err(CE_NOTE, "%s is unavailable because 64 bit hotspares "
1319 		    "are not accessible on a 32 bit kernel\n", devname);
1320 	}
1321 #endif
1322 
1323 	ASSERT(hs != NULL);
1324 
1325 	if (hs->hs_refcount == 0) {
1326 		mddb_setrecprivate(recid, MD_PRV_PENDDEL);
1327 		return;
1328 	}
1329 
1330 	hs->hs_next = (hot_spare_t *)md_set[setno].s_hs;
1331 	md_set[setno].s_hs = (void *)hs;
1332 
1333 	hs->hs_isopen = 0;
1334 
1335 	hs->hs_devnum = md_getdevnum(setno, mddb_getsidenum(setno),
1336 		hs->hs_key, MD_NOTRUST_DEVT);
1337 }
1338 
1339 
1340 static void
1341 load_hotsparepool(set_t setno, mddb_recid_t recid)
1342 {
1343 	hot_spare_pool_t *hsp;
1344 	hot_spare_pool_ond_t *hsp_ond;
1345 	size_t hsp_icsize;
1346 
1347 	mddb_setrecprivate(recid, MD_PRV_GOTIT);
1348 
1349 	hsp_ond = (hot_spare_pool_ond_t *)mddb_getrecaddr(recid);
1350 	ASSERT(hsp_ond != NULL);
1351 
1352 	if (hsp_ond->hsp_self_id == MD_HSP_NONE) {
1353 		mddb_setrecprivate(recid, MD_PRV_PENDDEL);
1354 		return;
1355 	}
1356 
1357 	hsp_icsize =  HSP_ONDSK_STR_OFF + mddb_getrecsize(recid);
1358 
1359 	hsp = (hot_spare_pool_t *)mddb_getrecaddr_resize(recid, hsp_icsize,
1360 		HSP_ONDSK_STR_OFF);
1361 	hsp->hsp_next = (hot_spare_pool_t *)md_set[setno].s_hsp;
1362 	md_set[setno].s_hsp = (void *) hsp;
1363 
1364 	rw_enter(&hotspares_md_ops.md_link_rw.lock, RW_WRITER);
1365 	hsp->hsp_link.ln_next = hotspares_md_ops.md_head;
1366 	hsp->hsp_link.ln_setno = setno;
1367 	hsp->hsp_link.ln_id = hsp->hsp_self_id;
1368 	hotspares_md_ops.md_head = &hsp->hsp_link;
1369 	rw_exit(&hotspares_md_ops.md_link_rw.lock);
1370 }
1371 
1372 static int
1373 hotspares_snarf(md_snarfcmd_t cmd, set_t setno)
1374 {
1375 	mddb_recid_t	recid;
1376 	int		gotsomething;
1377 	mddb_type_t	typ1;
1378 
1379 	if (cmd == MD_SNARF_CLEANUP)
1380 		return (0);
1381 
1382 	gotsomething = 0;
1383 
1384 	typ1 = (mddb_type_t)md_getshared_key(setno,
1385 	    hotspares_md_ops.md_driver.md_drivername);
1386 	recid = mddb_makerecid(setno, 0);
1387 	while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) {
1388 		if (mddb_getrecprivate(recid) & MD_PRV_GOTIT)
1389 			continue;
1390 
1391 		switch (mddb_getrectype2(recid)) {
1392 		case HSP_REC:
1393 			load_hotsparepool(setno, recid);
1394 			gotsomething = 1;
1395 			break;
1396 		case HS_REC:
1397 			load_hotspare(setno, recid);
1398 			gotsomething = 1;
1399 			break;
1400 		default:
1401 			ASSERT(0);
1402 		}
1403 	}
1404 
1405 	if (gotsomething)
1406 		return (gotsomething);
1407 
1408 	recid = mddb_makerecid(setno, 0);
1409 	while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0)
1410 		if (!(mddb_getrecprivate(recid) & MD_PRV_GOTIT))
1411 			mddb_setrecprivate(recid, MD_PRV_PENDDEL);
1412 
1413 	return (0);
1414 }
1415 
1416 static int
1417 hotspares_halt(md_haltcmd_t cmd, set_t setno)
1418 {
1419 	hot_spare_t		*hs, **p_hs;
1420 	hot_spare_pool_t	*hsp, **p_hsp;
1421 
1422 	if (cmd == MD_HALT_CLOSE)
1423 		return (0);
1424 
1425 	if (cmd == MD_HALT_OPEN)
1426 		return (0);
1427 
1428 	if (cmd == MD_HALT_CHECK)
1429 		return (0);
1430 
1431 	if (cmd == MD_HALT_UNLOAD)
1432 		return (0);
1433 
1434 	if (cmd != MD_HALT_DOIT)
1435 		return (1);
1436 	/*
1437 	 * Find all the hotspares for set "setno"
1438 	 *   and remove them from the hot_spare_list.
1439 	 */
1440 	p_hs = (hot_spare_t **)&md_set[setno].s_hs;
1441 	hs = (hot_spare_t *)md_set[setno].s_hs;
1442 	for (; hs != NULL; hs = *p_hs)
1443 		*p_hs = hs->hs_next;
1444 
1445 	/*
1446 	 * Find all the hotspare pools for set "setno"
1447 	 *   and remove them from the hot_spare_pools list.
1448 	 * Also remove from the get_next list.
1449 	 */
1450 	p_hsp = (hot_spare_pool_t **)&md_set[setno].s_hsp;
1451 	hsp = (hot_spare_pool_t *)md_set[setno].s_hsp;
1452 	for (; hsp != NULL; hsp = *p_hsp) {
1453 		md_rem_link(setno, hsp->hsp_self_id,
1454 		    &hotspares_md_ops.md_link_rw.lock,
1455 		    &hotspares_md_ops.md_head);
1456 		*p_hsp = hsp->hsp_next;
1457 	}
1458 
1459 	return (0);
1460 }
1461 
1462 static hot_spare_t *
1463 usable_hs(
1464 	set_t		setno,
1465 	mddb_recid_t	hs_id,
1466 	diskaddr_t	nblks,
1467 	int		labeled,
1468 	diskaddr_t	*start)
1469 {
1470 	hot_spare_t	*hs;
1471 
1472 	hs = lookup_hot_spare(setno, hs_id, 1);
1473 
1474 	if (hs->hs_state != HSS_AVAILABLE)
1475 		return ((hot_spare_t *)0);
1476 
1477 	if (labeled && hs->hs_has_label && (hs->hs_number_blks >= nblks)) {
1478 		*start = 0;
1479 		return (hs);
1480 	} else if ((hs->hs_number_blks - hs->hs_start_blk) >= nblks) {
1481 		*start = hs->hs_start_blk;
1482 		return (hs);
1483 	}
1484 	return ((hot_spare_t *)0);
1485 }
1486 
1487 static int
1488 reserve_a_hs(
1489 	set_t		setno,
1490 	mddb_recid_t	id,
1491 	uint64_t	size,
1492 	int		labeled,
1493 	mddb_recid_t	*hs_id,
1494 	mdkey_t		*key,
1495 	md_dev64_t	*dev,
1496 	diskaddr_t	*sblock)
1497 {
1498 	hot_spare_pool_t	*hsp;
1499 	hot_spare_t		*hs;
1500 	int			i;
1501 
1502 	*hs_id = 0;
1503 
1504 	hsp = find_hot_spare_pool(setno, id);
1505 	if (hsp == NULL)
1506 		return (-1);
1507 
1508 	for (i = 0; i < hsp->hsp_nhotspares; i++) {
1509 		hs = usable_hs(setno, hsp->hsp_hotspares[i],
1510 		    size, labeled, sblock);
1511 		if (hs == NULL)
1512 			continue;
1513 
1514 		set_hot_spare_state(hs, HSS_RESERVED);
1515 		*hs_id = hs->hs_record_id;
1516 		*key = hs->hs_key;
1517 		*dev = hs->hs_devnum;
1518 		/* NOTE: Mirror code commits the hs record */
1519 		return (0);
1520 	}
1521 
1522 	return (-1);
1523 }
1524 
1525 
1526 /* ARGSUSED3 */
1527 static int
1528 return_a_hs(
1529 	set_t			setno,
1530 	mddb_recid_t		id,
1531 	mddb_recid_t		*hs_id,
1532 	mdkey_t			key,
1533 	diskaddr_t		sblock,
1534 	uint64_t		size,
1535 	hotspare_states_t	new_state)
1536 {
1537 	hot_spare_pool_t	*hsp;
1538 	hot_spare_t		*hs;
1539 	int			i;
1540 
1541 	/*
1542 	 * NOTE: sblock/size are not currently being used.
1543 	 *	 That is because we always allocate the whole hs.
1544 	 *	 Later if we choose to allocate only what is needed
1545 	 *	 then the sblock/size can be used to determine
1546 	 *	 which part is being unreseved.
1547 	 */
1548 	*hs_id = 0;
1549 
1550 	hsp = find_hot_spare_pool(setno, id);
1551 	if (hsp == NULL)
1552 		return (-1);
1553 
1554 	for (i = 0; i < hsp->hsp_nhotspares; i++) {
1555 		hs = lookup_hot_spare(setno, hsp->hsp_hotspares[i], 1);
1556 		if (hs->hs_key != key)
1557 			continue;
1558 
1559 		set_hot_spare_state(hs, new_state);
1560 		*hs_id = hs->hs_record_id;
1561 		if (new_state == HSS_BROKEN) {
1562 			SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, SVM_TAG_HS,
1563 			    setno, hs->hs_devnum);
1564 		}
1565 		if (new_state == HSS_AVAILABLE) {
1566 			SE_NOTIFY(EC_SVM_STATE, ESC_SVM_HS_FREED, SVM_TAG_HS,
1567 			    setno, hs->hs_devnum);
1568 		}
1569 
1570 		/* NOTE: Mirror/Raid code commits the hs record */
1571 		return (0);
1572 	}
1573 
1574 	return (-1);
1575 }
1576 
1577 
1578 static int
1579 modify_hsp_ref(set_t setno, mddb_recid_t id, int incref,  mddb_recid_t *hsp_id)
1580 {
1581 	hot_spare_pool_t	*hsp;
1582 
1583 	*hsp_id = 0;
1584 
1585 	if (id  < 0)
1586 		return (0);
1587 
1588 	hsp = find_hot_spare_pool(setno, id);
1589 	if (hsp == NULL)
1590 		return (-1);
1591 
1592 	if (incref)
1593 		hsp->hsp_refcount++;
1594 	else
1595 		hsp->hsp_refcount--;
1596 
1597 	*hsp_id = hsp->hsp_record_id;
1598 
1599 	/* NOTE: Stripe code commits the hsp record */
1600 	return (0);
1601 }
1602 
1603 
1604 static int
1605 mkdev_for_a_hs(mddb_recid_t hs_id, md_dev64_t *dev)
1606 {
1607 	hot_spare_t	*hs;
1608 
1609 	hs = lookup_hot_spare(mddb_getsetnum(hs_id), hs_id, 0);
1610 	if (hs == NULL)
1611 		return (0);
1612 
1613 	*dev = hs->hs_devnum;
1614 	return (0);
1615 }
1616 
1617 static intptr_t
1618 hotspares_interface(
1619 	hs_cmds_t	cmd,
1620 	mddb_recid_t	id,
1621 	uint64_t	size,
1622 	int		bool,
1623 	mddb_recid_t	*hs_id,
1624 	mdkey_t		*key,
1625 	md_dev64_t	*dev,
1626 	diskaddr_t	*sblock)
1627 {
1628 	set_t	setno;
1629 	int	err = -1;
1630 
1631 	mutex_enter(&md_mx);
1632 	while (md_status & MD_GBL_HS_LOCK)
1633 		cv_wait(&md_cv, &md_mx);
1634 
1635 	/* If md_halt has been run do not continue */
1636 	if (md_status & (MD_GBL_HALTED | MD_GBL_DAEMONS_DIE)) {
1637 		mutex_exit(&md_mx);
1638 		return (ENXIO);
1639 	}
1640 
1641 	md_status |= MD_GBL_HS_LOCK;
1642 	mutex_exit(&md_mx);
1643 
1644 	setno = mddb_getsetnum(id);
1645 
1646 	switch (cmd) {
1647 	case HS_GET:
1648 		err = reserve_a_hs(setno, id, size, bool, hs_id,
1649 		    key, dev, sblock);
1650 		break;
1651 	case HS_FREE:
1652 		err = return_a_hs(setno, id, hs_id, *key, 0, 0, HSS_AVAILABLE);
1653 		hotspares_poke_hotspares();
1654 		break;
1655 	case HS_BAD:
1656 		err = return_a_hs(setno, id, hs_id, *key, 0, 0, HSS_BROKEN);
1657 		break;
1658 	case HSP_INCREF:
1659 		err = modify_hsp_ref(setno, id, 1, hs_id);
1660 		break;
1661 	case HSP_DECREF:
1662 		err = modify_hsp_ref(setno, id, 0, hs_id);
1663 		break;
1664 	case HS_MKDEV:
1665 		err = mkdev_for_a_hs(*hs_id, dev);
1666 		break;
1667 	}
1668 
1669 	mutex_enter(&md_mx);
1670 	md_status &= ~MD_GBL_HS_LOCK;
1671 	cv_broadcast(&md_cv);
1672 	mutex_exit(&md_mx);
1673 
1674 	return (err);
1675 }
1676 
1677 static void
1678 imp_hotsparepool(
1679 	set_t	setno,
1680 	mddb_recid_t	recid
1681 )
1682 {
1683 	hot_spare_pool_ond_t	*hsp_ond;
1684 	mddb_recid_t		*hsp_recid, *hs_recid;
1685 	int			i;
1686 	uint_t			*hsp_selfid;
1687 
1688 	mddb_setrecprivate(recid, MD_PRV_GOTIT);
1689 
1690 	hsp_ond = (hot_spare_pool_ond_t *)mddb_getrecaddr(recid);
1691 	hsp_recid = &(hsp_ond->hsp_record_id);
1692 	hsp_selfid = &(hsp_ond->hsp_self_id);
1693 	/*
1694 	 * Fixup the pool and hotspares
1695 	 */
1696 	*hsp_recid = MAKERECID(setno, DBID(*hsp_recid));
1697 	*hsp_selfid = MAKERECID(setno, DBID(*hsp_selfid));
1698 
1699 	for (i = 0; i < hsp_ond->hsp_nhotspares; i++) {
1700 		hs_recid = &(hsp_ond->hsp_hotspares[i]);
1701 		*hs_recid = MAKERECID(setno, DBID(*hs_recid));
1702 	}
1703 }
1704 
1705 static void
1706 imp_hotspare(
1707 	set_t	setno,
1708 	mddb_recid_t	recid
1709 )
1710 {
1711 	mddb_de_ic_t	*dep;
1712 	mddb_rb32_t	*rbp;
1713 	hot_spare_t	*hs64;
1714 	hot_spare32_od_t	*hs32;
1715 	mddb_recid_t	*hs_recid;
1716 
1717 	mddb_setrecprivate(recid, MD_PRV_GOTIT);
1718 
1719 	dep = mddb_getrecdep(recid);
1720 	rbp = dep->de_rb;
1721 	switch (rbp->rb_revision) {
1722 	case MDDB_REV_RB:
1723 	case MDDB_REV_RBFN:
1724 		/*
1725 		 * 32 bit hotspare
1726 		 */
1727 		hs32 = (hot_spare32_od_t *)mddb_getrecaddr(recid);
1728 		hs_recid = &(hs32->hs_record_id);
1729 		break;
1730 	case MDDB_REV_RB64:
1731 	case MDDB_REV_RB64FN:
1732 		hs64 = (hot_spare_t *)mddb_getrecaddr(recid);
1733 		hs_recid = &(hs64->hs_record_id);
1734 		break;
1735 	}
1736 
1737 	/*
1738 	 * Fixup the setno
1739 	 */
1740 	*hs_recid = MAKERECID(setno, DBID(*hs_recid));
1741 }
1742 
1743 static int
1744 hotspares_imp_set(
1745 	set_t	setno
1746 )
1747 {
1748 	mddb_recid_t	recid;
1749 	int		gotsomething;
1750 	mddb_type_t	typ1;
1751 
1752 
1753 	gotsomething = 0;
1754 
1755 	typ1 = (mddb_type_t)md_getshared_key(setno,
1756 	    hotspares_md_ops.md_driver.md_drivername);
1757 	recid = mddb_makerecid(setno, 0);
1758 	while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) {
1759 		if (mddb_getrecprivate(recid) & MD_PRV_GOTIT)
1760 			continue;
1761 
1762 		switch (mddb_getrectype2(recid)) {
1763 		case HSP_REC:
1764 			imp_hotsparepool(setno, recid);
1765 			gotsomething = 1;
1766 			break;
1767 		case HS_REC:
1768 			imp_hotspare(setno, recid);
1769 			gotsomething = 1;
1770 			break;
1771 		default:
1772 			ASSERT(0);
1773 		}
1774 	}
1775 
1776 	return (gotsomething);
1777 }
1778 
1779 static md_named_services_t hotspares_named_services[] = {
1780 	{hotspares_interface,	"hot spare interface"},
1781 	{NULL,			0}
1782 };
1783 
1784 md_ops_t hotspares_md_ops = {
1785 	NULL,			/* open */
1786 	NULL,			/* close */
1787 	NULL,			/* strategy */
1788 	NULL,			/* print */
1789 	NULL,			/* dump */
1790 	NULL,			/* read */
1791 	NULL,			/* write */
1792 	hotspares_ioctl,	/* hotspares_ioctl, */
1793 	hotspares_snarf,	/* hotspares_snarf */
1794 	hotspares_halt,		/* halt */
1795 	NULL,			/* aread */
1796 	NULL,			/* awrite */
1797 	hotspares_imp_set,	/* import set */
1798 	hotspares_named_services /* named_services */
1799 };
1800 
1801 static void
1802 fini_uninit()
1803 {
1804 	/* prevent access to services that may have been imported */
1805 	md_clear_hot_spare_interface();
1806 }
1807 
1808 /* define the module linkage */
1809 MD_PLUGIN_MISC_MODULE("hot spares module", md_noop, fini_uninit())
1810