xref: /titanic_41/usr/src/uts/common/io/lvm/hotspares/hotspares.c (revision 0b6016e6ff70af39f99c9cc28e0c2207c8f5413c)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 /*
22  * Copyright 2006 Sun Microsystems, Inc.  All rights reserved.
23  * Use is subject to license terms.
24  */
25 
26 #pragma ident	"%Z%%M%	%I%	%E% SMI"
27 
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/conf.h>
31 #include <sys/file.h>
32 #include <sys/user.h>
33 #include <sys/uio.h>
34 #include <sys/t_lock.h>
35 #include <sys/kmem.h>
36 #include <vm/page.h>
37 #include <sys/sysmacros.h>
38 #include <sys/types.h>
39 #include <sys/mkdev.h>
40 #include <sys/stat.h>
41 #include <sys/open.h>
42 #include <sys/modctl.h>
43 #include <sys/ddi.h>
44 #include <sys/sunddi.h>
45 #include <sys/debug.h>
46 
47 #include <sys/lvm/md_hotspares.h>
48 #include <sys/lvm/md_convert.h>
49 
50 #include <sys/sysevent/eventdefs.h>
51 #include <sys/sysevent/svm.h>
52 
/*
 * Module-level data: the md_ops_t table for this driver (also exported
 * through md_interface_ops when not building for lint), followed by the
 * symbols defined elsewhere that this file references.
 */
53 md_ops_t		hotspares_md_ops;
54 #ifndef	lint
55 char			_depends_on[] = "drv/md";
56 md_ops_t		*md_interface_ops = &hotspares_md_ops;
57 #endif
58 
59 extern md_ops_t		**md_ops;
60 extern md_ops_t		*md_opslist;
61 extern md_set_t		md_set[];
62 
63 extern kmutex_t		md_mx;		/* used to lock md global stuff */
64 extern kcondvar_t	md_cv;		/* md_status events */
65 extern int		md_status;
66 
67 extern void		md_clear_hot_spare_interface();
68 
69 static void
70 set_hot_spare_state(hot_spare_t *hs, hotspare_states_t newstate)
71 {
72 	hs->hs_state = newstate;
73 	uniqtime32(&hs->hs_timestamp);
74 }
75 
76 static hot_spare_t *
77 lookup_hot_spare(set_t setno, mddb_recid_t hs_id, int must_exist)
78 {
79 	hot_spare_t *hs;
80 
81 	for (hs = (hot_spare_t *)md_set[setno].s_hs; hs; hs = hs->hs_next) {
82 		if (hs->hs_record_id == hs_id)
83 			return (hs);
84 	}
85 	if (must_exist)
86 		ASSERT(0);
87 
88 	return ((hot_spare_t *)NULL);
89 }
90 
91 
92 static int
93 seths_create_hsp(set_hs_params_t *shs)
94 {
95 	hot_spare_pool_t	*hsp;
96 	mddb_recid_t		recid;
97 	set_t			setno;
98 	mddb_type_t		typ1;
99 
100 	setno = HSP_SET(shs->shs_hot_spare_pool);
101 
102 	/* Scan the hot spare pool list */
103 	hsp = find_hot_spare_pool(setno, shs->shs_hot_spare_pool);
104 	if (hsp != (hot_spare_pool_t *)0)
105 		return (0);
106 
107 	typ1 = (mddb_type_t)md_getshared_key(setno,
108 	    hotspares_md_ops.md_driver.md_drivername);
109 
110 	/* create a hot spare pool record */
111 	if (shs->shs_options & MD_CRO_64BIT) {
112 #if defined(_ILP32)
113 		return (mdhsperror(&shs->mde, MDE_HSP_UNIT_TOO_LARGE,
114 		    shs->shs_hot_spare_pool));
115 #else
116 		recid = mddb_createrec(sizeof (hot_spare_pool_ond_t), typ1,
117 		    HSP_REC, MD_CRO_64BIT | MD_CRO_HOTSPARE_POOL | MD_CRO_FN,
118 		    setno);
119 #endif
120 	} else {
121 		recid = mddb_createrec(sizeof (hot_spare_pool_ond_t), typ1,
122 		    HSP_REC, MD_CRO_32BIT | MD_CRO_HOTSPARE_POOL | MD_CRO_FN,
123 		    setno);
124 	}
125 
126 	if (recid < 0) {
127 		return (mdhsperror(&shs->mde, MDE_HSP_CREATE_FAILURE,
128 		    shs->shs_hot_spare_pool));
129 	}
130 
131 	/* get the record addr */
132 	hsp = (hot_spare_pool_t *)mddb_getrecaddr_resize(recid, sizeof (*hsp),
133 		HSP_ONDSK_STR_OFF);
134 
135 	hsp->hsp_self_id = shs->shs_hot_spare_pool;
136 	hsp->hsp_record_id = recid;
137 	hsp->hsp_next = (hot_spare_pool_t *)md_set[setno].s_hsp;
138 	hsp->hsp_refcount = 0;
139 	hsp->hsp_nhotspares = 0;
140 	hsp->hsp_revision |= MD_FN_META_DEV;
141 
142 	md_set[setno].s_hsp = (void *) hsp;
143 
144 	mddb_commitrec_wrapper(recid);
145 	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, SVM_TAG_HSP, setno,
146 	    md_expldev(hsp->hsp_self_id));
147 
148 	rw_enter(&hotspares_md_ops.md_link_rw.lock, RW_WRITER);
149 	hsp->hsp_link.ln_next = hotspares_md_ops.md_head;
150 	hsp->hsp_link.ln_setno = setno;
151 	hsp->hsp_link.ln_id = hsp->hsp_self_id;
152 	hotspares_md_ops.md_head = &hsp->hsp_link;
153 	rw_exit(&hotspares_md_ops.md_link_rw.lock);
154 
155 	return (0);
156 }
157 
158 
159 static int
160 seths_add(set_hs_params_t *shs)
161 {
162 	hot_spare_t		*hs;
163 	hot_spare_pool_t	*hsp;
164 	hot_spare_pool_t	*prev_hsp;
165 	hot_spare_pool_t	*new_hsp;
166 	hot_spare_pool_t	*old_hsp;
167 	md_create_rec_option_t	options;
168 	mddb_recid_t		recid;
169 	mddb_recid_t		recids[5];
170 	size_t			new_size;
171 	int			i;
172 	int			delete_hsp = 0;
173 	int			irecid;
174 	set_t			setno;
175 	mddb_type_t		typ1;
176 	int			hsp_created = 0;
177 	mdkey_t			key_old;
178 	int			num_keys_old = 0;
179 
180 	/* Not much to do here in case of a dryrun */
181 	if (shs->shs_options & HS_OPT_DRYRUN) {
182 		return (0);
183 	}
184 
185 	/* create an empty hot spare pool */
186 	if (shs->shs_options & HS_OPT_POOL) {
187 		return (seths_create_hsp(shs));
188 	}
189 
190 	setno = HSP_SET(shs->shs_hot_spare_pool);
191 	typ1 = (mddb_type_t)md_getshared_key(setno,
192 	    hotspares_md_ops.md_driver.md_drivername);
193 
194 	/* Scan the hot spare list */
195 	hs = (hot_spare_t *)md_set[setno].s_hs;
196 	while (hs) {
197 		if (hs->hs_devnum == shs->shs_component_old) {
198 			break;
199 		}
200 		hs = hs->hs_next;
201 	}
202 
203 	if (hs == NULL) {
204 		/*
205 		 * Did not find match for device using devnum so use
206 		 * key associated with shs_component_old just
207 		 * in case there is a match but the match's dev is NODEV.
208 		 * If unable to find a unique key for shs_component_old
209 		 * then fail since namespace has multiple entries
210 		 * for this old component and we shouldn't allow
211 		 * an addition of a hotspare in this case.
212 		 */
213 		if (md_getkeyfromdev(setno, mddb_getsidenum(setno),
214 		    shs->shs_component_old, &key_old, &num_keys_old) != 0) {
215 			return (mddeverror(&shs->mde, MDE_NAME_SPACE,
216 			    shs->shs_component_old));
217 		}
218 
219 		/*
220 		 * If more than one key matches given old_dev - fail command
221 		 * since shouldn't add new hotspare if namespace has
222 		 * multiple entries.
223 		 */
224 		if (num_keys_old > 1) {
225 			return (mddeverror(&shs->mde, MDE_MULTNM,
226 			    shs->shs_component_old));
227 		}
228 		/*
229 		 * If there is no key for this entry then fail since
230 		 * a key for this entry should exist.
231 		 */
232 		if (num_keys_old == 0) {
233 			return (mddeverror(&shs->mde, MDE_INVAL_HS,
234 			    shs->shs_component_old));
235 		}
236 		/* Scan the hot spare list again */
237 		hs = (hot_spare_t *)md_set[setno].s_hs;
238 		while (hs) {
239 			/*
240 			 * Only need to compare keys when hs_devnum is NODEV.
241 			 */
242 			if ((hs->hs_devnum == NODEV64) &&
243 			    (hs->hs_key == key_old)) {
244 				break;
245 			}
246 			hs = hs->hs_next;
247 		}
248 	}
249 
250 	if (hs == NULL) {
251 		/* create a hot spare record */
252 		if (shs->shs_size_option & MD_CRO_64BIT) {
253 #if defined(_ILP32)
254 			return (mdhserror(&shs->mde, MDE_HS_UNIT_TOO_LARGE,
255 			    shs->shs_hot_spare_pool, shs->shs_component_old));
256 #else
257 			recid = mddb_createrec(HS_ONDSK_STR_SIZE, typ1, HS_REC,
258 				MD_CRO_64BIT | MD_CRO_HOTSPARE, setno);
259 #endif
260 		} else {
261 			recid = mddb_createrec(HS_ONDSK_STR_SIZE, typ1, HS_REC,
262 				MD_CRO_32BIT | MD_CRO_HOTSPARE, setno);
263 		}
264 
265 		if (recid < 0) {
266 			return (mdhserror(&shs->mde, MDE_HS_CREATE_FAILURE,
267 			    shs->shs_hot_spare_pool,
268 			    shs->shs_component_old));
269 		}
270 
271 		/* get the addr */
272 		hs = (hot_spare_t *)mddb_getrecaddr_resize(recid, sizeof (*hs),
273 			0);
274 
275 		hs->hs_record_id = recid;
276 
277 		hs->hs_devnum = shs->shs_component_old;
278 		hs->hs_key = shs->shs_key_old;
279 		hs->hs_start_blk = shs->shs_start_blk;
280 		hs->hs_has_label = shs->shs_has_label;
281 		hs->hs_number_blks = shs->shs_number_blks;
282 		set_hot_spare_state(hs, HSS_AVAILABLE);
283 		hs->hs_refcount = 0;
284 		hs->hs_next = (hot_spare_t *)md_set[setno].s_hs;
285 		md_set[setno].s_hs = (void *) hs;
286 	}
287 
288 	/* Scan the hot spare pool list */
289 	hsp = (hot_spare_pool_t *)md_set[setno].s_hsp;
290 	prev_hsp = (hot_spare_pool_t *)0;
291 	while (hsp) {
292 		if (hsp->hsp_self_id == shs->shs_hot_spare_pool) {
293 			break;
294 		}
295 		prev_hsp = hsp;
296 		hsp = hsp->hsp_next;
297 	}
298 
299 	if (hsp == NULL) {
300 		/* create a hot spare pool record */
301 		recid = mddb_createrec(sizeof (hot_spare_pool_ond_t),
302 		    typ1, HSP_REC,
303 		    MD_CRO_32BIT | MD_CRO_HOTSPARE_POOL | MD_CRO_FN, setno);
304 
305 		if (recid < 0) {
306 			return (mdhsperror(&shs->mde, MDE_HSP_CREATE_FAILURE,
307 			    shs->shs_hot_spare_pool));
308 		}
309 
310 		/* get the record addr */
311 		hsp = (hot_spare_pool_t *)mddb_getrecaddr_resize(recid,
312 			sizeof (*hsp), HSP_ONDSK_STR_OFF);
313 
314 		hsp->hsp_self_id = shs->shs_hot_spare_pool;
315 		hsp->hsp_record_id = recid;
316 		hsp->hsp_next = (hot_spare_pool_t *)md_set[setno].s_hsp;
317 		hsp->hsp_refcount = 0;
318 		hsp->hsp_nhotspares = 0;
319 		hsp->hsp_revision |= MD_FN_META_DEV;
320 
321 		/* force prev_hsp to NULL, this will cause hsp to be linked */
322 		prev_hsp = (hot_spare_pool_t *)0;
323 
324 		rw_enter(&hotspares_md_ops.md_link_rw.lock, RW_WRITER);
325 		hsp->hsp_link.ln_next = hotspares_md_ops.md_head;
326 		hsp->hsp_link.ln_setno = setno;
327 		hsp->hsp_link.ln_id = hsp->hsp_self_id;
328 		hotspares_md_ops.md_head = &hsp->hsp_link;
329 		rw_exit(&hotspares_md_ops.md_link_rw.lock);
330 		hsp_created = 1;
331 	} else {
332 
333 		/*
334 		 * Make sure the hot spare is not already in the pool.
335 		 */
336 		for (i = 0; i < hsp->hsp_nhotspares; i++)
337 			if (hsp->hsp_hotspares[i] == hs->hs_record_id) {
338 				return (mdhserror(&shs->mde, MDE_HS_INUSE,
339 					shs->shs_hot_spare_pool,
340 					hs->hs_devnum));
341 			}
342 		/*
343 		 * Create a new hot spare pool record
344 		 * This gives us the one extra hs slot,
345 		 * because there is one slot in the
346 		 * hot_spare_pool struct
347 		 */
348 		new_size = sizeof (hot_spare_pool_ond_t) +
349 			(sizeof (mddb_recid_t) * hsp->hsp_nhotspares);
350 
351 		/*
352 		 * The Friendly Name status of the new HSP should duplicate
353 		 * the status of the existing one.
354 		 */
355 		if (hsp->hsp_revision & MD_FN_META_DEV) {
356 			options =
357 				MD_CRO_32BIT | MD_CRO_HOTSPARE_POOL | MD_CRO_FN;
358 		} else {
359 			options = MD_CRO_32BIT | MD_CRO_HOTSPARE_POOL;
360 		}
361 		recid = mddb_createrec(new_size, typ1, HSP_REC, options, setno);
362 
363 		if (recid < 0) {
364 			return (mdhsperror(&shs->mde, MDE_HSP_CREATE_FAILURE,
365 			    hsp->hsp_self_id));
366 		}
367 		new_size = sizeof (hot_spare_pool_t) +
368 			(sizeof (mddb_recid_t) * hsp->hsp_nhotspares);
369 
370 		/* get the record addr */
371 		new_hsp = (hot_spare_pool_t *)mddb_getrecaddr_resize(recid,
372 			new_size, HSP_ONDSK_STR_OFF);
373 
374 		/* copy the old record into the new one */
375 		bcopy((caddr_t)hsp, (caddr_t)new_hsp,
376 		    (size_t)((sizeof (hot_spare_pool_t) +
377 		    (sizeof (mddb_recid_t) * hsp->hsp_nhotspares)
378 		    - sizeof (mddb_recid_t))));
379 		new_hsp->hsp_record_id = recid;
380 
381 		md_rem_link(setno, hsp->hsp_self_id,
382 		    &hotspares_md_ops.md_link_rw.lock,
383 		    &hotspares_md_ops.md_head);
384 
385 		rw_enter(&hotspares_md_ops.md_link_rw.lock, RW_WRITER);
386 		new_hsp->hsp_link.ln_next = hotspares_md_ops.md_head;
387 		new_hsp->hsp_link.ln_setno = setno;
388 		new_hsp->hsp_link.ln_id = new_hsp->hsp_self_id;
389 		hotspares_md_ops.md_head = &new_hsp->hsp_link;
390 		rw_exit(&hotspares_md_ops.md_link_rw.lock);
391 
392 		/* mark the old hsp to be deleted */
393 		delete_hsp = 1;
394 		old_hsp = hsp;
395 		hsp = new_hsp;
396 	}
397 
398 	if (shs->shs_size_option & MD_CRO_64BIT) {
399 		hs->hs_revision |= MD_64BIT_META_DEV;
400 	} else {
401 		hs->hs_revision &= ~MD_64BIT_META_DEV;
402 	}
403 
404 	/* lock the db records */
405 	recids[0] = hs->hs_record_id;
406 	recids[1] = hsp->hsp_record_id;
407 	irecid = 2;
408 	if (delete_hsp)
409 		recids[irecid++] = old_hsp->hsp_record_id;
410 	recids[irecid] = 0;
411 
412 	/* increment the reference count */
413 	hs->hs_refcount++;
414 
415 	/* add the hs at the end of the hot spare pool */
416 	hsp->hsp_hotspares[hsp->hsp_nhotspares] = hs->hs_record_id;
417 	hsp->hsp_nhotspares++;
418 
419 	/*
420 	 * NOTE: We do not commit the previous hot spare pool record.
421 	 *	 There is no need, the link gets rebuilt at boot time.
422 	 */
423 	if (prev_hsp)
424 		prev_hsp->hsp_next = hsp;
425 	else
426 		md_set[setno].s_hsp = (void *) hsp;
427 
428 	if (delete_hsp)
429 		old_hsp->hsp_self_id = MD_HSP_NONE;
430 
431 	/* commit the db records */
432 	mddb_commitrecs_wrapper(recids);
433 
434 	if (delete_hsp) {
435 		/* delete the old hot spare pool record */
436 		mddb_deleterec_wrapper(old_hsp->hsp_record_id);
437 	}
438 
439 	if (hsp_created) {
440 		SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, SVM_TAG_HSP, setno,
441 		    md_expldev(hsp->hsp_self_id));
442 	}
443 	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_ADD, SVM_TAG_HSP, setno,
444 	    md_expldev(hsp->hsp_self_id));
445 
446 	return (0);
447 }
448 
449 
450 static int
451 seths_delete_hsp(set_hs_params_t *shs)
452 {
453 
454 	hot_spare_pool_t	*prev_hsp;
455 	hot_spare_pool_t	*hsp;
456 	set_t			setno;
457 	hsp_t			hspid;
458 
459 	setno = HSP_SET(shs->shs_hot_spare_pool);
460 
461 	/* Scan the hot spare pool list */
462 	prev_hsp = (hot_spare_pool_t *)0;
463 	hsp = (hot_spare_pool_t *)md_set[setno].s_hsp;
464 	while (hsp) {
465 		if (hsp->hsp_self_id == shs->shs_hot_spare_pool) {
466 			break;
467 		}
468 		prev_hsp = hsp;
469 		hsp = hsp->hsp_next;
470 	}
471 
472 	if (hsp == NULL) {
473 		return (mdhsperror(&shs->mde, MDE_INVAL_HSP,
474 		    shs->shs_hot_spare_pool));
475 	}
476 
477 	if (hsp->hsp_nhotspares != 0) {
478 		return (mdhsperror(&shs->mde, MDE_HSP_BUSY,
479 		    shs->shs_hot_spare_pool));
480 	}
481 
482 	if (hsp->hsp_refcount != 0) {
483 		return (mdhsperror(&shs->mde, MDE_HSP_REF,
484 		    shs->shs_hot_spare_pool));
485 	}
486 
487 	/* In case of a dryrun, we're done here */
488 	if (shs->shs_options & HS_OPT_DRYRUN) {
489 		return (0);
490 	}
491 	/*
492 	 * NOTE: We do not commit the previous hot spare pool record.
493 	 *	 There is no need, the link gets rebuilt at boot time.
494 	 */
495 	if (prev_hsp)
496 		prev_hsp->hsp_next = hsp->hsp_next;
497 	else
498 		md_set[setno].s_hsp = (void *) hsp->hsp_next;
499 
500 	hspid = hsp->hsp_self_id;
501 
502 	md_rem_link(setno, hsp->hsp_self_id,
503 	    &hotspares_md_ops.md_link_rw.lock,
504 	    &hotspares_md_ops.md_head);
505 
506 	mddb_deleterec_wrapper(hsp->hsp_record_id);
507 
508 	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DELETE, SVM_TAG_HSP, setno,
509 	    md_expldev(hspid));
510 	return (0);
511 }
512 
513 
514 static int
515 seths_delete(set_hs_params_t *shs)
516 {
517 	hot_spare_t		*hs;
518 	hot_spare_t		*prev_hs;
519 	hot_spare_pool_t	*hsp;
520 	mddb_recid_t		recids[4];
521 	int			i;
522 	set_t			setno;
523 	sv_dev_t		sv;
524 	int			delete_hs = 0;
525 	mdkey_t			key_old;
526 	int			num_keys_old = 0;
527 
528 	/* delete the hot spare pool */
529 	if (shs->shs_options & HS_OPT_POOL) {
530 		return (seths_delete_hsp(shs));
531 	}
532 
533 	setno = HSP_SET(shs->shs_hot_spare_pool);
534 
535 	/* Scan the hot spare list */
536 	hs = (hot_spare_t *)md_set[setno].s_hs;
537 	prev_hs = (hot_spare_t *)0;
538 	while (hs) {
539 		if (hs->hs_devnum == shs->shs_component_old) {
540 			break;
541 		}
542 		prev_hs = hs;
543 		hs = hs->hs_next;
544 	}
545 
546 	if (hs == NULL) {
547 		/*
548 		 * Unable to find device using devnum so use
549 		 * key associated with shs_component_old instead.
550 		 * If unable to find a unique key for shs_component_old
551 		 * then fail since namespace has multiple entries
552 		 * for this old component and we're unable to determine
553 		 * which key is the valid match for shs_component_old.
554 		 *
555 		 * Only need to compare keys when hs_devnum is NODEV.
556 		 */
557 		if (md_getkeyfromdev(setno, mddb_getsidenum(setno),
558 		    shs->shs_component_old, &key_old, &num_keys_old) != 0) {
559 			return (mddeverror(&shs->mde, MDE_NAME_SPACE,
560 			    shs->shs_component_old));
561 		}
562 
563 		/*
564 		 * If more than one key matches given old_dev - fail command
565 		 * since shouldn't add new hotspare if namespace has
566 		 * multiple entries.
567 		 */
568 		if (num_keys_old > 1) {
569 			return (mddeverror(&shs->mde, MDE_MULTNM,
570 			    shs->shs_component_old));
571 		}
572 		/*
573 		 * If there is no key for this entry then fail since
574 		 * a key for this entry should exist.
575 		 */
576 		if (num_keys_old == 0) {
577 			return (mddeverror(&shs->mde, MDE_INVAL_HS,
578 			    shs->shs_component_old));
579 		}
580 		/* Scan the hot spare list again */
581 		hs = (hot_spare_t *)md_set[setno].s_hs;
582 		prev_hs = (hot_spare_t *)0;
583 		while (hs) {
584 			/*
585 			 * Only need to compare keys when hs_devnum is NODEV.
586 			 */
587 			if ((hs->hs_devnum == NODEV64) &&
588 			    (hs->hs_key == key_old)) {
589 				break;
590 			}
591 			prev_hs = hs;
592 			hs = hs->hs_next;
593 		}
594 	}
595 
596 	if (hs == NULL) {
597 		return (mddeverror(&shs->mde, MDE_INVAL_HS,
598 		    shs->shs_component_old));
599 	}
600 
601 	/* Scan the hot spare pool list */
602 	hsp = find_hot_spare_pool(setno, shs->shs_hot_spare_pool);
603 	if (hsp == (hot_spare_pool_t *)0) {
604 		return (mdhsperror(&shs->mde, MDE_INVAL_HSP,
605 		    shs->shs_hot_spare_pool));
606 	}
607 
608 	/* check for force flag and state of hot spare */
609 	if (((shs->shs_options & HS_OPT_FORCE) == 0) &&
610 	    (hs->hs_state == HSS_RESERVED)) {
611 		return (mdhserror(&shs->mde, MDE_HS_RESVD,
612 		    shs->shs_hot_spare_pool, shs->shs_component_old));
613 	}
614 
615 	if (hsp->hsp_refcount && (hs->hs_state == HSS_RESERVED)) {
616 		return (mdhserror(&shs->mde, MDE_HS_RESVD,
617 		    shs->shs_hot_spare_pool, shs->shs_component_old));
618 	}
619 
620 	/*
621 	 * Make sure the device is in the pool.
622 	 */
623 	for (i = 0; i < hsp->hsp_nhotspares; i++) {
624 		if (hsp->hsp_hotspares[i] == hs->hs_record_id) {
625 			break;
626 		}
627 	}
628 
629 	if (i >= hsp->hsp_nhotspares) {
630 		return (mddeverror(&shs->mde, MDE_INVAL_HS,
631 		    hs->hs_devnum));
632 	}
633 
634 	/* In case of a dryrun, we're done here */
635 	if (shs->shs_options & HS_OPT_DRYRUN) {
636 		return (0);
637 	}
638 
639 	/* lock the db records */
640 	recids[0] = hs->hs_record_id;
641 	recids[1] = hsp->hsp_record_id;
642 	recids[2] = 0;
643 
644 	sv.setno = setno;
645 	sv.key = hs->hs_key;
646 
647 	hs->hs_refcount--;
648 	if (hs->hs_refcount == 0) {
649 		/*
650 		 * NOTE: We do not commit the previous hot spare record.
651 		 *	 There is no need, the link we get rebuilt at boot time.
652 		 */
653 		if (prev_hs) {
654 			prev_hs->hs_next = hs->hs_next;
655 		} else
656 			md_set[setno].s_hs = (void *) hs->hs_next;
657 
658 		/* mark the hot spare to be deleted */
659 		delete_hs = 1;
660 		recids[0] = hsp->hsp_record_id;
661 		recids[1] = 0;
662 	}
663 
664 	/* find the location of the hs in the hsp */
665 	for (i = 0; i < hsp->hsp_nhotspares; i++) {
666 		if (hsp->hsp_hotspares[i] == hs->hs_record_id)
667 			break;
668 	}
669 
670 	/* remove the hs from the hsp */
671 	for (i++; i < hsp->hsp_nhotspares; i++)
672 		hsp->hsp_hotspares[i - 1] = hsp->hsp_hotspares[i];
673 
674 	hsp->hsp_nhotspares--;
675 
676 	/* commit the db records */
677 	mddb_commitrecs_wrapper(recids);
678 
679 	if (delete_hs)
680 		mddb_deleterec_wrapper(hs->hs_record_id);
681 
682 	md_rem_names(&sv, 1);
683 
684 	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REMOVE, SVM_TAG_HSP, setno,
685 	    md_expldev(hsp->hsp_self_id));
686 
687 	return (0);
688 }
689 
690 static int
691 seths_replace(set_hs_params_t *shs)
692 {
693 	hot_spare_t		*hs;
694 	hot_spare_t		*prev_hs;
695 	hot_spare_t		*new_hs;
696 	hot_spare_pool_t	*hsp;
697 	int			new_found = 0;
698 	mddb_recid_t		recid;
699 	mddb_recid_t		recids[5];
700 	int			i;
701 	sv_dev_t		sv;
702 	int			delete_hs = 0;
703 	set_t			setno;
704 	mddb_type_t		typ1;
705 	mdkey_t			key_old;
706 	int			num_keys_old = 0;
707 
708 	setno = HSP_SET(shs->shs_hot_spare_pool);
709 	typ1 = (mddb_type_t)md_getshared_key(setno,
710 	    hotspares_md_ops.md_driver.md_drivername);
711 
712 	/* Scan the hot spare list */
713 	hs = (hot_spare_t *)md_set[setno].s_hs;
714 	prev_hs = (hot_spare_t *)0;
715 	while (hs) {
716 		if (hs->hs_devnum == shs->shs_component_old) {
717 			break;
718 		}
719 		prev_hs = hs;
720 		hs = hs->hs_next;
721 	}
722 
723 	if (hs == NULL) {
724 		/*
725 		 * Unable to find device using devnum so use
726 		 * key associated with shs_component_old instead.
727 		 * If unable to find a unique key for shs_component_old
728 		 * then fail since namespace has multiple entries
729 		 * for this old component and we're unable to determine
730 		 * which key is the valid match for shs_component_old.
731 		 *
732 		 * Only need to compare keys when hs_devnum is NODEV.
733 		 */
734 		if (md_getkeyfromdev(setno, mddb_getsidenum(setno),
735 		    shs->shs_component_old, &key_old, &num_keys_old) != 0) {
736 			return (mddeverror(&shs->mde, MDE_NAME_SPACE,
737 			    shs->shs_component_old));
738 		}
739 
740 		/*
741 		 * If more than one key matches given old_dev - fail command
742 		 * since unable to determine which key is correct.
743 		 */
744 		if (num_keys_old > 1) {
745 			return (mddeverror(&shs->mde, MDE_MULTNM,
746 			    shs->shs_component_old));
747 		}
748 		/*
749 		 * If there is no key for this entry then fail since
750 		 * a key for this entry should exist.
751 		 */
752 		if (num_keys_old == 0) {
753 			return (mddeverror(&shs->mde, MDE_INVAL_HS,
754 			    shs->shs_component_old));
755 		}
756 		/* Scan the hot spare list again */
757 		hs = (hot_spare_t *)md_set[setno].s_hs;
758 		prev_hs = (hot_spare_t *)0;
759 		while (hs) {
760 			/*
761 			 * Only need to compare keys when hs_devnum is NODEV.
762 			 */
763 			if ((hs->hs_devnum == NODEV64) &&
764 			    (hs->hs_key == key_old)) {
765 				break;
766 			}
767 			prev_hs = hs;
768 			hs = hs->hs_next;
769 		}
770 	}
771 
772 	if (hs == NULL) {
773 		return (mddeverror(&shs->mde, MDE_INVAL_HS,
774 		    shs->shs_component_old));
775 	}
776 
777 	/* check the force flag and the state of the hot spare */
778 	if (((shs->shs_options & HS_OPT_FORCE) == 0) &&
779 	    (hs->hs_state == HSS_RESERVED)) {
780 		return (mdhserror(&shs->mde, MDE_HS_RESVD,
781 		    shs->shs_hot_spare_pool,
782 		    hs->hs_devnum));
783 	}
784 
785 	/* Scan the hot spare pool list */
786 	hsp = find_hot_spare_pool(setno, shs->shs_hot_spare_pool);
787 	if (hsp == (hot_spare_pool_t *)0) {
788 		return (mdhsperror(&shs->mde, MDE_INVAL_HSP,
789 		    shs->shs_hot_spare_pool));
790 	}
791 
792 	/*
793 	 * Make sure the old device is in the pool.
794 	 */
795 	for (i = 0; i < hsp->hsp_nhotspares; i++) {
796 		if (hsp->hsp_hotspares[i] == hs->hs_record_id) {
797 			break;
798 		}
799 	}
800 	if (i >= hsp->hsp_nhotspares) {
801 		return (mddeverror(&shs->mde, MDE_INVAL_HS,
802 		    hs->hs_devnum));
803 	}
804 
805 	/* Scan the hot spare list for the new hs */
806 	new_hs = (hot_spare_t *)md_set[setno].s_hs;
807 	new_found = 0;
808 	while (new_hs) {
809 		if (new_hs->hs_devnum == shs->shs_component_new) {
810 			new_found = 1;
811 			break;
812 		}
813 		new_hs = new_hs->hs_next;
814 	}
815 
816 	/*
817 	 * Make sure the new device is not already in the pool.
818 	 * We don't have to search the hs in this hsp, if the
819 	 * new hs was just created. Only if the hot spare was found.
820 	 */
821 	if (new_found) {
822 		for (i = 0; i < hsp->hsp_nhotspares; i++)
823 			if (hsp->hsp_hotspares[i] == new_hs->hs_record_id) {
824 				return (mdhserror(&shs->mde, MDE_HS_INUSE,
825 				    shs->shs_hot_spare_pool,
826 				    new_hs->hs_devnum));
827 			}
828 	}
829 
830 	/* In case of a dryrun, we're done here */
831 	if (shs->shs_options & HS_OPT_DRYRUN) {
832 		return (0);
833 	}
834 
835 	/*
836 	 * Create the new hotspare
837 	 */
838 	if (!new_found) {
839 		/* create a hot spare record */
840 		if (shs->shs_size_option & MD_CRO_64BIT) {
841 #if defined(_ILP32)
842 			return (mdhserror(&shs->mde, MDE_HS_UNIT_TOO_LARGE,
843 			    shs->shs_hot_spare_pool, shs->shs_component_new));
844 #else
845 			recid = mddb_createrec(HS_ONDSK_STR_SIZE, typ1, HS_REC,
846 				MD_CRO_64BIT | MD_CRO_HOTSPARE, setno);
847 #endif
848 		} else {
849 			recid = mddb_createrec(HS_ONDSK_STR_SIZE, typ1, HS_REC,
850 				MD_CRO_32BIT | MD_CRO_HOTSPARE, setno);
851 		}
852 
853 		if (recid < 0) {
854 			return (mdhserror(&shs->mde, MDE_HS_CREATE_FAILURE,
855 			    shs->shs_hot_spare_pool,
856 			    shs->shs_component_new));
857 		}
858 
859 		/* get the addr */
860 		new_hs = (hot_spare_t *)mddb_getrecaddr_resize(recid,
861 			sizeof (*new_hs), 0);
862 
863 		new_hs->hs_record_id = recid;
864 		new_hs->hs_devnum = shs->shs_component_new;
865 		new_hs->hs_key = shs->shs_key_new;
866 		new_hs->hs_start_blk = shs->shs_start_blk;
867 		new_hs->hs_has_label = shs->shs_has_label;
868 		new_hs->hs_number_blks = shs->shs_number_blks;
869 		set_hot_spare_state(new_hs, HSS_AVAILABLE);
870 		new_hs->hs_refcount = 0;
871 		new_hs->hs_isopen = 1;
872 	}
873 
874 	/* lock the db records */
875 	recids[0] = hs->hs_record_id;
876 	recids[1] = new_hs->hs_record_id;
877 	recids[2] = hsp->hsp_record_id;
878 	recids[3] = 0;
879 
880 	sv.setno = setno;
881 	sv.key = hs->hs_key;
882 
883 	hs->hs_refcount--;
884 	if (hs->hs_refcount == 0) {
885 		/*
886 		 * NOTE: We do not commit the previous hot spare record.
887 		 *	 There is no need, the link we get rebuilt at boot time.
888 		 */
889 		if (prev_hs) {
890 			prev_hs->hs_next = hs->hs_next;
891 		} else
892 			md_set[setno].s_hs = (void *) hs->hs_next;
893 
894 		/* mark hs to be deleted in the correct order */
895 		delete_hs = 1;
896 
897 		recids[0] = new_hs->hs_record_id;
898 		recids[1] = hsp->hsp_record_id;
899 		recids[2] = 0;
900 	}
901 
902 	/* link into the hs list */
903 	new_hs->hs_refcount++;
904 	if (!new_found) {
905 		/* do this AFTER the old dev is possibly removed */
906 		new_hs->hs_next = (hot_spare_t *)md_set[setno].s_hs;
907 		md_set[setno].s_hs = (void *) new_hs;
908 	}
909 
910 	/* find the location of the old hs in the hsp */
911 	for (i = 0; i < hsp->hsp_nhotspares; i++) {
912 		if (hsp->hsp_hotspares[i] == hs->hs_record_id) {
913 			hsp->hsp_hotspares[i] = new_hs->hs_record_id;
914 			break;
915 		}
916 	}
917 
918 	if (shs->shs_size_option & MD_CRO_64BIT) {
919 		new_hs->hs_revision |= MD_64BIT_META_DEV;
920 	} else {
921 		new_hs->hs_revision &= ~MD_64BIT_META_DEV;
922 	}
923 
924 	/* commit the db records */
925 	mddb_commitrecs_wrapper(recids);
926 
927 	if (delete_hs)
928 		mddb_deleterec_wrapper(hs->hs_record_id);
929 
930 	md_rem_names(&sv, 1);
931 
932 	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REPLACE, SVM_TAG_HSP, setno,
933 	    md_expldev(hsp->hsp_self_id));
934 	return (0);
935 }
936 
937 static int
938 seths_enable(set_hs_params_t *shs)
939 {
940 	hot_spare_t	*hs;
941 	mddb_recid_t	recids[2];
942 	set_t		setno = shs->md_driver.md_setno;
943 	mdkey_t		key_old;
944 	int		num_keys_old = 0;
945 
946 
947 	/*
948 	 * Find device by using key associated with shs_component_old.
949 	 * If unable to find a unique key for shs_component_old
950 	 * then fail since namespace has multiple entries
951 	 * for this old component and we're unable to determine
952 	 * which key is the valid match for shs_component_old.
953 	 * This failure keeps a hotspare from being enabled on a slice
954 	 * that may already be in use by another metadevice.
955 	 */
956 	if (md_getkeyfromdev(setno, mddb_getsidenum(setno),
957 	    shs->shs_component_old, &key_old, &num_keys_old) != 0) {
958 		return (mddeverror(&shs->mde, MDE_NAME_SPACE,
959 		    shs->shs_component_old));
960 	}
961 
962 	/*
963 	 * If more than one key matches given old_dev - fail command
964 	 * since unable to determine which key is correct.
965 	 */
966 	if (num_keys_old > 1) {
967 		return (mddeverror(&shs->mde, MDE_MULTNM,
968 		    shs->shs_component_old));
969 	}
970 	/*
971 	 * If there is no key for this entry then fail since
972 	 * a key for this entry should exist.
973 	 */
974 	if (num_keys_old == 0) {
975 		return (mddeverror(&shs->mde, MDE_INVAL_HS,
976 		    shs->shs_component_old));
977 	}
978 
979 	/* Scan the hot spare list for the hs */
980 	hs = (hot_spare_t *)md_set[setno].s_hs;
981 	while (hs) {
982 		/*
983 		 * Since component may or may not be currently in the system,
984 		 * use the keys to find a match (not the devt).
985 		 */
986 		if (hs->hs_key == key_old) {
987 			break;
988 		}
989 		hs = hs->hs_next;
990 	}
991 
992 	if (hs == NULL) {
993 		return (mddeverror(&shs->mde, MDE_INVAL_HS,
994 			shs->shs_component_old));
995 	}
996 
997 	/* make sure it's broken */
998 	if (hs->hs_state != HSS_BROKEN) {
999 		return (mddeverror(&shs->mde, MDE_FIX_INVAL_HS_STATE,
1000 		    hs->hs_devnum));
1001 	}
1002 
1003 	/* In case of a dryrun, we're done here */
1004 	if (shs->shs_options & HS_OPT_DRYRUN) {
1005 		return (0);
1006 	}
1007 
1008 	/* fix it */
1009 	set_hot_spare_state(hs, HSS_AVAILABLE);
1010 	hs->hs_start_blk = shs->shs_start_blk;
1011 	hs->hs_has_label = shs->shs_has_label;
1012 	hs->hs_number_blks = shs->shs_number_blks;
1013 
1014 	/* commit the db records */
1015 	recids[0] = hs->hs_record_id;
1016 	recids[1] = 0;
1017 	mddb_commitrecs_wrapper(recids);
1018 	SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ENABLE, SVM_TAG_HS, setno,
1019 	    shs->shs_component_old);
1020 
1021 	return (0);
1022 }
1023 
1024 static int
1025 get_hs(
1026 	get_hs_params_t	*ghs
1027 )
1028 {
1029 	hot_spare_t	*hs;
1030 	set_t		setno = ghs->md_driver.md_setno;
1031 
1032 	mdclrerror(&ghs->mde);
1033 
1034 	/* Scan the hot spare list for the hs */
1035 	hs = (hot_spare_t *)md_set[setno].s_hs;
1036 	while (hs) {
1037 		if (hs->hs_key == ghs->ghs_key) {
1038 			break;
1039 		}
1040 		hs = hs->hs_next;
1041 	}
1042 
1043 	if (hs == NULL) {
1044 		return (mddeverror(&ghs->mde, MDE_INVAL_HS,
1045 		    ghs->ghs_devnum));
1046 	}
1047 
1048 	ghs->ghs_start_blk = hs->hs_start_blk;
1049 	ghs->ghs_number_blks = hs->hs_number_blks;
1050 	ghs->ghs_state = hs->hs_state;
1051 	ghs->ghs_timestamp = hs->hs_timestamp;
1052 	ghs->ghs_revision = hs->hs_revision;
1053 	return (0);
1054 }
1055 
1056 static void
1057 build_key_list(set_t setno, hot_spare_pool_t *hsp, mdkey_t *list)
1058 {
1059 	int	i;
1060 
1061 	for (i = 0; i < hsp->hsp_nhotspares; i++) {
1062 		hot_spare_t *hs;
1063 		hs = lookup_hot_spare(setno, hsp->hsp_hotspares[i], 1);
1064 		list[i] = hs->hs_key;
1065 	}
1066 }
1067 
1068 static int
1069 get_hsp(
1070 	void			*d,
1071 	int			mode
1072 )
1073 {
1074 	hot_spare_pool_t	*hsp;
1075 	get_hsp_t		*ghsp;
1076 	size_t			size;
1077 	set_t			setno;
1078 	int			err = 0;
1079 	md_i_get_t		*migp = (md_i_get_t *)d;
1080 
1081 
1082 	setno = migp->md_driver.md_setno;
1083 
1084 	mdclrerror(&migp->mde);
1085 
1086 	/* Scan the hot spare pool list */
1087 	hsp = find_hot_spare_pool(setno, migp->id);
1088 	if (hsp == NULL) {
1089 		return (mdhsperror(&migp->mde, MDE_INVAL_HSP,
1090 			migp->id));
1091 	}
1092 
1093 	size = (sizeof (ghsp->ghsp_hs_keys[0]) * (hsp->hsp_nhotspares - 1)) +
1094 	    sizeof (get_hsp_t);
1095 
1096 	if (migp->size == 0) {
1097 		migp->size = (int)size;
1098 		return (0);
1099 	}
1100 
1101 	if (migp->size < size)
1102 		return (EFAULT);
1103 
1104 	ghsp = kmem_alloc(size, KM_SLEEP);
1105 
1106 	ghsp->ghsp_id = hsp->hsp_self_id;
1107 	ghsp->ghsp_refcount = hsp->hsp_refcount;
1108 	ghsp->ghsp_nhotspares = hsp->hsp_nhotspares;
1109 	build_key_list(setno, hsp, ghsp->ghsp_hs_keys);
1110 	if (ddi_copyout(ghsp, (caddr_t)(uintptr_t)migp->mdp, size, mode))
1111 		err = EFAULT;
1112 	kmem_free(ghsp, size);
1113 	return (err);
1114 }
1115 
1116 static int
1117 set_hs(
1118 	set_hs_params_t	*shs
1119 )
1120 {
1121 	mdclrerror(&shs->mde);
1122 
1123 	if (md_get_setstatus(shs->md_driver.md_setno) & MD_SET_STALE)
1124 		return (mdmddberror(&shs->mde, MDE_DB_STALE, NODEV32,
1125 		    shs->md_driver.md_setno));
1126 
1127 	switch (shs->shs_cmd) {
1128 	case ADD_HOT_SPARE:
1129 		return (seths_add(shs));
1130 	case DELETE_HOT_SPARE:
1131 		return (seths_delete(shs));
1132 	case REPLACE_HOT_SPARE:
1133 		return (seths_replace(shs));
1134 	case FIX_HOT_SPARE:
1135 		return (seths_enable(shs));
1136 	default:
1137 		return (mderror(&shs->mde, MDE_INVAL_HSOP));
1138 	}
1139 }
1140 
1141 static void
1142 hotspares_poke_hotspares(void)
1143 {
1144 	intptr_t	(*poke_hs)();
1145 	int		i;
1146 
1147 	for (i = 0; i < MD_NOPS; i++) {
1148 		/* handle change */
1149 		poke_hs = md_get_named_service(NODEV64, i, "poke hotspares", 0);
1150 		if (poke_hs)
1151 			(void) (*poke_hs)();
1152 	}
1153 }
1154 
1155 
1156 /*ARGSUSED4*/
1157 static int
1158 hotspares_ioctl(
1159 	dev_t	dev,
1160 	int	cmd,
1161 	void	*data,
1162 	int	mode,
1163 	IOLOCK	*lockp
1164 )
1165 {
1166 	size_t	sz = 0;
1167 	void	*d = NULL;
1168 	int	err = 0;
1169 
1170 	/* single thread */
1171 	if (getminor(dev) != MD_ADM_MINOR)
1172 		return (ENOTTY);
1173 
1174 	/* We can only handle 32-bit clients for internal commands */
1175 	if ((mode & DATAMODEL_MASK) != DATAMODEL_ILP32) {
1176 		return (EINVAL);
1177 	}
1178 
1179 	mutex_enter(&md_mx);
1180 	while (md_status & MD_GBL_HS_LOCK)
1181 		cv_wait(&md_cv, &md_mx);
1182 	md_status |= MD_GBL_HS_LOCK;
1183 	mutex_exit(&md_mx);
1184 
1185 	/* dispatch ioctl */
1186 	switch (cmd) {
1187 
1188 	case MD_IOCSET_HS:	/* setup hot spares and pools */
1189 	{
1190 		if (! (mode & FWRITE)) {
1191 			err = EACCES;
1192 			break;
1193 		}
1194 
1195 		sz = sizeof (set_hs_params_t);
1196 		d = kmem_alloc(sz, KM_SLEEP);
1197 
1198 		if (ddi_copyin(data, d, sz, mode)) {
1199 			err = EFAULT;
1200 			break;
1201 		}
1202 
1203 		err = set_hs(d);
1204 		break;
1205 	}
1206 
1207 	case MD_IOCGET_HS:	/* get hot spare info */
1208 	{
1209 		if (! (mode & FREAD)) {
1210 			err = EACCES;
1211 			break;
1212 		}
1213 
1214 		sz = sizeof (get_hs_params_t);
1215 		d = kmem_alloc(sz, KM_SLEEP);
1216 
1217 		if (ddi_copyin(data, d, sz, mode)) {
1218 			err = EFAULT;
1219 			break;
1220 		}
1221 
1222 		err = get_hs(d);
1223 		break;
1224 	}
1225 
1226 	case MD_IOCGET:		/* get hot spare pool info */
1227 	{
1228 		if (! (mode & FREAD)) {
1229 			err = EACCES;
1230 			break;
1231 		}
1232 
1233 		sz = sizeof (md_i_get_t);
1234 		d = kmem_alloc(sz, KM_SLEEP);
1235 
1236 		if (ddi_copyin(data, d, sz, mode)) {
1237 			err = EFAULT;
1238 			break;
1239 		}
1240 
1241 		err = get_hsp(d, mode);
1242 		break;
1243 	}
1244 
1245 	default:
1246 		err = ENOTTY;
1247 	}
1248 
1249 	/*
1250 	 * copyout and free any args
1251 	 */
1252 	if (sz != 0) {
1253 		if (err == 0) {
1254 			if (ddi_copyout(d, data, sz, mode) != 0) {
1255 				err = EFAULT;
1256 			}
1257 		}
1258 		kmem_free(d, sz);
1259 	}
1260 
1261 	/* un single thread */
1262 	mutex_enter(&md_mx);
1263 	md_status &= ~MD_GBL_HS_LOCK;
1264 	cv_broadcast(&md_cv);
1265 	mutex_exit(&md_mx);
1266 
1267 	/* handle change */
1268 	hotspares_poke_hotspares();
1269 
1270 	/* return success */
1271 	return (err);
1272 }
1273 
1274 
1275 static void
1276 load_hotspare(set_t setno, mddb_recid_t recid)
1277 {
1278 	hot_spare_t	*hs;
1279 	mddb_de_ic_t	*dep;
1280 	mddb_rb32_t	*rbp;
1281 	size_t		newreqsize;
1282 	hot_spare_t	*b_hs;
1283 	hot_spare32_od_t *s_hs;
1284 
1285 	mddb_setrecprivate(recid, MD_PRV_GOTIT);
1286 
1287 	dep = mddb_getrecdep(recid);
1288 	dep->de_flags = MDDB_F_HOTSPARE;
1289 	rbp = dep->de_rb;
1290 	switch (rbp->rb_revision) {
1291 	case MDDB_REV_RB:
1292 	case MDDB_REV_RBFN:
1293 		/*
1294 		 * Needs to convert to internal 64 bit
1295 		 */
1296 		s_hs = (hot_spare32_od_t *)mddb_getrecaddr(recid);
1297 		newreqsize = sizeof (hot_spare_t);
1298 		b_hs = (hot_spare_t *)kmem_zalloc(newreqsize, KM_SLEEP);
1299 		hs_convert((caddr_t)s_hs, (caddr_t)b_hs, SMALL_2_BIG);
1300 		kmem_free(s_hs, dep->de_reqsize);
1301 		dep->de_rb_userdata = b_hs;
1302 		dep->de_reqsize = newreqsize;
1303 		hs = b_hs;
1304 		break;
1305 	case MDDB_REV_RB64:
1306 	case MDDB_REV_RB64FN:
1307 		hs = (hot_spare_t *)mddb_getrecaddr_resize
1308 			(recid, sizeof (*hs), 0);
1309 		break;
1310 	}
1311 	NOTE_FN(rbp->rb_revision, hs->hs_revision);
1312 
1313 #if defined(_ILP32)
1314 	if (hs->hs_revision & MD_64BIT_META_DEV) {
1315 		char	devname[MD_MAX_CTDLEN];
1316 
1317 		set_hot_spare_state(hs, HSS_BROKEN);
1318 		(void) md_devname(setno, hs->hs_devnum, devname,
1319 		    sizeof (devname));
1320 		cmn_err(CE_NOTE, "%s is unavailable because 64 bit hotspares "
1321 		    "are not accessible on a 32 bit kernel\n", devname);
1322 	}
1323 #endif
1324 
1325 	ASSERT(hs != NULL);
1326 
1327 	if (hs->hs_refcount == 0) {
1328 		mddb_setrecprivate(recid, MD_PRV_PENDDEL);
1329 		return;
1330 	}
1331 
1332 	hs->hs_next = (hot_spare_t *)md_set[setno].s_hs;
1333 	md_set[setno].s_hs = (void *)hs;
1334 
1335 	hs->hs_isopen = 0;
1336 
1337 	hs->hs_devnum = md_getdevnum(setno, mddb_getsidenum(setno),
1338 		hs->hs_key, MD_NOTRUST_DEVT);
1339 }
1340 
1341 
1342 static void
1343 load_hotsparepool(set_t setno, mddb_recid_t recid)
1344 {
1345 	hot_spare_pool_t *hsp;
1346 	hot_spare_pool_ond_t *hsp_ond;
1347 	size_t hsp_icsize;
1348 
1349 	mddb_setrecprivate(recid, MD_PRV_GOTIT);
1350 
1351 	hsp_ond = (hot_spare_pool_ond_t *)mddb_getrecaddr(recid);
1352 	ASSERT(hsp_ond != NULL);
1353 
1354 	if (hsp_ond->hsp_self_id == MD_HSP_NONE) {
1355 		mddb_setrecprivate(recid, MD_PRV_PENDDEL);
1356 		return;
1357 	}
1358 
1359 	hsp_icsize =  HSP_ONDSK_STR_OFF + mddb_getrecsize(recid);
1360 
1361 	hsp = (hot_spare_pool_t *)mddb_getrecaddr_resize(recid, hsp_icsize,
1362 		HSP_ONDSK_STR_OFF);
1363 	hsp->hsp_next = (hot_spare_pool_t *)md_set[setno].s_hsp;
1364 	md_set[setno].s_hsp = (void *) hsp;
1365 
1366 	rw_enter(&hotspares_md_ops.md_link_rw.lock, RW_WRITER);
1367 	hsp->hsp_link.ln_next = hotspares_md_ops.md_head;
1368 	hsp->hsp_link.ln_setno = setno;
1369 	hsp->hsp_link.ln_id = hsp->hsp_self_id;
1370 	hotspares_md_ops.md_head = &hsp->hsp_link;
1371 	rw_exit(&hotspares_md_ops.md_link_rw.lock);
1372 }
1373 
1374 static int
1375 hotspares_snarf(md_snarfcmd_t cmd, set_t setno)
1376 {
1377 	mddb_recid_t	recid;
1378 	int		gotsomething;
1379 	mddb_type_t	typ1;
1380 
1381 	if (cmd == MD_SNARF_CLEANUP)
1382 		return (0);
1383 
1384 	gotsomething = 0;
1385 
1386 	typ1 = (mddb_type_t)md_getshared_key(setno,
1387 	    hotspares_md_ops.md_driver.md_drivername);
1388 	recid = mddb_makerecid(setno, 0);
1389 	while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) {
1390 		if (mddb_getrecprivate(recid) & MD_PRV_GOTIT)
1391 			continue;
1392 
1393 		switch (mddb_getrectype2(recid)) {
1394 		case HSP_REC:
1395 			load_hotsparepool(setno, recid);
1396 			gotsomething = 1;
1397 			break;
1398 		case HS_REC:
1399 			load_hotspare(setno, recid);
1400 			gotsomething = 1;
1401 			break;
1402 		default:
1403 			ASSERT(0);
1404 		}
1405 	}
1406 
1407 	if (gotsomething)
1408 		return (gotsomething);
1409 
1410 	recid = mddb_makerecid(setno, 0);
1411 	while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0)
1412 		if (!(mddb_getrecprivate(recid) & MD_PRV_GOTIT))
1413 			mddb_setrecprivate(recid, MD_PRV_PENDDEL);
1414 
1415 	return (0);
1416 }
1417 
1418 static int
1419 hotspares_halt(md_haltcmd_t cmd, set_t setno)
1420 {
1421 	hot_spare_t		*hs, **p_hs;
1422 	hot_spare_pool_t	*hsp, **p_hsp;
1423 
1424 	if (cmd == MD_HALT_CLOSE)
1425 		return (0);
1426 
1427 	if (cmd == MD_HALT_OPEN)
1428 		return (0);
1429 
1430 	if (cmd == MD_HALT_CHECK)
1431 		return (0);
1432 
1433 	if (cmd == MD_HALT_UNLOAD)
1434 		return (0);
1435 
1436 	if (cmd != MD_HALT_DOIT)
1437 		return (1);
1438 	/*
1439 	 * Find all the hotspares for set "setno"
1440 	 *   and remove them from the hot_spare_list.
1441 	 */
1442 	p_hs = (hot_spare_t **)&md_set[setno].s_hs;
1443 	hs = (hot_spare_t *)md_set[setno].s_hs;
1444 	for (; hs != NULL; hs = *p_hs)
1445 		*p_hs = hs->hs_next;
1446 
1447 	/*
1448 	 * Find all the hotspare pools for set "setno"
1449 	 *   and remove them from the hot_spare_pools list.
1450 	 * Also remove from the get_next list.
1451 	 */
1452 	p_hsp = (hot_spare_pool_t **)&md_set[setno].s_hsp;
1453 	hsp = (hot_spare_pool_t *)md_set[setno].s_hsp;
1454 	for (; hsp != NULL; hsp = *p_hsp) {
1455 		md_rem_link(setno, hsp->hsp_self_id,
1456 		    &hotspares_md_ops.md_link_rw.lock,
1457 		    &hotspares_md_ops.md_head);
1458 		*p_hsp = hsp->hsp_next;
1459 	}
1460 
1461 	return (0);
1462 }
1463 
1464 static hot_spare_t *
1465 usable_hs(
1466 	set_t		setno,
1467 	mddb_recid_t	hs_id,
1468 	diskaddr_t	nblks,
1469 	int		labeled,
1470 	diskaddr_t	*start)
1471 {
1472 	hot_spare_t	*hs;
1473 
1474 	hs = lookup_hot_spare(setno, hs_id, 1);
1475 
1476 	if (hs->hs_state != HSS_AVAILABLE)
1477 		return ((hot_spare_t *)0);
1478 
1479 	if (labeled && hs->hs_has_label && (hs->hs_number_blks >= nblks)) {
1480 		*start = 0;
1481 		return (hs);
1482 	} else if ((hs->hs_number_blks - hs->hs_start_blk) >= nblks) {
1483 		*start = hs->hs_start_blk;
1484 		return (hs);
1485 	}
1486 	return ((hot_spare_t *)0);
1487 }
1488 
1489 static int
1490 reserve_a_hs(
1491 	set_t		setno,
1492 	mddb_recid_t	id,
1493 	uint64_t	size,
1494 	int		labeled,
1495 	mddb_recid_t	*hs_id,
1496 	mdkey_t		*key,
1497 	md_dev64_t	*dev,
1498 	diskaddr_t	*sblock)
1499 {
1500 	hot_spare_pool_t	*hsp;
1501 	hot_spare_t		*hs;
1502 	int			i;
1503 
1504 	*hs_id = 0;
1505 
1506 	hsp = find_hot_spare_pool(setno, id);
1507 	if (hsp == NULL)
1508 		return (-1);
1509 
1510 	for (i = 0; i < hsp->hsp_nhotspares; i++) {
1511 		hs = usable_hs(setno, hsp->hsp_hotspares[i],
1512 		    size, labeled, sblock);
1513 		if (hs == NULL)
1514 			continue;
1515 
1516 		set_hot_spare_state(hs, HSS_RESERVED);
1517 		*hs_id = hs->hs_record_id;
1518 		*key = hs->hs_key;
1519 		*dev = hs->hs_devnum;
1520 		/* NOTE: Mirror code commits the hs record */
1521 		return (0);
1522 	}
1523 
1524 	return (-1);
1525 }
1526 
1527 
1528 /* ARGSUSED3 */
1529 static int
1530 return_a_hs(
1531 	set_t			setno,
1532 	mddb_recid_t		id,
1533 	mddb_recid_t		*hs_id,
1534 	mdkey_t			key,
1535 	diskaddr_t		sblock,
1536 	uint64_t		size,
1537 	hotspare_states_t	new_state)
1538 {
1539 	hot_spare_pool_t	*hsp;
1540 	hot_spare_t		*hs;
1541 	int			i;
1542 
1543 	/*
1544 	 * NOTE: sblock/size are not currently being used.
1545 	 *	 That is because we always allocate the whole hs.
1546 	 *	 Later if we choose to allocate only what is needed
1547 	 *	 then the sblock/size can be used to determine
1548 	 *	 which part is being unreseved.
1549 	 */
1550 	*hs_id = 0;
1551 
1552 	hsp = find_hot_spare_pool(setno, id);
1553 	if (hsp == NULL)
1554 		return (-1);
1555 
1556 	for (i = 0; i < hsp->hsp_nhotspares; i++) {
1557 		hs = lookup_hot_spare(setno, hsp->hsp_hotspares[i], 1);
1558 		if (hs->hs_key != key)
1559 			continue;
1560 
1561 		set_hot_spare_state(hs, new_state);
1562 		*hs_id = hs->hs_record_id;
1563 		if (new_state == HSS_BROKEN) {
1564 			SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, SVM_TAG_HS,
1565 			    setno, hs->hs_devnum);
1566 		}
1567 		if (new_state == HSS_AVAILABLE) {
1568 			SE_NOTIFY(EC_SVM_STATE, ESC_SVM_HS_FREED, SVM_TAG_HS,
1569 			    setno, hs->hs_devnum);
1570 		}
1571 
1572 		/* NOTE: Mirror/Raid code commits the hs record */
1573 		return (0);
1574 	}
1575 
1576 	return (-1);
1577 }
1578 
1579 
1580 static int
1581 modify_hsp_ref(set_t setno, mddb_recid_t id, int incref,  mddb_recid_t *hsp_id)
1582 {
1583 	hot_spare_pool_t	*hsp;
1584 
1585 	*hsp_id = 0;
1586 
1587 	if (id  < 0)
1588 		return (0);
1589 
1590 	hsp = find_hot_spare_pool(setno, id);
1591 	if (hsp == NULL)
1592 		return (-1);
1593 
1594 	if (incref)
1595 		hsp->hsp_refcount++;
1596 	else
1597 		hsp->hsp_refcount--;
1598 
1599 	*hsp_id = hsp->hsp_record_id;
1600 
1601 	/* NOTE: Stripe code commits the hsp record */
1602 	return (0);
1603 }
1604 
1605 
1606 static int
1607 mkdev_for_a_hs(mddb_recid_t hs_id, md_dev64_t *dev)
1608 {
1609 	hot_spare_t	*hs;
1610 
1611 	hs = lookup_hot_spare(mddb_getsetnum(hs_id), hs_id, 0);
1612 	if (hs == NULL)
1613 		return (0);
1614 
1615 	*dev = hs->hs_devnum;
1616 	return (0);
1617 }
1618 
1619 static intptr_t
1620 hotspares_interface(
1621 	hs_cmds_t	cmd,
1622 	mddb_recid_t	id,
1623 	uint64_t	size,
1624 	int		bool,
1625 	mddb_recid_t	*hs_id,
1626 	mdkey_t		*key,
1627 	md_dev64_t	*dev,
1628 	diskaddr_t	*sblock)
1629 {
1630 	set_t	setno;
1631 	int	err = -1;
1632 
1633 	mutex_enter(&md_mx);
1634 	while (md_status & MD_GBL_HS_LOCK)
1635 		cv_wait(&md_cv, &md_mx);
1636 
1637 	/* If md_halt has been run do not continue */
1638 	if (md_status & (MD_GBL_HALTED | MD_GBL_DAEMONS_DIE)) {
1639 		mutex_exit(&md_mx);
1640 		return (ENXIO);
1641 	}
1642 
1643 	md_status |= MD_GBL_HS_LOCK;
1644 	mutex_exit(&md_mx);
1645 
1646 	setno = mddb_getsetnum(id);
1647 
1648 	switch (cmd) {
1649 	case HS_GET:
1650 		err = reserve_a_hs(setno, id, size, bool, hs_id,
1651 		    key, dev, sblock);
1652 		break;
1653 	case HS_FREE:
1654 		err = return_a_hs(setno, id, hs_id, *key, 0, 0, HSS_AVAILABLE);
1655 		hotspares_poke_hotspares();
1656 		break;
1657 	case HS_BAD:
1658 		err = return_a_hs(setno, id, hs_id, *key, 0, 0, HSS_BROKEN);
1659 		break;
1660 	case HSP_INCREF:
1661 		err = modify_hsp_ref(setno, id, 1, hs_id);
1662 		break;
1663 	case HSP_DECREF:
1664 		err = modify_hsp_ref(setno, id, 0, hs_id);
1665 		break;
1666 	case HS_MKDEV:
1667 		err = mkdev_for_a_hs(*hs_id, dev);
1668 		break;
1669 	}
1670 
1671 	mutex_enter(&md_mx);
1672 	md_status &= ~MD_GBL_HS_LOCK;
1673 	cv_broadcast(&md_cv);
1674 	mutex_exit(&md_mx);
1675 
1676 	return (err);
1677 }
1678 
1679 static void
1680 imp_hotsparepool(
1681 	set_t	setno,
1682 	mddb_recid_t	recid
1683 )
1684 {
1685 	hot_spare_pool_ond_t	*hsp_ond;
1686 	mddb_recid_t		*hsp_recid, *hs_recid;
1687 	int			i;
1688 	uint_t			*hsp_selfid;
1689 
1690 	mddb_setrecprivate(recid, MD_PRV_GOTIT);
1691 
1692 	hsp_ond = (hot_spare_pool_ond_t *)mddb_getrecaddr(recid);
1693 	hsp_recid = &(hsp_ond->hsp_record_id);
1694 	hsp_selfid = &(hsp_ond->hsp_self_id);
1695 	/*
1696 	 * Fixup the pool and hotspares
1697 	 */
1698 	*hsp_recid = MAKERECID(setno, DBID(*hsp_recid));
1699 	*hsp_selfid = MAKERECID(setno, DBID(*hsp_selfid));
1700 
1701 	for (i = 0; i < hsp_ond->hsp_nhotspares; i++) {
1702 		hs_recid = &(hsp_ond->hsp_hotspares[i]);
1703 		*hs_recid = MAKERECID(setno, DBID(*hs_recid));
1704 	}
1705 }
1706 
1707 static void
1708 imp_hotspare(
1709 	set_t	setno,
1710 	mddb_recid_t	recid
1711 )
1712 {
1713 	mddb_de_ic_t	*dep;
1714 	mddb_rb32_t	*rbp;
1715 	hot_spare_t	*hs64;
1716 	hot_spare32_od_t	*hs32;
1717 	mddb_recid_t	*hs_recid;
1718 
1719 	mddb_setrecprivate(recid, MD_PRV_GOTIT);
1720 
1721 	dep = mddb_getrecdep(recid);
1722 	rbp = dep->de_rb;
1723 	switch (rbp->rb_revision) {
1724 	case MDDB_REV_RB:
1725 	case MDDB_REV_RBFN:
1726 		/*
1727 		 * 32 bit hotspare
1728 		 */
1729 		hs32 = (hot_spare32_od_t *)mddb_getrecaddr(recid);
1730 		hs_recid = &(hs32->hs_record_id);
1731 		break;
1732 	case MDDB_REV_RB64:
1733 	case MDDB_REV_RB64FN:
1734 		hs64 = (hot_spare_t *)mddb_getrecaddr(recid);
1735 		hs_recid = &(hs64->hs_record_id);
1736 		break;
1737 	}
1738 
1739 	/*
1740 	 * Fixup the setno
1741 	 */
1742 	*hs_recid = MAKERECID(setno, DBID(*hs_recid));
1743 }
1744 
1745 static int
1746 hotspares_imp_set(
1747 	set_t	setno
1748 )
1749 {
1750 	mddb_recid_t	recid;
1751 	int		gotsomething;
1752 	mddb_type_t	typ1;
1753 
1754 
1755 	gotsomething = 0;
1756 
1757 	typ1 = (mddb_type_t)md_getshared_key(setno,
1758 	    hotspares_md_ops.md_driver.md_drivername);
1759 	recid = mddb_makerecid(setno, 0);
1760 	while ((recid = mddb_getnextrec(recid, typ1, 0)) > 0) {
1761 		if (mddb_getrecprivate(recid) & MD_PRV_GOTIT)
1762 			continue;
1763 
1764 		switch (mddb_getrectype2(recid)) {
1765 		case HSP_REC:
1766 			imp_hotsparepool(setno, recid);
1767 			gotsomething = 1;
1768 			break;
1769 		case HS_REC:
1770 			imp_hotspare(setno, recid);
1771 			gotsomething = 1;
1772 			break;
1773 		default:
1774 			ASSERT(0);
1775 		}
1776 	}
1777 
1778 	return (gotsomething);
1779 }
1780 
1781 static md_named_services_t hotspares_named_services[] = {
1782 	{hotspares_interface,	"hot spare interface"},
1783 	{NULL,			0}
1784 };
1785 
1786 md_ops_t hotspares_md_ops = {
1787 	NULL,			/* open */
1788 	NULL,			/* close */
1789 	NULL,			/* strategy */
1790 	NULL,			/* print */
1791 	NULL,			/* dump */
1792 	NULL,			/* read */
1793 	NULL,			/* write */
1794 	hotspares_ioctl,	/* hotspares_ioctl, */
1795 	hotspares_snarf,	/* hotspares_snarf */
1796 	hotspares_halt,		/* halt */
1797 	NULL,			/* aread */
1798 	NULL,			/* awrite */
1799 	hotspares_imp_set,	/* import set */
1800 	hotspares_named_services /* named_services */
1801 };
1802 
/*
 * fini_uninit:
 * -----------
 * Teardown hook named in the module linkage below.  It clears the hot
 * spare interface so that services which may have been imported from
 * this module can no longer be reached.
 */
static void
fini_uninit()
{
	md_clear_hot_spare_interface();
}
1809 
1810 /* define the module linkage */
1811 MD_PLUGIN_MISC_MODULE("hot spares module %I%", md_noop, fini_uninit())
1812