/*
 * CDDL HEADER START
 *
 * The contents of this file are subject to the terms of the
 * Common Development and Distribution License (the "License").
 * You may not use this file except in compliance with the License.
 *
 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
 * or http://www.opensolaris.org/os/licensing.
 * See the License for the specific language governing permissions
 * and limitations under the License.
 *
 * When distributing Covered Code, include this CDDL HEADER in each
 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
 * If applicable, add the following below this CDDL HEADER, with the
 * fields enclosed by brackets "[]" replaced with your own identifying
 * information: Portions Copyright [yyyy] [name of copyright owner]
 *
 * CDDL HEADER END
 */

/*
 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 * Copyright 2012 Milan Jurik. All rights reserved.
 */

#include <sys/param.h>
#include <sys/systm.h>
#include <sys/conf.h>
#include <sys/file.h>
#include <sys/user.h>
#include <sys/uio.h>
#include <sys/t_lock.h>
#include <sys/buf.h>
#include <sys/dkio.h>
#include <sys/vtoc.h>
#include <sys/kmem.h>
#include <vm/page.h>
#include <sys/sysmacros.h>
#include <sys/types.h>
#include <sys/mkdev.h>
#include <sys/stat.h>
#include <sys/open.h>
#include <sys/modctl.h>
#include <sys/ddi.h>
#include <sys/sunddi.h>

#include <sys/lvm/mdvar.h>
#include <sys/lvm/md_names.h>
#include <sys/lvm/md_mddb.h>
#include <sys/lvm/md_stripe.h>
#include <sys/lvm/md_mirror.h>

#include <sys/model.h>

#include <sys/sysevent/eventdefs.h>
#include <sys/sysevent/svm.h>
#include <sys/lvm/mdmn_commd.h>

extern int md_status;
extern kmutex_t md_mx;
extern kcondvar_t md_cv;

extern unit_t md_nunits;
extern set_t md_nsets;
extern md_set_t md_set[];

extern md_ops_t mirror_md_ops;
extern int md_ioctl_cnt;
extern md_krwlock_t md_unit_array_rw;
extern major_t md_major;
extern mdq_anchor_t md_ff_daemonq;
extern void md_probe_one(probe_req_t *);
extern void mirror_openfail_console_info(mm_unit_t *, int, int);

#ifdef DEBUG
extern int mirror_debug_flag;
#endif

static void
mirror_resume_writes(mm_unit_t *un)
{
        /*
         * Release the block on writes to the mirror and resume any blocked
         * resync thread.
         * This is only required for MN sets
         */
        if (MD_MNSET_SETNO(MD_UN2SET(un))) {
#ifdef DEBUG
                if (mirror_debug_flag)
                        printf("mirror_resume_writes: mnum %x\n", MD_SID(un));
#endif
                mutex_enter(&un->un_suspend_wr_mx);
                un->un_suspend_wr_flag = 0;
                cv_broadcast(&un->un_suspend_wr_cv);
                mutex_exit(&un->un_suspend_wr_mx);
                mutex_enter(&un->un_rs_thread_mx);
                un->un_rs_thread_flags &= ~MD_RI_BLOCK;
                cv_signal(&un->un_rs_thread_cv);
                mutex_exit(&un->un_rs_thread_mx);
        }
}

mm_unit_t *
mirror_getun(minor_t mnum, md_error_t *mde, int flags, IOLOCK *lock)
{
        mm_unit_t *un;
        mdi_unit_t *ui;
        set_t setno = MD_MIN2SET(mnum);

        if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) {
                (void) mdmderror(mde, MDE_INVAL_UNIT, mnum);
                return (NULL);
        }

        if (!(flags & STALE_OK)) {
                if (md_get_setstatus(setno) & MD_SET_STALE) {
                        (void) mdmddberror(mde, MDE_DB_STALE, mnum, setno);
                        return (NULL);
                }
        }

        ui = MDI_UNIT(mnum);
        if (flags & NO_OLD) {
                if (ui != NULL) {
                        (void) mdmderror(mde, MDE_UNIT_ALREADY_SETUP, mnum);
                        return (NULL);
                }
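                /*
                 * No existing unit: NO_OLD callers only need an existence
                 * check, so hand back a non-NULL sentinel rather than a
                 * real unit pointer.
                 */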
                return ((mm_unit_t *)1);
        }

        if (ui == NULL) {
                (void) mdmderror(mde, MDE_UNIT_NOT_SETUP, mnum);
                return (NULL);
        }

        if (flags & ARRAY_WRITER)
                md_array_writer(lock);
        else if (flags & ARRAY_READER)
                md_array_reader(lock);

        if (!(flags & NO_LOCK)) {
                if (flags & WR_LOCK)
                        (void) md_ioctl_writerlock(lock, ui);
                else /* RD_LOCK */
                        (void) md_ioctl_readerlock(lock, ui);
        }
        un = (mm_unit_t *)MD_UNIT(mnum);

        if (un->c.un_type != MD_METAMIRROR) {
                (void) mdmderror(mde, MDE_NOT_MM, mnum);
                return (NULL);
        }

        return (un);
}
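
/*
 * Illustrative caller pattern (a sketch, not a definitive contract --
 * compare the actual ioctl handlers below):
 *
 *	if ((un = mirror_getun(mnum, &p->mde, WR_LOCK, lock)) == NULL)
 *		return (0);
 *
 * On failure the reason is recorded in the md_error_t embedded in the
 * caller's parameter block and the handler returns 0; the error travels
 * back to userland in the parameter block rather than as an errno.
 */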

static int
mirror_set(
        void *d,
        int mode
)
{
        minor_t mnum;
        mm_unit_t *un;
        mddb_recid_t recid;
        mddb_type_t typ1;
        int err;
        int i;
        set_t setno;
        md_set_params_t *msp = d;


        mnum = msp->mnum;

        mdclrerror(&msp->mde);

        if (mirror_getun(mnum, &msp->mde, NO_OLD, NULL) == NULL)
                return (0);

        setno = MD_MIN2SET(mnum);

        typ1 = (mddb_type_t)md_getshared_key(setno,
            mirror_md_ops.md_driver.md_drivername);

        /*
         * Create the db record for this mdstruct
         * We don't store incore elements ondisk
         */

        if (msp->options & MD_CRO_64BIT) {
#if defined(_ILP32)
                return (mdmderror(&msp->mde, MDE_UNIT_TOO_LARGE, mnum));
#else
                recid = mddb_createrec((size_t)msp->size, typ1, MIRROR_REC,
                    MD_CRO_64BIT | MD_CRO_MIRROR | MD_CRO_FN, setno);
#endif
        } else {
                /*
                 * It's important to use the correct size here
                 */
                msp->size = sizeof (mm_unit32_od_t);
                recid = mddb_createrec((size_t)msp->size, typ1, MIRROR_REC,
                    MD_CRO_32BIT | MD_CRO_MIRROR | MD_CRO_FN, setno);
        }
        if (recid < 0)
                return (mddbstatus2error(&msp->mde, (int)recid,
                    mnum, setno));

        /* Resize to include incore fields */
        un = (mm_unit_t *)mddb_getrecaddr_resize(recid, sizeof (*un), 0);
        /*
         * It is okay that we muck with the mdstruct here,
         * since no one else will know about the mdstruct
         * until we commit it. If we crash, the record will
         * be automatically purged, since we haven't
         * committed it yet.
         */

        /* copy in the user's mdstruct */
        if (err = ddi_copyin((caddr_t)(uintptr_t)msp->mdp, un,
            (uint_t)msp->size, mode)) {
                mddb_deleterec_wrapper(recid);
                return (EFAULT);
        }
        /* All 64 bit metadevices only support EFI labels. */
        if (msp->options & MD_CRO_64BIT) {
                un->c.un_flag |= MD_EFILABEL;
        }

        un->c.un_revision |= MD_FN_META_DEV;
        MD_RECID(un) = recid;
        MD_CAPAB(un) = MD_CAN_PARENT | MD_CAN_META_CHILD | MD_CAN_SP;
        MD_PARENT(un) = MD_NO_PARENT;

        for (i = 0; i < NMIRROR; i++) {
                struct mm_submirror *sm;

                sm = &un->un_sm[i];
                if (!SMS_IS(sm, SMS_INUSE))
                        continue;

                /* ensure that the submirror is a metadevice */
                if (md_getmajor(sm->sm_dev) != md_major)
                        return (mdmderror(&msp->mde, MDE_INVAL_UNIT,
                            md_getminor(sm->sm_dev)));

                if (md_get_parent(sm->sm_dev) == MD_NO_PARENT)
                        continue;

                /* mirror creation should fail here */
                md_nblocks_set(mnum, -1ULL);
                MD_UNIT(mnum) = NULL;

                mddb_deleterec_wrapper(recid);
                return (mdmderror(&msp->mde, MDE_IN_USE,
                    md_getminor(sm->sm_dev)));
        }

        if (err = mirror_build_incore(un, 0)) {
                md_nblocks_set(mnum, -1ULL);
                MD_UNIT(mnum) = NULL;

                mddb_deleterec_wrapper(recid);
                return (err);
        }

        /*
         * Update unit availability
         */
        md_set[setno].s_un_avail--;

        mirror_commit(un, ALL_SUBMIRRORS, 0);
        md_create_unit_incore(MD_SID(un), &mirror_md_ops, 0);
        mirror_check_failfast(mnum);
        SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, SVM_TAG_METADEVICE, setno,
            MD_SID(un));

        resync_start_timeout(setno);
        return (0);
}

static int
mirror_get(
        void *migp,
        int mode,
        IOLOCK *lock
)
{
        mm_unit_t *un;
        md_i_get_t *migph = migp;

        mdclrerror(&migph->mde);

        if ((un = mirror_getun(migph->id, &migph->mde, RD_LOCK, lock)) == NULL)
                return (0);

        if (migph->size == 0) {
                migph->size = un->c.un_size;
                return (0);
        }

        if (migph->size < un->c.un_size) {
                return (EFAULT);
        }
        if (ddi_copyout(un, (caddr_t)(uintptr_t)migph->mdp,
            un->c.un_size, mode))
                return (EFAULT);
        return (0);
}

static int
mirror_getdevs(
        void *mgdp,
        int mode,
        IOLOCK *lock
)
{
        mm_unit_t *un;
        md_dev64_t *udevs;
        int cnt;
        int i;
        md_dev64_t unit_dev;
        md_getdevs_params_t *mgdph = mgdp;


        mdclrerror(&mgdph->mde);

        if ((un = mirror_getun(mgdph->mnum,
            &mgdph->mde, RD_LOCK, lock)) == NULL)
                return (0);

        udevs = (md_dev64_t *)(uintptr_t)mgdph->devs;

        for (cnt = 0, i = 0; i < NMIRROR; i++) {
                if (!SMS_BY_INDEX_IS(un, i, SMS_INUSE))
                        continue;
                if (cnt < mgdph->cnt) {
                        unit_dev = un->un_sm[i].sm_dev;
                        if (md_getmajor(unit_dev) != md_major) {
                                unit_dev = md_xlate_mini_2_targ(unit_dev);
                                if (unit_dev == NODEV64)
                                        return (ENODEV);
                        }

                        if (ddi_copyout((caddr_t)&unit_dev, (caddr_t)udevs,
                            sizeof (*udevs), mode) != 0)
                                return (EFAULT);
                        ++udevs;
                }
                ++cnt;
        }

        mgdph->cnt = cnt;
        return (0);
}

static int
mirror_reset(
        md_i_reset_t *mirp
)
{
        minor_t mnum = mirp->mnum;
        mm_unit_t *un;
        mdi_unit_t *ui;
        set_t setno = MD_MIN2SET(mnum);

        mdclrerror(&mirp->mde);

        if ((un = mirror_getun(mnum, &mirp->mde, NO_LOCK, NULL)) == NULL)
                return (0);

        if (MD_HAS_PARENT(un->c.un_parent)) {
                return (mdmderror(&mirp->mde, MDE_IN_USE, mnum));
        }

        rw_enter(&md_unit_array_rw.lock, RW_WRITER);

        /* single thread */
        ui = MDI_UNIT(mnum);
        (void) md_unit_openclose_enter(ui);

        if (md_unit_isopen(ui)) {
                md_unit_openclose_exit(ui);
                rw_exit(&md_unit_array_rw.lock);
                return (mdmderror(&mirp->mde, MDE_IS_OPEN, mnum));
        }

        md_unit_openclose_exit(ui);

        if (!mirp->force) {
                int smi;
                for (smi = 0; smi < NMIRROR; smi++) {
                        if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE))
                                continue;

                        if (!SMS_BY_INDEX_IS(un, smi, SMS_RUNNING)) {
                                rw_exit(&md_unit_array_rw.lock);
                                return (mdmderror(&mirp->mde,
                                    MDE_C_WITH_INVAL_SM, mnum));
                        }
                }
        }

        reset_mirror(un, mnum, 1);

        /*
         * Update unit availability
         */
        md_set[setno].s_un_avail++;

        /*
         * If MN set, reset s_un_next so all nodes can have
         * the same view of the next available slot when
         * nodes are -w and -j
         */
        if (MD_MNSET_SETNO(setno)) {
                (void) md_upd_set_unnext(setno, MD_MIN2UNIT(mnum));
        }

        rw_exit(&md_unit_array_rw.lock);
        return (0);
}

static int
mirror_get_geom(
        mm_unit_t *un,
        struct dk_geom *geomp
)
{
        md_get_geom((md_unit_t *)un, geomp);

        return (0);
}

static int
mirror_get_vtoc(
        mm_unit_t *un,
        struct vtoc *vtocp
)
{
        md_get_vtoc((md_unit_t *)un, vtocp);

        return (0);
}

static int
mirror_set_vtoc(
        mm_unit_t *un,
        struct vtoc *vtocp
)
{
        return (md_set_vtoc((md_unit_t *)un, vtocp));
}

static int
mirror_get_extvtoc(
        mm_unit_t *un,
        struct extvtoc *vtocp
)
{
        md_get_extvtoc((md_unit_t *)un, vtocp);

        return (0);
}

static int
mirror_set_extvtoc(
        mm_unit_t *un,
        struct extvtoc *vtocp
)
{
        return (md_set_extvtoc((md_unit_t *)un, vtocp));
}

static int
mirror_get_cgapart(
        mm_unit_t *un,
        struct dk_map *dkmapp
)
{
        md_get_cgapart((md_unit_t *)un, dkmapp);
        return (0);
}

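/*
 * mirror_getcomp_by_dev:
 * ---------------------
 * Locate the submirror/component pair that corresponds to the device in
 * the replace parameter block, matching first by dev_t and then, for
 * failed (NODEV) components, by namespace key. On success the indices are
 * returned through smi/cip and 1 is returned; otherwise the error is
 * recorded in the parameter block and 0 is returned.
 */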
static int
mirror_getcomp_by_dev(mm_unit_t *un, replace_params_t *params,
    int *smi, int *cip)
{
        mm_submirror_t *sm;
        mm_submirror_ic_t *smic;
        ms_comp_t *comp;
        ms_unit_t *mous;
        int ci;
        int i;
        int compcnt;
        ms_cd_info_t cd;
        void (*get_dev)();
        md_dev64_t dev = md_expldev(params->old_dev);
        md_error_t *ep = &params->mde;
        minor_t mnum = params->mnum;
        mdkey_t devkey;
        int nkeys;
        set_t setno;
        side_t side;

        setno = MD_MIN2SET(MD_SID(un));
        side = mddb_getsidenum(setno);

        if (md_getkeyfromdev(setno, side, dev, &devkey, &nkeys) != 0)
                return (mddeverror(ep, MDE_NAME_SPACE, dev));

        for (i = 0; i < NMIRROR; i++) {
                sm = &un->un_sm[i];
                smic = &un->un_smic[i];

                if (!SMS_IS(sm, SMS_INUSE))
                        continue;

                get_dev =
                    (void (*)())md_get_named_service(sm->sm_dev, 0,
                    "get device", 0);
                compcnt = (*(smic->sm_get_component_count))(sm->sm_dev, un);

                /*
                 * For each of the underlying stripe components get
                 * the info.
                 */
                for (ci = 0; ci < compcnt; ci++) {
                        (void) (*get_dev)(sm->sm_dev, sm, ci, &cd);
                        if ((cd.cd_dev == dev) || (cd.cd_orig_dev == dev)) {
                                *cip = ci;
                                *smi = i;
                                return (1);
                        }
                }

                /*
                 * now we rescan looking only for NODEV. If we find
                 * NODEV then we will check the keys to see if it's a match.
                 *
                 * If no key was found to match dev, then there is
                 * no way to compare keys - so continue.
                 */
                if (nkeys == 0) {
                        continue;
                }
                mous = MD_UNIT(md_getminor(sm->sm_dev));

                for (ci = 0; ci < compcnt; ci++) {

                        comp = (struct ms_comp *)
                            ((void *)&((char *)mous)[mous->un_ocomp]);
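                        /*
                         * un_ocomp is the byte offset from the start of
                         * the stripe unit structure to its component
                         * array; the cast above locates that array and
                         * "comp += ci" below selects the ci'th entry.
                         */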

                        (void) (*get_dev)(sm->sm_dev, sm, ci, &cd);

                        if (cd.cd_dev == NODEV64 || cd.cd_orig_dev == NODEV64) {
                                comp += ci;
                                if (comp->un_key == devkey) {
                                        if (nkeys > 1) {
                                                return (mddeverror(
                                                    ep, MDE_MULTNM, dev));
                                        }
                                        *cip = ci;
                                        *smi = i;
                                        return (1);
                                }
                        }
                }
        }
        return (mdcomperror(ep, MDE_CANT_FIND_COMP, mnum, dev));
}

/*
 * comp_replace:
 * ----------------
 * Called to implement the component replace function
 *
 * Owner is returned in the parameter block passed in by the caller.
 *
 * Returns:
 *	0	success
 *	error code if the function fails
 *
 * For a MN set, on entry all writes to the mirror are suspended, on exit
 * from this function, writes must be resumed when not a dryrun.
 */
static int
comp_replace(
        replace_params_t *params,
        IOLOCK *lock
)
{
        minor_t mnum = params->mnum;
        set_t setno;
        side_t side;
        mm_unit_t *un;
        mdi_unit_t *ui;
        ms_unit_t *ms_un;
        mdi_unit_t *ms_ui;
        ms_comp_t *comp;
        mm_submirror_t *sm;
        md_dev64_t smdev;
        mddb_recid_t recids[6]; /* recids for stripe on SP */
        int smi, ci;
        ms_new_dev_t nd;
        int (*repl_dev)();
        void (*repl_done)();
        void *repl_data;
        int err = 0;
        ms_cd_info_t cd;
        void (*get_dev)();

        mdclrerror(&params->mde);

        if ((un = mirror_getun(mnum, &params->mde, WRITERS, lock)) == NULL) {
                return (0);
        }

        ui = MDI_UNIT(mnum);
        if (ui->ui_tstate & MD_INACCESSIBLE) {
                (void) mdmderror(&params->mde, MDE_IN_UNAVAIL_STATE, mnum);
                goto errexit;
        }

        /*
         * replace cannot be done while a resync is active or we are
         * still waiting for an optimized resync to be started
         */
        if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE | MD_UN_OPT_NOT_DONE)) {
                (void) mdmderror(&params->mde, MDE_RESYNC_ACTIVE, mnum);
                goto errexit;
        }

        if (mirror_getcomp_by_dev(un, params, &smi, &ci) == 0) {
                goto errexit;
        }

        if (un->un_nsm == 1) {
                (void) mdmderror(&params->mde, MDE_LAST_SM_RE, mnum);
                goto errexit;
        }

        if (mirror_other_sources(un, smi, ci, 0) != 0) {
                (void) mdcomperror(&params->mde, MDE_REPL_INVAL_STATE,
                    mnum, md_expldev(params->old_dev));
                goto errexit;
        }

        sm = &un->un_sm[smi];
        if (sm->sm_state & (SMS_OFFLINE | SMS_OFFLINE_RESYNC)) {
                (void) mdmderror(&params->mde, MDE_ILLEGAL_SM_STATE, mnum);
                goto errexit;
        }

        get_dev = (void (*)())md_get_named_service(sm->sm_dev, 0,
            "get device", 0);
        (void) (*get_dev)(sm->sm_dev, sm, ci, &cd);

        repl_dev = (int (*)())md_get_named_service(sm->sm_dev, 0,
            "replace device", 0);

        smdev = sm->sm_dev;
        ms_un = MD_UNIT(md_getminor(smdev));

        if (params->cmd == ENABLE_COMP) {
                md_dev64_t this_dev;
                int numkeys;
                mdkey_t this_key;

                this_dev = ((cd.cd_orig_dev == 0) ? cd.cd_dev :
                    cd.cd_orig_dev);
                setno = MD_MIN2SET(md_getminor(smdev));
                side = mddb_getsidenum(setno);
                comp = (struct ms_comp *)
                    ((void *)&((char *)ms_un)[ms_un->un_ocomp]);
                comp += ci;
                /*
                 * We trust the dev_t because we cannot determine the
                 * dev_t from the device id since a new disk is in the
                 * same location. Since this is a call from metareplace -e dx
                 * AND it is SCSI a new dev_t is not generated. So the
                 * dev_t from the mddb is used. Before enabling the device
                 * we check to make sure that multiple entries for the same
                 * device do not exist in the namespace. If they do we
                 * fail the ioctl.
                 * One of the many ways multiple entries in the name space
                 * can occur is if one removed the failed component in the
                 * stripe of a mirror and put another disk that was part of
                 * another metadevice. After reboot metadevadm would correctly
                 * update the device name for the metadevice whose component
                 * has moved. However now in the metadb there are two entries
                 * for the same name (ctds) that belong to different
                 * metadevices. One is valid, the other is a ghost or "last
                 * known as" ctds.
                 */
                this_dev = md_getdevnum(setno, side,
                    comp->un_key, MD_TRUST_DEVT);

                /*
                 * Verify that multiple keys for the same
                 * dev_t don't exist
                 */

                if (md_getkeyfromdev(setno, side, this_dev,
                    &this_key, &numkeys) != 0) {
                        (void) mddeverror(&params->mde, MDE_NAME_SPACE,
                            md_expldev(params->old_dev));
                        goto errexit;
                }
                /*
                 * Namespace has multiple entries
                 * for the same devt
                 */
                if (numkeys > 1) {
                        (void) mddeverror(&params->mde, MDE_MULTNM,
                            md_expldev(params->old_dev));
                        goto errexit;
                }
                if ((numkeys == 0) || (comp->un_key != this_key)) {
                        (void) mdcomperror(&params->mde, MDE_CANT_FIND_COMP,
                            mnum, this_dev);
                        goto errexit;
                }

                if ((md_getmajor(this_dev) != md_major) &&
                    (md_devid_found(setno, side, this_key) == 1)) {
                        if (md_update_namespace_did(setno, side,
                            this_key, &params->mde) != 0) {
                                (void) mddeverror(&params->mde, MDE_NAME_SPACE,
                                    this_dev);
                                goto errexit;
                        }
                }

                if (md_expldev(params->new_dev) != this_dev) {
                        (void) mddeverror(&params->mde, MDE_FIX_INVAL_STATE,
                            md_expldev(params->new_dev));
                        goto errexit;
                }

                /* in case of dryrun, don't actually do anything */
                if ((params->options & MDIOCTL_DRYRUN) == 0) {
                        err = (*repl_dev)(sm->sm_dev, 0, ci, NULL, recids, 6,
                            &repl_done, &repl_data);
                }
        } else if ((params->options & MDIOCTL_DRYRUN) == 0) {
                nd.nd_dev = md_expldev(params->new_dev);
                nd.nd_key = params->new_key;
                nd.nd_start_blk = params->start_blk;
                nd.nd_nblks = params->number_blks;
                nd.nd_labeled = params->has_label;
                nd.nd_hs_id = 0;

                err = (*repl_dev)(sm->sm_dev, 0, ci, &nd, recids, 6,
                    &repl_done, &repl_data);

        }

        if (err != 0) {
                (void) mdcomperror(&params->mde, err, mnum,
                    md_expldev(params->new_dev));
                goto errexit;
        }
        /* In case of a dryrun we're done. */
        if (params->options & MDIOCTL_DRYRUN) {
                mdclrerror(&params->mde);
                return (0);
        }

        /* set_sm_comp_state() commits the modified records */
        set_sm_comp_state(un, smi, ci, CS_RESYNC, recids, MD_STATE_NO_XMIT,
            lock);

        (*repl_done)(sm->sm_dev, repl_data);

        /*
         * If the mirror is open then we need to make sure that the
         * submirror, on which the replace ran, is also open and if not
         * then open it. This is only a concern for a single component
         * sub-mirror stripe as it may not be open due to the failure of
         * the single component.
         *
         * This check has to be done after the call to (*repl_done)
         * as that function releases the writer lock on the submirror.
         */
        if (md_unit_isopen(ui)) {
                minor_t ms_mnum = md_getminor(sm->sm_dev);

                ms_ui = MDI_UNIT(ms_mnum);

                if (!md_unit_isopen(ms_ui)) {
                        /*
                         * Underlying submirror is not open so open it.
                         */
                        if (md_layered_open(ms_mnum, &smdev, MD_OFLG_NULL)) {
                                mirror_openfail_console_info(un, smi, ci);
                                goto errexit;
                        }
                }
        }

        mirror_check_failfast(mnum);

        if (params->cmd == ENABLE_COMP) {
                SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ENABLE, SVM_TAG_METADEVICE,
                    MD_UN2SET(un), MD_SID(un));
        } else {
                SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REPLACE, SVM_TAG_METADEVICE,
                    MD_UN2SET(un), MD_SID(un));
        }

        md_ioctl_writerexit(lock);
        /*
         * Reset any saved resync location flags as we've now replaced the
         * component. This means we have to resync the _whole_ component.
         */
        un->un_rs_resync_done = un->un_rs_resync_2_do = 0;
        un->un_rs_type = MD_RS_NONE;
        mirror_resume_writes(un);
        if (!MD_MNSET_SETNO(MD_UN2SET(un)))
                (void) mirror_resync_unit(mnum, NULL, &params->mde, lock);
        mdclrerror(&params->mde);
        return (0);
errexit:
        /* We need to resume writes unless this is a dryrun */
        if (!(params->options & MDIOCTL_DRYRUN))
                mirror_resume_writes(un);
        return (0);
}

/*
 * mirror_attach:
 * ----------------
 * Called to implement the submirror attach function
 *
 * Owner is returned in the parameter block passed in by the caller.
 *
 * Returns:
 *	0	success
 *	error code if the function fails
 *
 * For a MN set, on entry all writes to the mirror are suspended, on exit
 * from this function, writes must be resumed when not a dryrun.
 */
static int
mirror_attach(
        md_att_struct_t *att,
        IOLOCK *lock
)
{
        minor_t mnum = att->mnum;
        mm_unit_t *un;
        md_unit_t *su;
        mm_submirror_t *sm;
        mm_submirror_ic_t *smic;
        int smi;
        md_dev64_t sm_dev;
        minor_t sm_mnum;
        mdkey_t indx;
        set_t setno;
        uint_t options;

        /*
         * This routine should not be called during upgrade.
         */
        if (MD_UPGRADE) {
                return (0);
        }

        mdclrerror(&att->mde);
        options = att->options;

        if ((un = mirror_getun(mnum, &att->mde, WRITERS, lock)) == NULL) {
                return (0);
        }

        setno = MD_UN2SET(un);

        for (smi = 0; smi < NMIRROR; smi++)
                if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE))
                        break;

        if (smi == NMIRROR) {
                (void) mdmderror(&att->mde, MDE_MIRROR_FULL, mnum);
                goto errexit;
        }

        sm = &un->un_sm[smi];
        smic = &un->un_smic[smi];
        sm_dev = att->submirror;
        sm_mnum = md_getminor(sm_dev);

        if (md_get_parent(sm_dev) != MD_NO_PARENT) {
                (void) mdmderror(&att->mde, MDE_IN_USE, sm_mnum);
                goto errexit;
        }

        if (md_unit_isopen(MDI_UNIT(sm_mnum))) {
                (void) mdmderror(&att->mde, MDE_IS_OPEN, sm_mnum);
                goto errexit;
        }

        /* Check the size */
        su = (md_unit_t *)MD_UNIT(sm_mnum);
        if (un->c.un_total_blocks > su->c.un_total_blocks) {
                (void) mdmderror(&att->mde, MDE_SM_TOO_SMALL, sm_mnum);
                goto errexit;
        }

        /* Don't attach labeled sm to unlabeled mirrors */
        if ((su->c.un_flag & MD_LABELED) && !(un->c.un_flag & MD_LABELED)) {
                (void) mdmderror(&att->mde, MDE_NO_LABELED_SM, sm_mnum);
                goto errexit;
        }

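        /*
         * Record the submirror's driver name in the set's shared
         * namespace; the entry is released again below if the layered
         * open of the submirror fails.
         */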
        indx = md_setshared_name(setno,
            ddi_major_to_name(md_getmajor(sm_dev)), 0L);

        /* Open the sm, only if the mirror is open */
        if (md_unit_isopen(MDI_UNIT(mnum))) {
                if (md_layered_open(mnum, &sm_dev, MD_OFLG_NULL)) {
                        (void) md_remshared_name(setno, indx);
                        (void) mdmderror(&att->mde, MDE_SM_OPEN_ERR,
                            md_getminor(att->submirror));
                        goto errexit;
                }
                /* in dryrun mode, don't leave the device open */
                if (options & MDIOCTL_DRYRUN) {
                        md_layered_close(sm_dev, MD_OFLG_NULL);
                }
        }

        /*
         * After this point the checks are done and action is taken.
         * So, clean up and return in case of dryrun.
         */

        if (options & MDIOCTL_DRYRUN) {
                md_ioctl_writerexit(lock);
                mdclrerror(&att->mde);
                return (0);
        }

        sm->sm_key = att->key;
        sm->sm_dev = sm_dev;
        md_set_parent(sm_dev, MD_SID(un));
        mirror_set_sm_state(sm, smic, SMS_ATTACHED_RESYNC, 1);
        build_submirror(un, smi, 0);
        un->un_nsm++;
        mirror_commit(un, SMI2BIT(smi), 0);
        mirror_check_failfast(mnum);
        SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_ATTACH, SVM_TAG_METADEVICE,
            MD_UN2SET(un), MD_SID(un));

        mirror_resume_writes(un);
        md_ioctl_writerexit(lock);
        if (!MD_MNSET_SETNO(setno))
                (void) mirror_resync_unit(mnum, NULL, &att->mde, lock);
        mdclrerror(&att->mde);
        return (0);
errexit:
        /* We need to resume writes unless this is a dryrun */
        if (!(options & MDIOCTL_DRYRUN))
                mirror_resume_writes(un);
        return (0);
}


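/*
 * reset_comp_states:
 * -----------------
 * Reset every component of the given submirror to CS_OKAY and clear its
 * per-component error bookkeeping. Called when a submirror is detached so
 * that stale error state does not survive the detach.
 */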
void
reset_comp_states(mm_submirror_t *sm, mm_submirror_ic_t *smic)
{
        int compcnt;
        int i;
        md_m_shared_t *shared;

        compcnt = (*(smic->sm_get_component_count)) (sm->sm_dev, sm);
        for (i = 0; i < compcnt; i++) {
                shared = (md_m_shared_t *)(*(smic->sm_shared_by_indx))
                    (sm->sm_dev, sm, i);

                shared->ms_state = CS_OKAY;
                shared->ms_flags &= ~MDM_S_NOWRITE;
                shared->ms_lasterrcnt = 0;
        }
}


/*
 * mirror_detach:
 * ----------------
 * Called to implement the submirror detach function
 *
 * Owner is returned in the parameter block passed in by the caller.
 *
 * Returns:
 *	0	success
 *	error code if the function fails
 *
 * For a MN set, on entry all writes to the mirror are suspended, on exit
 * from this function, writes must be resumed.
 */
static int
mirror_detach(
        md_detach_params_t *det,
        IOLOCK *lock
)
{
        minor_t mnum = det->mnum;
        mm_unit_t *un;
        mdi_unit_t *ui;
        mm_submirror_t *sm;
        mm_submirror_t *old_sm;
        mm_submirror_t *new_sm;
        mm_submirror_ic_t *smic;
        int smi;
        md_dev64_t sm_dev;
        md_unit_t *su;
        sv_dev_t sv;
        mddb_recid_t recids[2];
        int nsv = 0;
        int smi_remove;
        mm_submirror_ic_t *old_smic;
        mm_submirror_ic_t *new_smic;

        mdclrerror(&det->mde);

        if ((un = mirror_getun(mnum, &det->mde, WRITERS, lock)) == NULL) {
                return (0);
        }

        ui = MDI_UNIT(mnum);
        if (ui->ui_tstate & MD_INACCESSIBLE) {
                mirror_resume_writes(un);
                return (mdmderror(&det->mde, MDE_IN_UNAVAIL_STATE, mnum));
        }
        /*
         * detach cannot be done while a resync is active or we are
         * still waiting for an optimized resync to be started
         */
        if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE | MD_UN_OPT_NOT_DONE)) {
                mirror_resume_writes(un);
                return (mdmderror(&det->mde, MDE_RESYNC_ACTIVE, mnum));
        }

        for (smi = 0; smi < NMIRROR; smi++) {
                if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) {
                        continue;
                }
                if (un->un_sm[smi].sm_dev == det->submirror) {
                        smi_remove = smi;
                        break;
                }
        }

        if (smi == NMIRROR) {
                mirror_resume_writes(un);
                return (mdmderror(&det->mde, MDE_CANT_FIND_SM, mnum));
        }

        if (un->un_nsm == 1) {
                mirror_resume_writes(un);
                return (mdmderror(&det->mde, MDE_LAST_SM, mnum));
        }

        if (mirror_other_sources(un, smi, WHOLE_SM, 0) != 0) {
                mirror_resume_writes(un);
                return (mdmderror(&det->mde, MDE_NO_READABLE_SM, mnum));
        }

        sm = &un->un_sm[smi];
        smic = &un->un_smic[smi];
        sm_dev = sm->sm_dev;
        su = (md_unit_t *)MD_UNIT(md_getminor(sm_dev));
        /*
         * Need to pass in the extra record id,
         * because mirror_commit() will not commit
         * a sm (from the smmask) if the slot is unused.
         * Which it is, since we are detaching.
         */
        recids[0] = ((md_unit_t *)MD_UNIT(md_getminor(sm_dev)))->c.un_record_id;
        recids[1] = 0;

        mirror_set_sm_state(sm, smic, SMS_UNUSED, det->force_detach);
        /*
         * If there are any erred components
         * then make the detach fail and do not unparent the
         * submirror.
         */
        if (sm->sm_state == SMS_UNUSED) {
                /* reallow soft partitioning of submirror */
                MD_CAPAB(su) |= MD_CAN_SP;
                md_reset_parent(sm_dev);
                reset_comp_states(sm, smic);
                un->un_nsm--;
                /* Close the sm, only if the mirror is open */
                if (md_unit_isopen(MDI_UNIT(mnum)))
                        md_layered_close(sm_dev, MD_OFLG_NULL);
                sv.setno = MD_UN2SET(un);
                sv.key = sm->sm_key;
                nsv = 1;
        } else
                (void) mdmderror(&det->mde, MDE_SM_FAILED_COMPS, mnum);

        /*
         * Perhaps the mirror changed its size due to this detach.
         * (void) mirror_grow_unit(un, &mde);
         */

        /*
         * NOTE: We are passing the detached sm recid
         * and not the smmask field. This is correct.
         */
        mirror_commit(un, 0, recids);
        md_rem_names(&sv, nsv);
        if (sm->sm_state == SMS_UNUSED) {
                SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_DETACH, SVM_TAG_METADEVICE,
                    MD_UN2SET(un), MD_SID(un));
        }

        /*
         * Reshuffle the submirror devices in the array as we potentially
         * have a dead record in the middle of it.
         */
        for (smi = 0; nsv && (smi < NMIRROR); smi++) {
                if (smi < smi_remove) {
                        continue;
                }
                if (smi > smi_remove) {
                        old_sm = &un->un_sm[smi];
                        new_sm = &un->un_sm[smi - 1];
                        new_sm->sm_key = old_sm->sm_key;
                        new_sm->sm_dev = old_sm->sm_dev;
                        new_sm->sm_state = old_sm->sm_state;
                        new_sm->sm_flags = old_sm->sm_flags;
                        new_sm->sm_shared = old_sm->sm_shared;
                        new_sm->sm_hsp_id = old_sm->sm_hsp_id;
                        new_sm->sm_timestamp = old_sm->sm_timestamp;
                        bzero(old_sm, sizeof (mm_submirror_t));
                        old_smic = &un->un_smic[smi];
                        new_smic = &un->un_smic[smi - 1];
                        bcopy(old_smic, new_smic, sizeof (mm_submirror_ic_t));
                        bzero(old_smic, sizeof (mm_submirror_ic_t));
                }
        }
        mirror_commit(un, 0, NULL);
        mirror_resume_writes(un);
        return (0);
}

/*
 * mirror_offline:
 * ----------------
 * Called to implement the submirror offline function
 *
 * Owner is returned in the parameter block passed in by the caller.
 *
 * Returns:
 *	0	success
 *	error code if the function fails
 *
 * For a MN set, on entry all writes to the mirror are suspended, on exit
 * from this function, writes must be resumed.
 */
static int
mirror_offline(
        md_i_off_on_t *miop,
        IOLOCK *lock
)
{
        minor_t mnum = miop->mnum;
        mm_unit_t *un;
        mm_submirror_t *sm;
        mm_submirror_ic_t *smic;
        int smi;
        mdi_unit_t *ui = MDI_UNIT(mnum);

        mdclrerror(&miop->mde);

        if ((un = mirror_getun(mnum, &miop->mde, WR_LOCK, lock)) == NULL) {
                return (0);
        }

        /*
         * offline cannot be done while a resync is active or we are
         * still waiting for an optimized resync to be started
         */
        if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE | MD_UN_OPT_NOT_DONE)) {
                mirror_resume_writes(un);
                return (mdmderror(&miop->mde, MDE_RESYNC_ACTIVE, mnum));
        }

        /*
         * Reject mirror_offline if ABR is set
         */
        if ((ui->ui_tstate & MD_ABR_CAP) || un->un_abr_count) {
                mirror_resume_writes(un);
                return (mderror(&miop->mde, MDE_ABR_SET));
        }

        for (smi = 0; smi < NMIRROR; smi++) {
                if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE))
                        continue;
                if (un->un_sm[smi].sm_dev == miop->submirror)
                        break;
        }

        if (smi == NMIRROR) {
                mirror_resume_writes(un);
                return (mdmderror(&miop->mde, MDE_CANT_FIND_SM, mnum));
        }

        sm = &un->un_sm[smi];
        smic = &un->un_smic[smi];
        if (!SMS_IS(sm, SMS_RUNNING) && !miop->force_offline) {
                mirror_resume_writes(un);
                return (mdmderror(&miop->mde, MDE_ILLEGAL_SM_STATE, mnum));
        }

        if (mirror_other_sources(un, smi, WHOLE_SM, 0) != 0) {
                mirror_resume_writes(un);
                return (mdmderror(&miop->mde, MDE_NO_READABLE_SM, mnum));
        }
        mirror_set_sm_state(sm, smic, SMS_OFFLINE, 1);
        mirror_resume_writes(un);

        MD_STATUS(un) |= MD_UN_OFFLINE_SM;
        mirror_commit(un, NO_SUBMIRRORS, 0);
        SE_NOTIFY(EC_SVM_STATE, ESC_SVM_OFFLINE, SVM_TAG_METADEVICE,
            MD_UN2SET(un), MD_SID(un));
        return (0);
}

/*
 * mirror_online:
 * ----------------
 * Called to implement the submirror online function
 *
 * Owner is returned in the parameter block passed in by the caller.
 *
 * Returns:
 *	0	success
 *	error code if the function fails
 *
 * For a MN set, on entry all writes to the mirror are suspended, on exit
 * from this function, writes must be resumed.
 */
static int
mirror_online(
        md_i_off_on_t *miop,
        IOLOCK *lock
)
{
        minor_t mnum = miop->mnum;
        mm_unit_t *un;
        mm_submirror_t *sm;
        mm_submirror_ic_t *smic;
        int smi;
        set_t setno = MD_MIN2SET(mnum);

        mdclrerror(&miop->mde);

        if ((un = mirror_getun(mnum, &miop->mde, WR_LOCK, lock)) == NULL) {
                return (0);
        }

        for (smi = 0; smi < NMIRROR; smi++) {
                if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE))
                        continue;
                if (un->un_sm[smi].sm_dev == miop->submirror)
                        break;
        }
        if (smi == NMIRROR) {
                mirror_resume_writes(un);
                return (mdmderror(&miop->mde, MDE_CANT_FIND_SM, mnum));
        }

        sm = &un->un_sm[smi];
        smic = &un->un_smic[smi];
        if (!SMS_IS(sm, SMS_OFFLINE)) {
                mirror_resume_writes(un);
                return (mdmderror(&miop->mde, MDE_ILLEGAL_SM_STATE, mnum));
        }

        /*
         * online cannot be done while a resync is active or we are
         * still waiting for an optimized resync to be started
         */
        if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE | MD_UN_OPT_NOT_DONE)) {
                mirror_resume_writes(un);
                return (mdmderror(&miop->mde, MDE_RESYNC_ACTIVE, mnum));
        }

        mirror_set_sm_state(sm, smic, SMS_OFFLINE_RESYNC, 1);
        mirror_commit(un, NO_SUBMIRRORS, 0);
        mirror_check_failfast(mnum);
        SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ONLINE, SVM_TAG_METADEVICE,
            MD_UN2SET(un), MD_SID(un));


        /* for MN sets, re-read the resync record from disk */
        if (MD_MNSET_SETNO(MD_UN2SET(un)))
                (void) mddb_reread_rr(setno, un->un_rr_dirty_recid);

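        /*
         * Seed the resync bitmap from the dirty bitmap so that the
         * optimized resync only touches regions that were written while
         * the submirror was offline.
         */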
        bcopy((caddr_t)un->un_dirty_bm, (caddr_t)un->un_resync_bm,
            howmany(un->un_rrd_num, NBBY));
        MD_STATUS(un) |= MD_UN_OPT_NOT_DONE;
        sm->sm_flags |= MD_SM_RESYNC_TARGET;
        mirror_resume_writes(un);
        md_ioctl_writerexit(lock);
        if (!MD_MNSET_SETNO(setno))
                return (mirror_resync_unit(mnum, NULL, &miop->mde, lock));
        else
                return (0);
}

int
mirror_grow_unit(
        mm_unit_t *un,
        md_error_t *ep
)
{
        md_unit_t *su;
        mm_submirror_t *sm;
        int smi;
        diskaddr_t total_blocks;
        diskaddr_t current_tb;
        int spc; /* sectors per cylinder */
        minor_t mnum = MD_SID(un);

        /*
         * grow_unit cannot be done while a resync is active or we are
         * still waiting for an optimized resync to be started. Set
         * flag to indicate GROW_PENDING and once the resync is complete
         * the grow_unit function will be executed.
         */
        if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE | MD_UN_OPT_NOT_DONE)) {
                MD_STATUS(un) |= MD_UN_GROW_PENDING;
                mirror_commit(un, NO_SUBMIRRORS, 0);
                return (mdmderror(ep, MDE_GROW_DELAYED, MD_SID(un)));
        }

        /*
         * Find the smallest submirror
         */
        total_blocks = 0;
        for (smi = 0; smi < NMIRROR; smi++) {
                if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE))
                        continue;
                sm = &un->un_sm[smi];
                /*
                 * Growth is not possible if there is one or more
                 * submirrors made up of non-Metadevices.
                 */
                if (md_getmajor(sm->sm_dev) != md_major)
                        return (0);

                su = MD_UNIT(md_getminor(sm->sm_dev));
                if ((total_blocks == 0) ||
                    (su->c.un_total_blocks < total_blocks))
                        total_blocks = su->c.un_total_blocks;
        }

        /*
         * If the smallest submirror is not larger
         * than the mirror, we are all done.
         */
        if (total_blocks <= un->c.un_total_blocks)
                return (0);

        /*
         * Growing the mirror now.
         * First: Round down the actual_tb to be a multiple
         * of nheads * nsects.
         */
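        /*
         * For example (illustrative figures only): with un_nhead = 16 and
         * un_nsect = 128, spc is 2048, so a smallest-submirror size of
         * 10000 blocks is rounded down to current_tb = 8192.
         */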
        spc = un->c.un_nhead * un->c.un_nsect;
        current_tb = (total_blocks/spc) * spc;

        un->c.un_total_blocks = current_tb;
        md_nblocks_set(mnum, un->c.un_total_blocks);
        un->c.un_actual_tb = total_blocks;

        /* Is the mirror growing from 32 bit device to 64 bit device? */
        if (((un->c.un_revision & MD_64BIT_META_DEV) == 0) &&
            (un->c.un_total_blocks > MD_MAX_BLKS_FOR_SMALL_DEVS)) {
#if defined(_ILP32)
                return (mdmderror(ep, MDE_UNIT_TOO_LARGE, mnum));
#else
                mddb_type_t typ1;
                mddb_recid_t recid;
                set_t setno;
                mddb_recid_t old_recid = un->c.un_record_id;
                mddb_recid_t old_vtoc;
                mddb_de_ic_t *dep, *old_dep;
                md_create_rec_option_t options;

                /* yup, new device size. So we need to replace the record */
                typ1 = (mddb_type_t)md_getshared_key(MD_UN2SET(un),
                    mirror_md_ops.md_driver.md_drivername);
                setno = MD_MIN2SET(mnum);

                /* Preserve the friendly name properties of growing unit */
                options = MD_CRO_64BIT | MD_CRO_MIRROR;
                if (un->c.un_revision & MD_FN_META_DEV)
                        options |= MD_CRO_FN;
                recid = mddb_createrec(offsetof(mm_unit_t, un_smic), typ1,
                    MIRROR_REC, options, setno);
                /* Resize to include incore fields */
                un->c.un_revision |= MD_64BIT_META_DEV;
                /* All 64 bit metadevices only support EFI labels. */
                un->c.un_flag |= MD_EFILABEL;
                /*
                 * If the device had a vtoc record attached to it, we remove
                 * the vtoc record, because the layout has changed completely.
                 */
                old_vtoc = un->c.un_vtoc_id;
                if (old_vtoc != 0) {
                        un->c.un_vtoc_id =
                            md_vtoc_to_efi_record(old_vtoc, setno);
                }
                MD_RECID(un) = recid;
                dep = mddb_getrecdep(recid);
                old_dep = mddb_getrecdep(old_recid);
                kmem_free(dep->de_rb_userdata, dep->de_reqsize);
                dep->de_rb_userdata = old_dep->de_rb_userdata;
                dep->de_reqsize = old_dep->de_reqsize;
                dep->de_rb_userdata_ic = old_dep->de_rb_userdata_ic;
                dep->de_icreqsize = old_dep->de_icreqsize;
                mirror_commit(un, NO_SUBMIRRORS, 0);
                old_dep->de_rb_userdata = NULL;
                old_dep->de_rb_userdata_ic = NULL;
                mddb_deleterec_wrapper(old_recid);
                /*
                 * If there was a vtoc record, it is no longer needed, because
                 * a new efi record has been created for this un.
                 */
                if (old_vtoc != 0) {
                        mddb_deleterec_wrapper(old_vtoc);
                }
#endif
        }

        if ((current_tb/un->un_rrd_blksize) > MD_MAX_NUM_RR) {
                if (mirror_resize_resync_regions(un, current_tb)) {
                        return (mdmderror(ep, MDE_RR_ALLOC_ERROR, MD_SID(un)));
                }
                mirror_check_failfast(mnum);
                SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_GROW, SVM_TAG_METADEVICE,
                    MD_UN2SET(un), MD_SID(un));
                return (0);
        }

        if (mirror_add_resync_regions(un, current_tb)) {
                return (mdmderror(ep, MDE_RR_ALLOC_ERROR, MD_SID(un)));
        }

        mirror_check_failfast(mnum);
        SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_GROW, SVM_TAG_METADEVICE,
            MD_UN2SET(un), MD_SID(un));

        return (0);
}

static int
mirror_grow(
        void *mgp,
        IOLOCK *lock
)
{
        mm_unit_t *un;
        md_grow_params_t *mgph = mgp;

        mdclrerror(&mgph->mde);

        if ((un = mirror_getun(mgph->mnum,
            &mgph->mde, WR_LOCK, lock)) == NULL)
                return (0);

        if (MD_STATUS(un) & MD_UN_GROW_PENDING)
                return (0);

        return (mirror_grow_unit(un, &mgph->mde));
}

static int
mirror_change(
        md_mirror_params_t *mmp,
        IOLOCK *lock
)
{
        mm_params_t *pp = &mmp->params;
        mm_unit_t *un;

        mdclrerror(&mmp->mde);

        if ((un = mirror_getun(mmp->mnum, &mmp->mde, WR_LOCK, lock)) == NULL)
                return (0);

        if (pp->change_read_option)
                un->un_read_option = pp->read_option;

        if (pp->change_write_option)
                un->un_write_option = pp->write_option;

        if (pp->change_pass_num)
                un->un_pass_num = pp->pass_num;

        mirror_commit(un, NO_SUBMIRRORS, 0);

        SE_NOTIFY(EC_SVM_STATE, ESC_SVM_CHANGE, SVM_TAG_METADEVICE,
            MD_UN2SET(un), MD_SID(un));
        return (0);
}

static int
mirror_get_resync(
        md_resync_ioctl_t *ri
)
{
        minor_t mnum = ri->ri_mnum;
        mm_unit_t *un;
        u_longlong_t percent;
        uint_t cnt;
        uint_t rr;
        diskaddr_t d;

        mdclrerror(&ri->mde);

        if ((un = mirror_getun(mnum, &ri->mde, STALE_OK|NO_LOCK, NULL)) == NULL)
                return (0);

        ri->ri_flags = 0;
        if (md_get_setstatus(MD_MIN2SET(mnum)) & MD_SET_STALE) {
                ri->ri_percent_done = 0;
                ri->ri_percent_dirty = 0;
                return (0);
        }

        if (MD_STATUS(un) & (MD_UN_RESYNC_ACTIVE|MD_UN_RESYNC_CANCEL)) {
                if (MD_STATUS(un) & MD_UN_RESYNC_ACTIVE)
                        ri->ri_flags |= MD_RI_INPROGRESS;
                /* Return state of resync thread */
                ri->ri_flags |= (un->un_rs_thread_flags & MD_RI_BLOCK);
                d = un->un_rs_resync_2_do;
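                /*
                 * For mirrors larger than MD_MAX_BLKS_FOR_SMALL_DEVS the
                 * done-fraction below is scaled to tenths of a percent
                 * (0..1000) rather than whole percent, giving callers
                 * finer-grained progress on large devices.
                 */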
                if (d) {
                        percent = un->un_rs_resync_done;
                        if (un->c.un_total_blocks >
                            MD_MAX_BLKS_FOR_SMALL_DEVS) {
                                percent *= 1000;
                                percent /= d;
                                if (percent > 1000)
                                        percent = 1000;
                        } else {
                                percent *= 100;
                                percent /= d;
                        }
                        ri->ri_percent_done = (int)percent;
                } else {
                        ri->ri_percent_done = 0;
                }
        }
        if (un->un_nsm < 2) {
                ri->ri_percent_dirty = 0;
                return (0);
        }
        cnt = 0;
        for (rr = 0; rr < un->un_rrd_num; rr++)
                if (IS_REGION_DIRTY(rr, un))
                        cnt++;
        d = un->un_rrd_num;
        if (d) {
                percent = cnt;
                percent *= 100;
                percent += d - 1; /* round up */
                percent /= d;
        } else
                percent = 0;
        ri->ri_percent_dirty = (int)percent;
        return (0);
}

/*
 * mirror_get_owner:
 * ----------------
 * Called to obtain the current owner of a mirror.
 *
 * Owner is returned in the parameter block passed in by the caller.
 *
 * Returns:
 *	0	success
 *	EINVAL	metadevice does not exist or is not a member of a multi-owned
 *		set.
 */
static int
mirror_get_owner(md_set_mmown_params_t *p, IOLOCK *lock)
{
        mm_unit_t *un;
        set_t setno;

        if ((un = mirror_getun(p->d.mnum, &p->mde, RD_LOCK, lock)) == NULL)
                return (EINVAL);

        setno = MD_UN2SET(un);
        if (!MD_MNSET_SETNO(setno)) {
                return (EINVAL);
        }
        p->d.owner = un->un_mirror_owner;
        return (0);
}

/*
 * mirror_choose_owner_thread:
 * --------------------------
 * Called to send a CHOOSE_OWNER message to the commd running on the master
 * node. This needs to run in a separate context so that mutex livelock is
 * avoided. This can occur because the original request is issued from a call
 * to metaioctl() which acquires the global ioctl lock, calls down into the
 * mirror_ioctl code and then attempts to mdmn_ksend_message() to the master
 * node. As the handler for the choose_owner message needs to send another
 * ioctl through the metaioctl() entry point, any other use (by rpc.metad or
 * mdcommd checking on set ownership) will deadlock the system, leading to
 * cluster reconfiguration timeouts and eventually a node or (at worst) a
 * cluster-wide panic.
 */
static void
mirror_choose_owner_thread(md_mn_msg_chooseid_t *msg)
{
        int rval;
        md_mn_kresult_t *kres;
        set_t setno = MD_MIN2SET(msg->msg_chooseid_mnum);

        kres = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP);
        rval = mdmn_ksend_message(setno, MD_MN_MSG_CHOOSE_OWNER,
            MD_MSGF_NO_BCAST | MD_MSGF_NO_LOG, 0, (char *)msg,
            sizeof (md_mn_msg_chooseid_t), kres);
        if (!MDMN_KSEND_MSG_OK(rval, kres)) {
                mdmn_ksend_show_error(rval, kres, "CHOOSE OWNER");
                cmn_err(CE_WARN, "ksend_message failure: CHOOSE_OWNER");
        }

        kmem_free(kres, sizeof (md_mn_kresult_t));
        kmem_free(msg, sizeof (md_mn_msg_chooseid_t));
        thread_exit();
}

/*
 * mirror_owner_thread:
 * -------------------
 * Called to request an ownership change from a thread context. This issues
 * an mdmn_ksend_message() and then completes the appropriate ownership change
 * on successful completion of the message transport.
 * The originating application must poll for completion on the 'flags' member
 * of the MD_MN_MM_OWNER_STATUS ioctl() parameter block.
 * Success is marked by a return value of MD_MN_MM_RES_OK, failure by
 * MD_MN_MM_RES_FAIL.
 */
static void
mirror_owner_thread(md_mn_req_owner_t *ownp)
{
        int rval;
        set_t setno = MD_MIN2SET(ownp->mnum);
        mm_unit_t *un = MD_UNIT(ownp->mnum);
        md_mn_kresult_t *kresult;
        md_mps_t *ps1;

        un->un_mirror_owner_status = 0;

        mutex_enter(&un->un_owner_mx);
        un->un_owner_state |= MM_MN_OWNER_SENT;
        mutex_exit(&un->un_owner_mx);

        kresult = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP);
        rval = mdmn_ksend_message(setno, MD_MN_MSG_REQUIRE_OWNER,
            MD_MSGF_NO_LOG, 0, (char *)ownp, sizeof (md_mn_req_owner_t),
            kresult);

        if (!MDMN_KSEND_MSG_OK(rval, kresult)) {
                /*
                 * Message transport layer failed. Return the failure code to
                 * the application.
                 */
                mdmn_ksend_show_error(rval, kresult, "CHANGE OWNER");
                mutex_enter(&un->un_owner_mx);
                un->un_owner_state &= ~(MM_MN_BECOME_OWNER|MM_MN_OWNER_SENT);
                mutex_exit(&un->un_owner_mx);
                un->un_mirror_owner_status =
                    MD_MN_MM_RESULT | MD_MN_MM_RES_FAIL;
        } else {
                /*
                 * Ownership change succeeded. Update in-core version of
                 * mirror owner.
                 */
                mutex_enter(&un->un_owner_mx);
                if (un->un_owner_state & MM_MN_BECOME_OWNER) {
                        un->un_mirror_owner = md_mn_mynode_id;
                        /* Sets node owner of un_rr_dirty record */
                        if (un->un_rr_dirty_recid)
                                (void) mddb_setowner(un->un_rr_dirty_recid,
                                    md_mn_mynode_id);
                        /*
                         * Release the block on the current resync region if it
                         * is blocked
                         */
                        ps1 = un->un_rs_prev_overlap;
                        if ((ps1 != NULL) &&
                            (ps1->ps_flags & MD_MPS_ON_OVERLAP))
                                mirror_overlap_tree_remove(ps1);
                }

                un->un_owner_state &= ~(MM_MN_OWNER_SENT|MM_MN_BECOME_OWNER);
                mutex_exit(&un->un_owner_mx);
                un->un_mirror_owner_status =
                    MD_MN_MM_RESULT | MD_MN_MM_RES_OK;

                /* Restart the resync thread if it was previously blocked */
                if (un->un_rs_thread_flags & MD_RI_BLOCK_OWNER) {
                        mutex_enter(&un->un_rs_thread_mx);
                        un->un_rs_thread_flags &= ~MD_RI_BLOCK_OWNER;
                        cv_signal(&un->un_rs_thread_cv);
                        mutex_exit(&un->un_rs_thread_mx);
                }
        }
        kmem_free(kresult, sizeof (md_mn_kresult_t));
        kmem_free(ownp, sizeof (md_mn_req_owner_t));
        thread_exit();
}

/*
 * mirror_set_owner:
 * ----------------
 * Called to change the owner of a mirror to the specified node. If we
 * are not the owner of the mirror, we do nothing apart from updating the
 * in-core ownership. It can also be used to choose a new owner for the
 * resync of a mirror; this case is specified by the flag
 * MD_MN_MM_CHOOSE_OWNER, see below.
 *
 * The p->d.flags bitfield controls how subsequent ownership changes will be
 * handled:
 *	MD_MN_MM_SPAWN_THREAD
 *		a separate thread is created which emulates the behaviour of
 *		become_owner() [mirror.c]. This is needed when changing the
 *		ownership from user context as there needs to be a controlling
 *		kernel thread which updates the owner info on the originating
 *		node. Successful completion of the mdmn_ksend_message() means
 *		that the owner field can be changed.
 *
 *	MD_MN_MM_PREVENT_CHANGE
 *		Disallow any change of ownership once this ownership change has
 *		been processed. The only way of changing the owner away from
 *		the p->d.owner node specified in the call is to issue a request
 *		with MD_MN_MM_ALLOW_CHANGE set in the flags. Any request to
 *		become owner from a different node while the PREVENT_CHANGE
 *		is in operation will result in an EAGAIN return value.
 *		un->un_owner_state has MM_MN_PREVENT_CHANGE set.
 *
 *	MD_MN_MM_ALLOW_CHANGE
 *		Allow the owner to be changed by a subsequent request.
 *		un->un_owner_state has MM_MN_PREVENT_CHANGE cleared.
 *
 *	MD_MN_MM_CHOOSE_OWNER
 *		Choose a new owner for a mirror resync. In this case, the new
 *		owner argument is not used. The selection of a new owner
 *		is a round robin allocation using a resync owner count. This
 *		ioctl passes this value in a message to the master node
 *		which uses it to select a node from the node list and then
 *		sends it a message to become the owner.
 *
 * If we are the current owner, we must stop further i/o from being scheduled
 * and wait for any pending i/o to drain. We wait for any in-progress resync
 * bitmap updates to complete and we can then set the owner. If an update to
 * the resync bitmap is attempted after this we simply don't write this out to
 * disk until the ownership is restored.
 *
 * If we are the node that wants to become the owner we update the in-core
 * owner and return. The i/o that initiated the ownership change will complete
 * on successful return from this ioctl.
 *
 * Return Value:
 *	0	Success
 *	EINVAL	Invalid unit referenced
 *	EAGAIN	Ownership couldn't be transferred away or change of
 *		ownership is prevented. Caller should retry later on.
 */
static int
mirror_set_owner(md_set_mmown_params_t *p, IOLOCK *lock)
{
        mdi_unit_t *ui;
        mm_unit_t *un;
        set_t setno;

        if ((un = mirror_getun(p->d.mnum, &p->mde, RD_LOCK, lock)) == NULL)
                return (EINVAL);
        ui = MDI_UNIT(p->d.mnum);
        setno = MD_MIN2SET(p->d.mnum);
        if (!MD_MNSET_SETNO(setno)) {
                return (EINVAL);
        }

        /*
         * If we are choosing a new resync owner, send a message to the master
         * to make the choice.
         */
        if (p->d.flags & MD_MN_MM_CHOOSE_OWNER) {
                /* Release ioctl lock before we call ksend_message() */
                md_ioctl_readerexit(lock);
                /* If we're resetting the owner pass the node id in */
                if (p->d.owner != MD_MN_MIRROR_UNOWNED) {
                        return (mirror_choose_owner(un, &p->d));
                } else {
                        return (mirror_choose_owner(un, NULL));
                }
        }

        /*
         * Check for whether we have to spawn a thread to issue this request.
         * If set we issue a mdmn_ksend_message() to cause the appropriate
         * ownership change. On completion of this request the calling
         * application _must_ poll the structure 'flags' field to determine the
         * result of the request. All this is necessary until we have true
         * multi-entrant ioctl support.
         * If we are just clearing the owner, then MD_MN_MM_SPAWN_THREAD can
         * be ignored.
         */
        if ((p->d.flags & MD_MN_MM_SPAWN_THREAD) && (p->d.owner != 0)) {
                md_mn_req_owner_t *ownp;
                ownp = kmem_zalloc(sizeof (md_mn_req_owner_t), KM_SLEEP);
                p->d.flags &= ~MD_MN_MM_SPAWN_THREAD;
                bcopy(&p->d, ownp, sizeof (md_mn_req_owner_t));
                if (thread_create(NULL, 0, mirror_owner_thread, (caddr_t)ownp,
                    0, &p0, TS_RUN, 60) == NULL) {
                        kmem_free(ownp, sizeof (md_mn_req_owner_t));
                        return (EFAULT);
                } else {
                        return (0);
                }
        }

        /*
         * If setting owner to NULL, this is being done because the owner has
         * died and therefore we set OPT_NOT_DONE to ensure that the
         * mirror is marked as "Needs Maintenance" and that an optimized
         * resync will be done when we resync the mirror. Also clear the
         * PREVENT_CHANGE flag and remove the last resync region from the
         * overlap tree.
         */
        if (p->d.owner == 0) {
                md_mps_t *ps;
                int i;

                md_ioctl_readerexit(lock);
                un = md_ioctl_writerlock(lock, ui);
                /*
                 * If the ABR capability is not set and the pass_num is
                 * non-zero, there is a need to perform an optimized resync.
                 * Therefore set OPT_NOT_DONE, set up the resync_bm and set
                 * the submirrors as resync targets.
                 */
                if (!(ui->ui_tstate & MD_ABR_CAP) && un->un_pass_num) {
                        MD_STATUS(un) |= MD_UN_OPT_NOT_DONE;

                        (void) mddb_reread_rr(setno, un->un_rr_dirty_recid);
                        bcopy((caddr_t)un->un_dirty_bm,
                            (caddr_t)un->un_resync_bm,
                            howmany(un->un_rrd_num, NBBY));
                        for (i = 0; i < NMIRROR; i++) {
                                if ((SUBMIRROR_IS_READABLE(un, i)) ||
                                    SMS_BY_INDEX_IS(un, i,
                                    SMS_OFFLINE_RESYNC))
                                        un->un_sm[i].sm_flags |=
                                            MD_SM_RESYNC_TARGET;
                        }
                }
                mutex_enter(&un->un_owner_mx);
                un->un_owner_state &= ~MD_MN_MM_PREVENT_CHANGE;
                mutex_exit(&un->un_owner_mx);
                ps = un->un_rs_prev_overlap;
                if ((ps != NULL) && (ps->ps_flags & MD_MPS_ON_OVERLAP)) {
                        mirror_overlap_tree_remove(ps);
                        ps->ps_firstblk = 0;
                        ps->ps_lastblk = 0;
                }
                md_ioctl_writerexit(lock);
                un = md_ioctl_readerlock(lock, ui);
        }

        mutex_enter(&un->un_owner_mx);
        if (!(un->un_owner_state & MM_MN_BECOME_OWNER)) {
                /*
                 * If we are not trying to become owner ourselves check
                 * to see if we have to change the owner
                 */
                if (un->un_mirror_owner == p->d.owner) {
                        /*
                         * No need to change owner,
                         * Clear/set PREVENT_CHANGE bit
                         */
                        if (p->d.flags & MD_MN_MM_PREVENT_CHANGE) {
                                un->un_owner_state |= MM_MN_PREVENT_CHANGE;
                        } else if (p->d.flags & MD_MN_MM_ALLOW_CHANGE) {
                                un->un_owner_state &= ~MM_MN_PREVENT_CHANGE;
                        }
                        mutex_exit(&un->un_owner_mx);
                        return (0);
                }
        }

        /*
         * Disallow ownership change if previously requested to. This can only
         * be reset by issuing a request with MD_MN_MM_ALLOW_CHANGE set in the
         * flags field.
         */
        if ((un->un_owner_state & MM_MN_PREVENT_CHANGE) &&
            !(p->d.flags & MD_MN_MM_ALLOW_CHANGE)) {
                mutex_exit(&un->un_owner_mx);
#ifdef DEBUG
                cmn_err(CE_WARN, "mirror_ioctl: Node %x attempted to become "
                    "owner while node %x has exclusive access to %s",
                    p->d.owner, un->un_mirror_owner, md_shortname(MD_SID(un)));
#endif
                return (EAGAIN);
        }
        if (p->d.owner == md_mn_mynode_id) {
                /*
                 * I'm becoming the mirror owner. Flag this so that the
                 * message sender can change the in-core owner when all
                 * nodes have processed this message
                 */
                un->un_owner_state &= ~MM_MN_OWNER_SENT;
                un->un_owner_state |= MM_MN_BECOME_OWNER;
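                /*
                 * PREVENT_CHANGE is set only when requested; the
                 * ALLOW_CHANGE ternary below either clears the bit or,
                 * via the "&= ~0" arm, leaves the owner state untouched.
                 */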
                un->un_owner_state |= (p->d.flags & MD_MN_MM_PREVENT_CHANGE) ?
                    MM_MN_PREVENT_CHANGE : 0;
                un->un_owner_state &= (p->d.flags & MD_MN_MM_ALLOW_CHANGE) ?
                    ~MM_MN_PREVENT_CHANGE : ~0;

                mutex_exit(&un->un_owner_mx);
        } else if ((un->un_mirror_owner == md_mn_mynode_id) ||
            un->un_owner_state & MM_MN_BECOME_OWNER) {
                mutex_exit(&un->un_owner_mx);

                /*
                 * I'm releasing ownership. Block and drain i/o. This also
                 * blocks until any in-progress resync record update completes.
                 */
                md_ioctl_readerexit(lock);
                un = md_ioctl_writerlock(lock, ui);
                /* Block the resync thread */
                mutex_enter(&un->un_rs_thread_mx);
                un->un_rs_thread_flags |= MD_RI_BLOCK_OWNER;
                mutex_exit(&un->un_rs_thread_mx);
                mutex_enter(&un->un_owner_mx);
                un->un_mirror_owner = p->d.owner;

                /* Sets node owner of un_rr_dirty record */
                if (un->un_rr_dirty_recid)
                        (void) mddb_setowner(un->un_rr_dirty_recid, p->d.owner);
                un->un_owner_state &= ~MM_MN_BECOME_OWNER;
                un->un_owner_state |= (p->d.flags & MD_MN_MM_PREVENT_CHANGE) ?
                    MM_MN_PREVENT_CHANGE : 0;
                un->un_owner_state &= (p->d.flags & MD_MN_MM_ALLOW_CHANGE) ?
                    ~MM_MN_PREVENT_CHANGE : ~0;
                mutex_exit(&un->un_owner_mx);
                /*
                 * Allow further i/o to occur. Any write() from another node
                 * will now cause another ownership change to occur.
                 */
                md_ioctl_writerexit(lock);
        } else {
                /* Update the in-core mirror owner */
                un->un_mirror_owner = p->d.owner;
                /* Sets node owner of un_rr_dirty record */
                if (un->un_rr_dirty_recid)
                        (void) mddb_setowner(un->un_rr_dirty_recid, p->d.owner);
                un->un_owner_state |= (p->d.flags & MD_MN_MM_PREVENT_CHANGE) ?
                    MM_MN_PREVENT_CHANGE : 0;
                un->un_owner_state &= (p->d.flags & MD_MN_MM_ALLOW_CHANGE) ?
                    ~MM_MN_PREVENT_CHANGE : ~0;
                mutex_exit(&un->un_owner_mx);
        }
        return (0);
}
1980 /*
1981 * mirror_allocate_hotspare:
1982 * ------------------------
1983 * Called to allocate a hotspare for a failed component. This function is
1984 * called by the MD_MN_ALLOCATE_HOTSPARE ioctl.
1985 */
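/*
 * A hedged note: the MD_HOTSPARE_NO_XMIT flag passed to
 * check_comp_4_hotspares() below suggests that no further cluster
 * message is transmitted from this path; the ioctl itself arrives
 * as the per-node leg of an already-broadcast hotspare request.
 */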
1986 static int
1987 mirror_allocate_hotspare(md_alloc_hotsp_params_t *p, IOLOCK *lockp)
1988 {
1989 set_t setno;
1990 mm_unit_t *un;
1991
1992 #ifdef DEBUG
1993 if (mirror_debug_flag)
1994 printf("mirror_allocate_hotspare: mnum,sm,comp = %x, %x, %x\n",
1995 p->mnum, p->sm, p->comp);
1996 #endif
1997
1998 if ((un = mirror_getun(p->mnum, &p->mde, WR_LOCK, lockp)) == NULL)
1999 return (EINVAL);
2000
2001 /* This function is only valid for a multi-node set */
2002 setno = MD_MIN2SET(p->mnum);
2003 if (!MD_MNSET_SETNO(setno)) {
2004 return (EINVAL);
2005 }
2006 (void) check_comp_4_hotspares(un, p->sm, p->comp, MD_HOTSPARE_NO_XMIT,
2007 p->hs_id, lockp);
2008 md_ioctl_writerexit(lockp);
2009 return (0);
2010 }
2011
2012 /*
2013 * mirror_get_owner_status:
2014 * -----------------------
2015 * Return the status of a previously issued ioctl to change ownership. This is
2016 * required for soft-partition support as the request to change mirror owner
2017 * needs to be run from a separate daemon thread.
2018 *
2019 * Returns:
2020 * 0 Success (contents of un_mirror_owner_status placed in 'flags')
2021 * EINVAL Invalid unit
2022 */
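/*
 * Illustrative polling sketch (not compiled; a hypothetical caller,
 * field usage inferred from the code below):
 *
 *	(void) ioctl(fd, MD_MN_SET_MM_OWNER, &ownp);
 *	do {
 *		(void) ioctl(fd, MD_MN_MM_OWNER_STATUS, &statp);
 *	} while (statp.flags == 0);
 */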
2023 static int
2024 mirror_get_owner_status(md_mn_own_status_t *p, IOLOCK *lock)
2025 {
2026 mm_unit_t *un;
2027 set_t setno;
2028
2029 if ((un = mirror_getun(p->mnum, &p->mde, RD_LOCK, lock)) == NULL)
2030 return (EINVAL);
2031
2032 setno = MD_MIN2SET(p->mnum);
2033 if (!MD_MNSET_SETNO(setno)) {
2034 return (EINVAL);
2035 }
2036
2037 p->flags = un->un_mirror_owner_status;
2038 return (0);
2039 }
2040
2041 /*
2042 * mirror_set_state:
2043 * ---------------
2044 * Called to set the state of a submirror component to the specified
2045 * value. This function is called by the MD_MN_SET_STATE ioctl.
2046 */
2047 static int
2048 mirror_set_state(md_set_state_params_t *p, IOLOCK *lockp)
2049 {
2050 mm_unit_t *un;
2051 mm_submirror_t *sm;
2052 mm_submirror_ic_t *smic;
2053 md_m_shared_t *shared;
2054 set_t setno;
2055
2056 #ifdef DEBUG
2057 if (mirror_debug_flag)
2058 printf("mirror_set_state: mnum,sm,comp,state, hs_id = %x, "
2059 "%x, %x, %x %x\n", p->mnum, p->sm, p->comp,
2060 p->state, p->hs_id);
2061 #endif
2062 if ((un = mirror_getun(p->mnum, &p->mde, WR_LOCK, lockp)) == NULL)
2063 return (EINVAL);
2064
2065 /* This function is only valid for a multi-node set */
2066 setno = MD_MIN2SET(p->mnum);
2067 if (!MD_MNSET_SETNO(setno)) {
2068 return (EINVAL);
2069 }
2070 sm = &un->un_sm[p->sm];
2071 smic = &un->un_smic[p->sm];
2072
2073 /* Set state in component and update ms_flags */
2074 shared = (md_m_shared_t *)
2075 (*(smic->sm_shared_by_indx))(sm->sm_dev, sm, p->comp);
2076 /*
2077 * If a CS_ERRED state is being sent, verify that the sender
2078 * has the same view of the component that this node currently has.
2079 *
2080 * There is a case where the sender was sending a CS_ERRED when a
2081 * component was in error, but before the sender returns from
2082 * ksend_message the component has been hotspared and resync'd.
2083 *
2084 * In this case, the hs_id will be different from the shared ms_hs_id,
2085 * so the component has already been hotspared. Just return in this
2086 * case.
2087 */
2088 if (p->state == CS_ERRED) {
2089 if (shared->ms_hs_id != p->hs_id) {
2090 #ifdef DEBUG
2091 if (mirror_debug_flag) {
2092 printf("mirror_set_state: short circuit "
2093 "hs_id=0x%x, ms_hs_id=0x%x\n",
2094 p->hs_id, shared->ms_hs_id);
2095 }
2096 #endif
2097 /* release the block on writes to the mirror */
2098 mirror_resume_writes(un);
2099 md_ioctl_writerexit(lockp);
2100 return (0);
2101 }
2102 }
2103
2104 /*
2105 * If the device is newly errored then make sure that it is
2106 * closed. Closing the device allows for the RCM framework
2107 * to unconfigure the device if required.
2108 */
2109 if (!(shared->ms_state & CS_ERRED) && (p->state & CS_ERRED) &&
2110 (shared->ms_flags & MDM_S_ISOPEN)) {
2111 void (*get_dev)();
2112 ms_cd_info_t cd;
2113
2114 get_dev = (void (*)())md_get_named_service(sm->sm_dev, 0,
2115 "get device", 0);
2116 (void) (*get_dev)(sm->sm_dev, sm, p->comp, &cd);
2117
2118 md_layered_close(cd.cd_dev, MD_OFLG_NULL);
2119 shared->ms_flags &= ~MDM_S_ISOPEN;
2120 }
2121
2122 shared->ms_state = p->state;
2123 uniqtime32(&shared->ms_timestamp);
2124
2125 if (p->state == CS_ERRED) {
2126 shared->ms_flags |= MDM_S_NOWRITE;
2127 } else {
2128 shared->ms_flags &= ~MDM_S_NOWRITE;
}
2129
2130 shared->ms_flags &= ~MDM_S_IOERR;
2131 un->un_changecnt++;
2132 shared->ms_lasterrcnt = un->un_changecnt;
2133
2134 /* Update state in submirror */
2135 mirror_set_sm_state(sm, smic, SMS_RUNNING, 0);
2136 /*
2137 * Commit the state change to the metadb, only the master will write
2138 * to disk
2139 */
2140 mirror_commit(un, SMI2BIT(p->sm), 0);
2141
2142 /* release the block on writes to the mirror */
2143 mirror_resume_writes(un);
2144
2145 /* generate NOTIFY events for error state changes */
2146 if (p->state == CS_ERRED) {
2147 SE_NOTIFY(EC_SVM_STATE, ESC_SVM_ERRED, SVM_TAG_METADEVICE,
2148 MD_UN2SET(un), MD_SID(un));
2149 } else if (p->state == CS_LAST_ERRED) {
2150 SE_NOTIFY(EC_SVM_STATE, ESC_SVM_LASTERRED, SVM_TAG_METADEVICE,
2151 MD_UN2SET(un), MD_SID(un));
2152 }
2153 md_ioctl_writerexit(lockp);
2154 return (0);
2155 }
2156
2157 /*
2158 * mirror_suspend_writes:
2159 * ---------------------
2160 * Called to suspend writes to a mirror region. The flag un_suspend_wr_flag is
2161 * tested in mirror_write_strategy, and if set all writes are blocked.
2162 * This function is called by the MD_MN_SUSPEND_WRITES ioctl.
2163 */
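/*
 * A minimal sketch (an assumption, simplified) of the consumer side
 * in mirror_write_strategy():
 *
 *	mutex_enter(&un->un_suspend_wr_mx);
 *	while (un->un_suspend_wr_flag)
 *		cv_wait(&un->un_suspend_wr_cv, &un->un_suspend_wr_mx);
 *	mutex_exit(&un->un_suspend_wr_mx);
 *
 * mirror_resume_writes() clears the flag and broadcasts
 * un_suspend_wr_cv to release any waiters.
 */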
2164 static int
2165 mirror_suspend_writes(md_suspend_wr_params_t *p)
2166 {
2167 set_t setno;
2168 mm_unit_t *un;
2169
2170 #ifdef DEBUG
2171 if (mirror_debug_flag)
2172 printf("mirror_suspend_writes: mnum = %x\n", p->mnum);
2173 #endif
2174 if ((un = mirror_getun(p->mnum, &p->mde, NO_LOCK, NULL)) == NULL)
2175 return (EINVAL); /* No unit */
2176
2177 /* This function is only valid for a multi-node set */
2178 setno = MD_MIN2SET(p->mnum);
2179 if (!MD_MNSET_SETNO(setno)) {
2180 return (EINVAL);
2181 }
2182
2183 /*
2184 * Mark the resync as blocked. This will stop any currently running
2185 * resync thread and will prevent a new resync from attempting to
2186 * perform i/o.
2187 */
2188 mutex_enter(&un->un_rs_thread_mx);
2189 un->un_rs_thread_flags |= MD_RI_BLOCK;
2190 mutex_exit(&un->un_rs_thread_mx);
2191
2192 mutex_enter(&un->un_suspend_wr_mx);
2193 un->un_suspend_wr_flag = 1;
2194 mutex_exit(&un->un_suspend_wr_mx);
2195
2196 return (0);
2197 }
2198
2199 /*
2200 * mirror_set_capability:
2201 * ------------------------
2202 * Called to set or clear a capability for a mirror.
2203 * This function is called by the MD_MN_SET_CAP ioctl.
2204 */
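/*
 * DKV_ABR_CAP is Application Based Recovery and DKV_DMR_CAP is
 * Directed Mirror Read; both are held as transient state bits
 * (MD_ABR_CAP/MD_DMR_CAP) in ui_tstate rather than in the on-disk
 * unit structure.
 */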
2205 static int
2206 mirror_set_capability(md_mn_setcap_params_t *p, IOLOCK *lockp)
2207 {
2208 set_t setno;
2209 mm_unit_t *un;
2210 mdi_unit_t *ui;
2211
2212 #ifdef DEBUG
2213 if (mirror_debug_flag)
2214 printf("mirror_set_capability: mnum = %x\n", p->mnum);
2215 #endif
2216 if ((un = mirror_getun(p->mnum, &p->mde, RD_LOCK, lockp)) == NULL)
2217 return (EINVAL);
2218
2219 /* This function is only valid for a multi-node set */
2220 setno = MD_MIN2SET(p->mnum);
2221 if (!MD_MNSET_SETNO(setno)) {
2222 return (EINVAL);
2223 }
2224 ui = MDI_UNIT(p->mnum);
2225
2226 if (p->sc_set & DKV_ABR_CAP) {
2227 ui->ui_tstate |= MD_ABR_CAP; /* Set ABR capability */
2228 /* Clear DRL and set owner to 0 if no resync active */
2229 mirror_process_unit_resync(un);
2230 if (!(un->c.un_status & MD_UN_RESYNC_ACTIVE)) {
2231 mutex_enter(&un->un_owner_mx);
2232 un->un_mirror_owner = 0;
2233 mutex_exit(&un->un_owner_mx);
2234 }
2235 } else {
2236 ui->ui_tstate &= ~MD_ABR_CAP; /* Clear ABR capability */
2237 }
2238 if (p->sc_set & DKV_DMR_CAP) {
2239 ui->ui_tstate |= MD_DMR_CAP; /* Set DMR capability */
2240 } else {
2241 ui->ui_tstate &= ~MD_DMR_CAP; /* Clear DMR capability */
2242 }
2243 return (0);
2244 }
2245
2246 /*
2247 * mirror_choose_owner:
2248 * ------------------------
2249 * Called to choose an owner for a mirror resync. Can be called when starting
2250 * resync or by the MD_MN_SET_MM_OWNER ioctl with the MD_MN_MM_CHOOSE_OWNER flag
2251 * set. The ioctl is called with this flag set when we are in the cluster
2252 * reconfig and we wish to set a new owner for a resync whose owner has left
2253 * the cluster. We use a resync owner count to implement a round-robin
2254 * allocation of resync owners. We send a message to the master including
2255 * this count and the message handler uses it to select an owner from the
2256 * nodelist and then sends a SET_MM_OWNER message to the chosen node to
2257 * become the owner.
2258 *
2259 * Input:
2260 * un - unit reference
2261 * ownp - owner information (if non-NULL)
2262 */
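/*
 * Illustrative sketch (an assumption; the real selection happens in
 * the master's message handler, not here): with N nodes in the set
 * the handler effectively picks
 *
 *	owner = nodelist[msg_chooseid_rcnt % N];
 *
 * and then sends SET_MM_OWNER to that node, giving the round-robin
 * behaviour described above.
 */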
2263 int
2264 mirror_choose_owner(mm_unit_t *un, md_mn_req_owner_t *ownp)
2265 {
2266 set_t setno;
2267 md_mn_msg_chooseid_t *msg;
2268
2269 /* This function is only valid for a multi-node set */
2270 setno = MD_UN2SET(un);
2271 if (!MD_MNSET_SETNO(setno)) {
2272 return (EINVAL);
2273 }
2274
2276 #ifdef DEBUG
2277 if (mirror_debug_flag)
2278 printf("send choose owner message, mnum = %x,"
2279 "rcnt = %d\n", MD_SID(un), md_set[setno].s_rcnt);
2280 #endif
2281
2282 /*
2283 * Set up the message with the current resync count
2284 * and then increment the count. If we're called with a non-NULL
2285 * owner then we are reestablishing the owner of the mirror. In this
2286 * case we have to flag this to the message handler and set rcnt to
2287 * the new owner node.
2288 */
2289 msg = kmem_zalloc(sizeof (md_mn_msg_chooseid_t), KM_SLEEP);
2290 msg->msg_chooseid_mnum = MD_SID(un);
2291 if (ownp == NULL) {
2292 mutex_enter(&md_mx);
2293 msg->msg_chooseid_rcnt = md_set[setno].s_rcnt;
2294 md_set[setno].s_rcnt++;
2295 mutex_exit(&md_mx);
2296 msg->msg_chooseid_set_node = B_FALSE;
2297 } else {
2298 msg->msg_chooseid_rcnt = ownp->owner;
2299 msg->msg_chooseid_set_node = B_TRUE;
2300 }
2301
2302 /*
2303 * Spawn a thread to issue the ksend_message() call so that we can
2304 * drop the ioctl lock hierarchy that is blocking further rpc.metad and
2305 * commd set ownership checking.
2306 */
2307 if (thread_create(NULL, 0, mirror_choose_owner_thread, (caddr_t)msg,
2308 0, &p0, TS_RUN, 60) == NULL) {
2309 kmem_free(msg, sizeof (md_mn_msg_chooseid_t));
2310 return (EFAULT);
2311 } else {
2312 return (0);
2313 }
2314 }
2315
2316 /*
2317 * mirror_get_status:
2318 * ----------------------------------
2319 * Called by nodes which are not the master node of the cluster. Obtains the
2320 * master abr state and the submirror status for each valid submirror of the
2321 * unit so that the status returned by metastat is consistent across the
2322 * cluster.
2323 * We update tstate for the mirror and both the sm_flags and the sm_state for
2324 * each submirror.
2325 *
2326 * Input:
2327 * un mirror to obtain status from
2328 *
2329 * Calling Convention:
2330 * writerlock (either ioctl or unit) must be held
2331 */
2332 void
2333 mirror_get_status(mm_unit_t *un, IOLOCK *lockp)
2334 {
2335 mm_submirror_t *sm;
2336 int smi;
2337 int rval;
2338 md_mn_kresult_t *kres;
2339 md_mn_msg_mir_state_t msg;
2340 md_mn_msg_mir_state_res_t *res;
2341 set_t setno = MD_UN2SET(un);
2342 mdi_unit_t *ui = MDI_UNIT(MD_SID(un));
2343
2345 ASSERT(ui->ui_lock & MD_UL_WRITER);
2346
2347 /*
2348 * Get all of the information for the mirror.
2349 */
2350 bzero(&msg, sizeof (msg));
2351 msg.mir_state_mnum = MD_SID(un);
2352
2353 /*
2354 * Must drop the writerlock over ksend_message since another
2355 * thread on this node could be running a higher class message
2356 * and be trying to grab the readerlock.
2357 *
2358 * If we are in the context of an ioctl, drop the ioctl lock.
2359 * lockp holds the list of locks held.
2360 */
2361 if (lockp) {
2362 IOLOCK_RETURN_RELEASE(0, lockp);
2363 } else {
2364 md_unit_writerexit(ui);
2365 }
2366
2367 kres = kmem_alloc(sizeof (md_mn_kresult_t), KM_SLEEP);
2368 rval = mdmn_ksend_message(setno, MD_MN_MSG_GET_MIRROR_STATE,
2369 MD_MSGF_NO_BCAST | MD_MSGF_NO_LOG, 0, (char *)&msg,
2370 sizeof (msg), kres);
2371
2372 /* If the node hasn't yet joined, that's OK. */
2373 if ((!MDMN_KSEND_MSG_OK(rval, kres)) &&
2374 (kres->kmmr_comm_state != MDMNE_NOT_JOINED)) {
2375 mdmn_ksend_show_error(rval, kres, "GET_MIRROR_STATE");
2376 cmn_err(CE_WARN, "ksend_message failure: GET_MIRROR_STATE");
2377 }
2378
2379 /* if dropped the lock previously, regain it */
2380 if (lockp) {
2381 IOLOCK_RETURN_REACQUIRE(lockp);
2382 } else {
2383 /*
2384 * Reacquire dropped locks and update acquirecnts
2385 * appropriately.
2386 */
2387 (void) md_unit_writerlock(ui);
2388 }
2389
2390 /*
2391 * Check to see if we've got a believable amount of returned data.
2392 * If not, we simply return as there is no usable information.
2393 */
2394 if (kres->kmmr_res_size < sizeof (*res)) {
2395 cmn_err(CE_WARN, "GET_MIRROR_STATE: returned %d bytes, expected"
2396 " %d\n", kres->kmmr_res_size, (int)sizeof (*res));
2397 kmem_free(kres, sizeof (md_mn_kresult_t));
2398 return;
2399 }
2400
2401 /*
2402 * Copy the results from the call back into our sm_state/sm_flags
2403 */
2404 res = (md_mn_msg_mir_state_res_t *)kres->kmmr_res_data;
2405 #ifdef DEBUG
2406 if (mirror_debug_flag)
2407 printf("mirror_get_status: %s\n", md_shortname(MD_SID(un)));
2408 #endif
2409 for (smi = 0; smi < NMIRROR; smi++) {
2410 sm = &un->un_sm[smi];
2411 #ifdef DEBUG
2412 if (mirror_debug_flag) {
2413 printf("curr state %4x, new state %4x\n", sm->sm_state,
2414 res->sm_state[smi]);
2415 printf("curr_flags %4x, new flags %4x\n", sm->sm_flags,
2416 res->sm_flags[smi]);
2417 }
2418 #endif
2419 sm->sm_state = res->sm_state[smi];
2420 sm->sm_flags = res->sm_flags[smi];
2421 }
2422
2423 /* Set ABR if set on the Master node */
2424 ui->ui_tstate |= (res->mir_tstate & MD_ABR_CAP);
2425
2426 kmem_free(kres, sizeof (md_mn_kresult_t));
2427 }
2428
2429 /*
2430 * mirror_get_mir_state:
2431 * -------------------
2432 * Obtain the ABR state of a mirror and the state of all submirrors from the
2433 * master node for the unit specified in sm_state->mnum.
2434 * Called by MD_MN_GET_MIRROR_STATE ioctl.
2435 */
2436 static int
2437 mirror_get_mir_state(md_mn_get_mir_state_t *p, IOLOCK *lockp)
2438 {
2439 mm_unit_t *un;
2440 set_t setno;
2441 md_error_t mde;
2442
2443 mdclrerror(&mde);
2444
2445 if ((un = mirror_getun(p->mnum, &mde, WR_LOCK, lockp)) == NULL) {
2446 return (EINVAL);
2447 }
2448 setno = MD_MIN2SET(p->mnum);
2449 if (!MD_MNSET_SETNO(setno)) {
2450 return (EINVAL);
2451 }
2452
2453 /*
2454 * We've now got a writerlock on the unit structure (so no-one can
2455 * modify the incore values) and we'll now send the message to the
2456 * master node. Since we're only called as part of a reconfig cycle
2457 * we don't need to release the unit locks across the ksend_message as
2458 * only the master node will process it, and we never send this to
2459 * ourselves if we're the master.
2460 */
2461
2462 mirror_get_status(un, lockp);
2463
2464 return (0);
2465 }
2466
2467 static int
2468 mirror_admin_ioctl(int cmd, void *data, int mode, IOLOCK *lockp)
2469 {
2470 size_t sz = 0;
2471 void *d = NULL;
2472 int err = 0;
2473
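/*
 * A hedged note: the internal metadevice utilities issue these
 * commands as 32-bit clients, which is presumably why only the
 * ILP32 data model is accepted below.
 */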
2474 /* We can only handle 32-bit clients for internal commands */
2475 if ((mode & DATAMODEL_MASK) != DATAMODEL_ILP32) {
2476 return (EINVAL);
2477 }
2478 /* dispatch ioctl */
2479 switch (cmd) {
2480
2481 case MD_IOCSET:
2482 {
2483 if (! (mode & FWRITE))
2484 return (EACCES);
2485
2486 sz = sizeof (md_set_params_t);
2487
2488 d = kmem_alloc(sz, KM_SLEEP);
2489
2490 if (ddi_copyin(data, d, sz, mode)) {
2491 err = EFAULT;
2492 break;
2493 }
2494
2495 err = mirror_set(d, mode);
2496 break;
2497 }
2498
2499 case MD_IOCGET:
2500 {
2501 if (! (mode & FREAD))
2502 return (EACCES);
2503
2504 sz = sizeof (md_i_get_t);
2505
2506 d = kmem_alloc(sz, KM_SLEEP);
2507
2508 if (ddi_copyin(data, d, sz, mode)) {
2509 err = EFAULT;
2510 break;
2511 }
2512
2513 err = mirror_get(d, mode, lockp);
2514 break;
2515 }
2516
2517 case MD_IOCRESET:
2518 {
2519 if (! (mode & FWRITE))
2520 return (EACCES);
2521
2522 sz = sizeof (md_i_reset_t);
2523 d = kmem_alloc(sz, KM_SLEEP);
2524
2525 if (ddi_copyin(data, d, sz, mode)) {
2526 err = EFAULT;
2527 break;
2528 }
2529
2530 err = mirror_reset((md_i_reset_t *)d);
2531 break;
2532 }
2533
2534 case MD_IOCSETSYNC:
2535 case MD_MN_SETSYNC:
2536 {
2537 if (! (mode & FWRITE))
2538 return (EACCES);
2539
2540 sz = sizeof (md_resync_ioctl_t);
2541 d = kmem_alloc(sz, KM_SLEEP);
2542
2543 if (ddi_copyin(data, d, sz, mode)) {
2544 err = EFAULT;
2545 break;
2546 }
2547
2548 err = mirror_ioctl_resync((md_resync_ioctl_t *)d, lockp);
2549 break;
2550 }
2551
2552 case MD_IOCGETSYNC:
2553 {
2554 if (! (mode & FREAD))
2555 return (EACCES);
2556
2557 sz = sizeof (md_resync_ioctl_t);
2558 d = kmem_alloc(sz, KM_SLEEP);
2559
2560 if (ddi_copyin(data, d, sz, mode)) {
2561 err = EFAULT;
2562 break;
2563 }
2564
2565 err = mirror_get_resync((md_resync_ioctl_t *)d);
2566 break;
2567 }
2568
2569 case MD_IOCREPLACE:
2570 {
2571 if (! (mode & FWRITE))
2572 return (EACCES);
2573
2574 sz = sizeof (replace_params_t);
2575 d = kmem_alloc(sz, KM_SLEEP);
2576
2577 if (ddi_copyin(data, d, sz, mode)) {
2578 err = EFAULT;
2579 break;
2580 }
2581
2582 err = comp_replace((replace_params_t *)d, lockp);
2583 break;
2584 }
2585
2586 case MD_IOCOFFLINE:
2587 {
2588 if (! (mode & FWRITE))
2589 return (EACCES);
2590
2591 sz = sizeof (md_i_off_on_t);
2592 d = kmem_alloc(sz, KM_SLEEP);
2593
2594 if (ddi_copyin(data, d, sz, mode)) {
2595 err = EFAULT;
2596 break;
2597 }
2598
2599 err = mirror_offline((md_i_off_on_t *)d, lockp);
2600 break;
2601 }
2602
2603 case MD_IOCONLINE:
2604 {
2605 if (! (mode & FWRITE))
2606 return (EACCES);
2607
2608 sz = sizeof (md_i_off_on_t);
2609 d = kmem_alloc(sz, KM_SLEEP);
2610
2611 if (ddi_copyin(data, d, sz, mode)) {
2612 err = EFAULT;
2613 break;
2614 }
2615
2616 err = mirror_online((md_i_off_on_t *)d, lockp);
2617 break;
2618 }
2619
2620 case MD_IOCDETACH:
2621 {
2622 if (! (mode & FWRITE))
2623 return (EACCES);
2624
2625 sz = sizeof (md_detach_params_t);
2626 d = kmem_alloc(sz, KM_SLEEP);
2627
2628 if (ddi_copyin(data, d, sz, mode)) {
2629 err = EFAULT;
2630 break;
2631 }
2632
2633 err = mirror_detach((md_detach_params_t *)d, lockp);
2634 break;
2635 }
2636
2637 case MD_IOCATTACH:
2638 {
2639
2640 if (! (mode & FWRITE))
2641 return (EACCES);
2642
2643 sz = sizeof (md_att_struct_t);
2644 d = kmem_alloc(sz, KM_SLEEP);
2645
2646 if (ddi_copyin(data, d, sz, mode)) {
2647 err = EFAULT;
2648 break;
2649 }
2650
2651 err = mirror_attach((md_att_struct_t *)d, lockp);
2652 break;
2653 }
2654
2655 case MD_IOCGET_DEVS:
2656 {
2657 if (! (mode & FREAD))
2658 return (EACCES);
2659
2660 sz = sizeof (md_getdevs_params_t);
2661
2662 d = kmem_alloc(sz, KM_SLEEP);
2663
2664 if (ddi_copyin(data, d, sz, mode)) {
2665 err = EFAULT;
2666 break;
2667 }
2668
2669 err = mirror_getdevs(d, mode, lockp);
2670 break;
2671 }
2672
2673 case MD_IOCGROW:
2674 {
2675 if (! (mode & FWRITE))
2676 return (EACCES);
2677
2678 sz = sizeof (md_grow_params_t);
2679
2680 d = kmem_alloc(sz, KM_SLEEP);
2681
2682 if (ddi_copyin(data, d, sz, mode)) {
2683 err = EFAULT;
2684 break;
2685 }
2686
2687 err = mirror_grow(d, lockp);
2688 break;
2689 }
2690
2691 case MD_IOCCHANGE:
2692 {
2693 if (! (mode & FWRITE))
2694 return (EACCES);
2695
2696 sz = sizeof (md_mirror_params_t);
2697 d = kmem_alloc(sz, KM_SLEEP);
2698
2699 if (ddi_copyin(data, d, sz, mode)) {
2700 err = EFAULT;
2701 break;
2702 }
2703
2704 err = mirror_change((md_mirror_params_t *)d, lockp);
2705 break;
2706 }
2707
2708 case MD_IOCPROBE_DEV:
2709 {
2710 md_probedev_impl_t *p = NULL;
2711 md_probedev_t *ph = NULL;
2712 daemon_queue_t *hdr = NULL;
2713 int i;
2714 size_t sz2 = 0;
2715
2716 if (! (mode & FREAD))
2717 return (EACCES);
2718
2720 sz = sizeof (md_probedev_t);
2721 d = kmem_alloc(sz, KM_SLEEP);
2722
2723 /* now copy in the data */
2724 if (ddi_copyin(data, d, sz, mode)) {
2725 err = EFAULT;
2726 goto free_mem;
2727 }
2728
2729 /*
2730 * Sanity-test the args. The test name must contain the keyword
2731 * "probe".
2732 */
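/*
 * For example, a test_name of "probe open test" passes the strstr()
 * check below, while any name without the substring "probe" is
 * rejected with EINVAL.
 */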
2733
2734 p = kmem_alloc(sizeof (md_probedev_impl_t), KM_SLEEP);
2735
2736 p->probe_sema = NULL;
2737 p->probe_mx = NULL;
2738 p->probe.mnum_list = (uint64_t)NULL;
2739
2740 ph = (struct md_probedev *)d;
2741
2742 p->probe.nmdevs = ph->nmdevs;
2743 (void) strcpy(p->probe.test_name, ph->test_name);
2744 bcopy(&ph->md_driver, &(p->probe.md_driver),
2745 sizeof (md_driver_t));
2746
2747 if ((p->probe.nmdevs < 1) ||
2748 (strstr(p->probe.test_name, "probe") == NULL)) {
2749 err = EINVAL;
2750 goto free_mem;
2751 }
2752
2754 sz2 = sizeof (minor_t) * p->probe.nmdevs;
2755 p->probe.mnum_list = (uint64_t)(uintptr_t)kmem_alloc(sz2,
2756 KM_SLEEP);
2757
2758 if (ddi_copyin((void *)(uintptr_t)ph->mnum_list,
2759 (void *)(uintptr_t)p->probe.mnum_list, sz2, mode)) {
2760 err = EFAULT;
2761 goto free_mem;
2762 }
2763
2764 if ((err = md_init_probereq(p, &hdr)) != 0)
2765 goto free_mem;
2766
2767 /*
2768 * put the request on the queue and wait.
2769 */
2770
2771 daemon_request_new(&md_ff_daemonq, md_probe_one, hdr, REQ_NEW);
2772
2773 (void) IOLOCK_RETURN(0, lockp);
2774 /* wait for the events to occur */
2775 for (i = 0; i < p->probe.nmdevs; i++) {
2776 sema_p(PROBE_SEMA(p));
2777 }
2778 while (md_ioctl_lock_enter() == EINTR)
2779 ;
2780
2781 /*
2782 * Clean up. The hdr list is freed in the probe routines,
2783 * so the list is NULL by the time we get here.
2784 */
2785 free_mem:
2786 if (p) {
2787 if (p->probe_sema != NULL) {
2788 sema_destroy(PROBE_SEMA(p));
2789 kmem_free(p->probe_sema, sizeof (ksema_t));
2790 }
2791 if (p->probe_mx != NULL) {
2792 mutex_destroy(PROBE_MX(p));
2793 kmem_free(p->probe_mx, sizeof (kmutex_t));
2794 }
2795 if ((uintptr_t)p->probe.mnum_list)
2796 kmem_free((void *)(uintptr_t)
2797 p->probe.mnum_list, sz2);
2798
2799 kmem_free(p, sizeof (md_probedev_impl_t));
2800 }
2801 break;
2802 }
2803
2804 case MD_MN_SET_MM_OWNER:
2805 {
2806 if (! (mode & FWRITE))
2807 return (EACCES);
2808
2809 sz = sizeof (md_set_mmown_params_t);
2810 d = kmem_alloc(sz, KM_SLEEP);
2811
2812 if (ddi_copyin(data, d, sz, mode) != 0) {
2813 err = EFAULT;
2814 break;
2815 }
2816
2817 err = mirror_set_owner((md_set_mmown_params_t *)d, lockp);
2818 break;
2819 }
2820
2821 case MD_MN_GET_MM_OWNER:
2822 {
2823 if (! (mode & FREAD))
2824 return (EACCES);
2825
2826 sz = sizeof (md_set_mmown_params_t);
2827 d = kmem_alloc(sz, KM_SLEEP);
2828
2829 if (ddi_copyin(data, d, sz, mode) != 0) {
2830 err = EFAULT;
2831 break;
2832 }
2833
2834 err = mirror_get_owner((md_set_mmown_params_t *)d, lockp);
2835 break;
2836 }
2837
2838 case MD_MN_MM_OWNER_STATUS:
2839 {
2840 if (! (mode & FREAD))
2841 return (EACCES);
2842
2843 sz = sizeof (md_mn_own_status_t);
2844 d = kmem_alloc(sz, KM_SLEEP);
2845
2846 if (ddi_copyin(data, d, sz, mode) != 0) {
2847 err = EFAULT;
2848 break;
2849 }
2850
2851 err = mirror_get_owner_status((md_mn_own_status_t *)d, lockp);
2852 break;
2853 }
2854
2855 case MD_MN_SET_STATE:
2856 {
2857 if (! (mode & FWRITE))
2858 return (EACCES);
2859
2860 sz = sizeof (md_set_state_params_t);
2861 d = kmem_alloc(sz, KM_SLEEP);
2862
2863 if (ddi_copyin(data, d, sz, mode)) {
2864 err = EFAULT;
2865 break;
2866 }
2867
2868 err = mirror_set_state((md_set_state_params_t *)d, lockp);
2869 break;
2870 }
2871
2872 case MD_MN_SUSPEND_WRITES:
2873 {
2874 if (! (mode & FREAD))
2875 return (EACCES);
2876
2877 sz = sizeof (md_suspend_wr_params_t);
2878 d = kmem_alloc(sz, KM_SLEEP);
2879
2880 if (ddi_copyin(data, d, sz, mode) != 0) {
2881 err = EFAULT;
2882 break;
2883 }
2884
2885 err = mirror_suspend_writes((md_suspend_wr_params_t *)d);
2886 break;
2887 }
2888
2889 case MD_MN_RESYNC:
2890 {
2891 sz = sizeof (md_mn_rs_params_t);
2892 d = kmem_alloc(sz, KM_SLEEP);
2893
2894 if (ddi_copyin(data, d, sz, mode) != 0) {
2895 err = EFAULT;
2896 break;
2897 }
2898
2899 err = mirror_resync_message((md_mn_rs_params_t *)d, lockp);
2900 break;
2901 }
2902
2903 case MD_MN_ALLOCATE_HOTSPARE:
2904 {
2905 if (! (mode & FWRITE))
2906 return (EACCES);
2907
2908 sz = sizeof (md_alloc_hotsp_params_t);
2909 d = kmem_alloc(sz, KM_SLEEP);
2910
2911 if (ddi_copyin(data, d, sz, mode)) {
2912 err = EFAULT;
2913 break;
2914 }
2915
2916 err = mirror_allocate_hotspare((md_alloc_hotsp_params_t *)d,
2917 lockp);
2918 break;
2919 }
2920
2921 case MD_MN_POKE_HOTSPARES:
2922 {
2923 (void) poke_hotspares();
2924 break;
2925 }
2926
2927 case MD_MN_SET_CAP:
2928 {
2929 if (! (mode & FWRITE))
2930 return (EACCES);
2931
2932 sz = sizeof (md_mn_setcap_params_t);
2933 d = kmem_alloc(sz, KM_SLEEP);
2934
2935 if (ddi_copyin(data, d, sz, mode)) {
2936 err = EFAULT;
2937 break;
2938 }
2939
2940 err = mirror_set_capability((md_mn_setcap_params_t *)d,
2941 lockp);
2942 break;
2943 }
2944
2945 case MD_MN_GET_MIRROR_STATE:
2946 {
2947 sz = sizeof (md_mn_get_mir_state_t);
2948 d = kmem_zalloc(sz, KM_SLEEP);
2949
2950 if (ddi_copyin(data, d, sz, mode)) {
2951 err = EFAULT;
2952 break;
2953 }
2954
2955 err = mirror_get_mir_state((md_mn_get_mir_state_t *)d,
2956 lockp);
2957 break;
2958 }
2959
2960 case MD_MN_RR_DIRTY:
2961 {
2962 sz = sizeof (md_mn_rr_dirty_params_t);
2963 d = kmem_zalloc(sz, KM_SLEEP);
2964
2965 if (ddi_copyin(data, d, sz, mode)) {
2966 err = EFAULT;
2967 break;
2968 }
2969
2970 err = mirror_set_dirty_rr((md_mn_rr_dirty_params_t *)d);
2971 break;
2972 }
2973
2974 case MD_MN_RR_CLEAN:
2975 {
2976 md_mn_rr_clean_params_t tmp;
2977
2978 /* get the first part of the structure to find the size */
2979 if (ddi_copyin(data, &tmp, sizeof (tmp), mode)) {
2980 err = EFAULT;
2981 break;
2982 }
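/*
 * The fixed-size header read above is enough for
 * MDMN_RR_CLEAN_PARAMS_SIZE() to compute the full length of the
 * variable-sized structure, which the second copyin then fetches.
 */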
2983
2984 sz = MDMN_RR_CLEAN_PARAMS_SIZE(&tmp);
2985 d = kmem_zalloc(sz, KM_SLEEP);
2986
2987 if (ddi_copyin(data, d, sz, mode)) {
2988 err = EFAULT;
2989 break;
2990 }
2991
2992 err = mirror_set_clean_rr((md_mn_rr_clean_params_t *)d);
2993 break;
2994 }
2995
2996 default:
2997 return (ENOTTY);
2998 }
2999
3000 /*
3001 * copyout and free any args
3002 */
3003 if (sz != 0) {
3004 if (err == 0) {
3005 if (ddi_copyout(d, data, sz, mode) != 0) {
3006 err = EFAULT;
3007 }
3008 }
3009 kmem_free(d, sz);
3010 }
3011 return (err);
3012 }
3013
3014 int
3015 md_mirror_ioctl(
3016 dev_t ddi_dev,
3017 int cmd,
3018 void *data,
3019 int mode,
3020 IOLOCK *lockp
3021 )
3022 {
3023 minor_t mnum = getminor(ddi_dev);
3024 mm_unit_t *un;
3025 int err = 0;
3026
3027 /* handle admin ioctls */
3028 if (mnum == MD_ADM_MINOR)
3029 return (mirror_admin_ioctl(cmd, data, mode, lockp));
3030
3031 /* check unit */
3032 if ((MD_MIN2SET(mnum) >= md_nsets) ||
3033 (MD_MIN2UNIT(mnum) >= md_nunits) ||
3034 ((un = MD_UNIT(mnum)) == NULL))
3035 return (ENXIO);
3036 /* is this a supported ioctl? */
3037 err = md_check_ioctl_against_unit(cmd, un->c);
3038 if (err != 0) {
3039 return (err);
3040 }
3041
3042 /* dispatch ioctl */
3043 switch (cmd) {
3044
3045 case DKIOCINFO:
3046 {
3047 struct dk_cinfo *p;
3048
3049 if (! (mode & FREAD))
3050 return (EACCES);
3051
3052 p = kmem_alloc(sizeof (*p), KM_SLEEP);
3053
3054 get_info(p, mnum);
3055 if (ddi_copyout((caddr_t)p, data, sizeof (*p), mode) != 0)
3056 err = EFAULT;
3057
3058 kmem_free(p, sizeof (*p));
3059 return (err);
3060 }
3061
3062 case DKIOCGMEDIAINFO:
3063 {
3064 struct dk_minfo p;
3065
3066 if (! (mode & FREAD))
3067 return (EACCES);
3068
3069 get_minfo(&p, mnum);
3070 if (ddi_copyout(&p, data, sizeof (struct dk_minfo), mode) != 0)
3071 err = EFAULT;
3072
3073 return (err);
3074 }
3075
3076 case DKIOCGGEOM:
3077 {
3078 struct dk_geom *p;
3079
3080 if (! (mode & FREAD))
3081 return (EACCES);
3082
3083 p = kmem_alloc(sizeof (*p), KM_SLEEP);
3084
3085 if ((err = mirror_get_geom(un, p)) == 0) {
3086 if (ddi_copyout((caddr_t)p, data, sizeof (*p),
3087 mode) != 0)
3088 err = EFAULT;
3089 }
3090
3091 kmem_free(p, sizeof (*p));
3092 return (err);
3093 }
3094
3095 case DKIOCGVTOC:
3096 {
3097 struct vtoc *vtoc;
3098
3099 if (! (mode & FREAD))
3100 return (EACCES);
3101
3102 vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP);
3103
3104 if ((err = mirror_get_vtoc(un, vtoc)) != 0) {
3105 kmem_free(vtoc, sizeof (*vtoc));
3106 return (err);
3107 }
3108
3109 if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
3110 if (ddi_copyout(vtoc, data, sizeof (*vtoc), mode))
3111 err = EFAULT;
3112 }
3113 #ifdef _SYSCALL32
3114 else {
3115 struct vtoc32 *vtoc32;
3116
3117 vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP);
3118
3119 vtoctovtoc32((*vtoc), (*vtoc32));
3120 if (ddi_copyout(vtoc32, data, sizeof (*vtoc32), mode))
3121 err = EFAULT;
3122 kmem_free(vtoc32, sizeof (*vtoc32));
3123 }
3124 #endif /* _SYSCALL32 */
3125
3126 kmem_free(vtoc, sizeof (*vtoc));
3127 return (err);
3128 }
3129
3130 case DKIOCSVTOC:
3131 {
3132 struct vtoc *vtoc;
3133
3134 if (! (mode & FWRITE))
3135 return (EACCES);
3136
3137 vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP);
3138
3139 if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
3140 if (ddi_copyin(data, vtoc, sizeof (*vtoc), mode)) {
3141 err = EFAULT;
3142 }
3143 }
3144 #ifdef _SYSCALL32
3145 else {
3146 struct vtoc32 *vtoc32;
3147
3148 vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP);
3149
3150 if (ddi_copyin(data, vtoc32, sizeof (*vtoc32), mode)) {
3151 err = EFAULT;
3152 } else {
3153 vtoc32tovtoc((*vtoc32), (*vtoc));
3154 }
3155 kmem_free(vtoc32, sizeof (*vtoc32));
3156 }
3157 #endif /* _SYSCALL32 */
3158
3159 if (err == 0)
3160 err = mirror_set_vtoc(un, vtoc);
3161
3162 kmem_free(vtoc, sizeof (*vtoc));
3163 return (err);
3164 }
3165
3166 case DKIOCGEXTVTOC:
3167 {
3168 struct extvtoc *extvtoc;
3169
3170 if (! (mode & FREAD))
3171 return (EACCES);
3172
3173 extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP);
3174
3175 if ((err = mirror_get_extvtoc(un, extvtoc)) != 0) {
3176 kmem_free(extvtoc, sizeof (*extvtoc));
3177 return (err);
3178 }
3179
3180 if (ddi_copyout(extvtoc, data, sizeof (*extvtoc), mode))
3181 err = EFAULT;
3182
3183 kmem_free(extvtoc, sizeof (*extvtoc));
3184 return (err);
3185 }
3186
3187 case DKIOCSEXTVTOC:
3188 {
3189 struct extvtoc *extvtoc;
3190
3191 if (! (mode & FWRITE))
3192 return (EACCES);
3193
3194 extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP);
3195
3196 if (ddi_copyin(data, extvtoc, sizeof (*extvtoc), mode)) {
3197 err = EFAULT;
3198 }
3199
3200 if (err == 0)
3201 err = mirror_set_extvtoc(un, extvtoc);
3202
3203 kmem_free(extvtoc, sizeof (*extvtoc));
3204 return (err);
3205 }
3206
3207 case DKIOCGAPART:
3208 {
3209 struct dk_map dmp;
3210
3211 if ((err = mirror_get_cgapart(un, &dmp)) != 0) {
3212 return (err);
3213 }
3214
3215 if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
3216 if (ddi_copyout((caddr_t)&dmp, data, sizeof (dmp),
3217 mode) != 0)
3218 err = EFAULT;
3219 }
3220 #ifdef _SYSCALL32
3221 else {
3222 struct dk_map32 dmp32;
3223
3224 dmp32.dkl_cylno = dmp.dkl_cylno;
3225 dmp32.dkl_nblk = dmp.dkl_nblk;
3226
3227 if (ddi_copyout((caddr_t)&dmp32, data, sizeof (dmp32),
3228 mode) != 0)
3229 err = EFAULT;
3230 }
3231 #endif /* _SYSCALL32 */
3232
3233 return (err);
3234 }
3235 case DKIOCGETEFI:
3236 {
3237 /*
3238 * This one can be handled centrally;
3239 * no need to duplicate the same code for all types of metadevices.
3240 */
3241 return (md_dkiocgetefi(mnum, data, mode));
3242 }
3243 case DKIOCSETEFI:
3244 {
3245 /*
3246 * This one can be handled centrally;
3247 * no need to duplicate the same code for all types of metadevices.
3248 */
3249 return (md_dkiocsetefi(mnum, data, mode));
3250 }
3251 case DKIOCPARTITION:
3252 {
3253 return (md_dkiocpartition(mnum, data, mode));
3254 }
3255
3256 case DKIOCGETVOLCAP:
3257 {
3258 volcap_t vc;
3259 mdi_unit_t *ui;
3260
3261 /* Only valid for MN sets */
3262 if (!MD_MNSET_SETNO(MD_MIN2SET(mnum)))
3263 return (EINVAL);
3264
3265 ui = MDI_UNIT(mnum);
3266 if (! (mode & FREAD))
3267 return (EACCES);
3268
3269 vc.vc_info = DKV_ABR_CAP | DKV_DMR_CAP;
3270 vc.vc_set = 0;
3271 if (ui->ui_tstate & MD_ABR_CAP) {
3272 vc.vc_set |= DKV_ABR_CAP;
3273 }
3274 if (ddi_copyout(&vc, data, sizeof (volcap_t), mode))
3275 err = EFAULT;
3276 return (err);
3277 }
3278
3279 case DKIOCSETVOLCAP:
3280 {
3281 volcap_t vc;
3282 volcapset_t volcap = 0;
3283 mdi_unit_t *ui;
3284
3285 /* Only valid for MN sets */
3286 if (!MD_MNSET_SETNO(MD_MIN2SET(mnum)))
3287 return (EINVAL);
3288
3289 ui = MDI_UNIT(mnum);
3290 if (! (mode & FWRITE))
3291 return (EACCES);
3292
3293 if (ddi_copyin(data, &vc, sizeof (volcap_t), mode))
3294 return (EFAULT);
3295
3296 /* Not valid if a submirror is offline */
3297 if (un->c.un_status & MD_UN_OFFLINE_SM) {
3298 return (EINVAL);
3299 }
3300 if (ui->ui_tstate & MD_ABR_CAP)
3301 volcap |= DKV_ABR_CAP;
3302 /* Only send capability message if there is a change */
3303 if ((vc.vc_set & (DKV_ABR_CAP)) != volcap)
3304 err = mdmn_send_capability_message(mnum, vc, lockp);
3305 return (err);
3306 }
3307
3308 case DKIOCDMR:
3309 {
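/*
 * DKIOCDMR is the Directed Mirror Read ioctl: the caller nominates
 * a mirror side (vdr_side) to read from, allowing userland to
 * compare the data held on each submirror.
 */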
3310 vol_directed_rd_t *vdr;
3311
3312 #ifdef _MULTI_DATAMODEL
3313 vol_directed_rd32_t *vdr32;
3314 #endif /* _MULTI_DATAMODEL */
3315
3316 /* Only valid for MN sets */
3317 if (!MD_MNSET_SETNO(MD_MIN2SET(mnum)))
3318 return (EINVAL);
3319
3320 vdr = kmem_zalloc(sizeof (vol_directed_rd_t), KM_NOSLEEP);
3321 if (vdr == NULL)
3322 return (ENOMEM);
3323
3324 #ifdef _MULTI_DATAMODEL
3325 vdr32 = kmem_zalloc(sizeof (vol_directed_rd32_t), KM_NOSLEEP);
3326 if (vdr32 == NULL) {
3327 kmem_free(vdr, sizeof (vol_directed_rd_t));
3328 return (ENOMEM);
3329 }
3330
3331 switch (ddi_model_convert_from(mode & FMODELS)) {
3332 case DDI_MODEL_ILP32:
3333 /*
3334 * If we're called from a higher-level driver we don't
3335 * need to manipulate the data. It's already been done by
3336 * the caller.
3337 */
3338 if (!(mode & FKIOCTL)) {
3339 if (ddi_copyin(data, vdr32, sizeof (*vdr32),
3340 mode)) {
3341 kmem_free(vdr, sizeof (*vdr));
3342 return (EFAULT);
3343 }
3344 vdr->vdr_flags = vdr32->vdr_flags;
3345 vdr->vdr_offset = vdr32->vdr_offset;
3346 vdr->vdr_nbytes = vdr32->vdr_nbytes;
3347 vdr->vdr_data =
3348 (void *)(uintptr_t)vdr32->vdr_data;
3349 vdr->vdr_side = vdr32->vdr_side;
3350 break;
3351 }
3352 /* FALLTHROUGH */
3353
3354 case DDI_MODEL_NONE:
3355 if (ddi_copyin(data, vdr, sizeof (*vdr), mode)) {
3356 kmem_free(vdr32, sizeof (*vdr32));
3357 kmem_free(vdr, sizeof (*vdr));
3358 return (EFAULT);
3359 }
3360 break;
3361
3362 default:
3363 kmem_free(vdr32, sizeof (*vdr32));
3364 kmem_free(vdr, sizeof (*vdr));
3365 return (EFAULT);
3366 }
3367 #else /* ! _MULTI_DATAMODEL */
3368 if (ddi_copyin(data, vdr, sizeof (*vdr), mode)) {
3369 kmem_free(vdr, sizeof (*vdr));
3370 return (EFAULT);
3371 }
3372 #endif /* _MULTI_DATAMODEL */
3373
3374 err = mirror_directed_read(ddi_dev, vdr, mode);
3375
3376 if (err == 0) {
3377 #ifdef _MULTI_DATAMODEL
3378 switch (ddi_model_convert_from(mode & FMODELS)) {
3379 case DDI_MODEL_ILP32:
3380 if (!(mode & FKIOCTL)) {
3381 vdr32->vdr_flags = vdr->vdr_flags;
3382 vdr32->vdr_offset = vdr->vdr_offset;
3383 vdr32->vdr_side = vdr->vdr_side;
3384 vdr32->vdr_bytesread =
3385 vdr->vdr_bytesread;
3386 bcopy(vdr->vdr_side_name,
3387 vdr32->vdr_side_name,
3388 sizeof (vdr32->vdr_side_name));
3389
3390 if (ddi_copyout(vdr32, data,
3391 sizeof (*vdr32), mode)) {
3392 err = EFAULT;
3393 }
3394 break;
3395 }
3396 /* FALLTHROUGH */
3397
3398 case DDI_MODEL_NONE:
3399 if (ddi_copyout(vdr, data, sizeof (*vdr), mode))
3400 err = EFAULT;
3401 break;
3402 }
3403 #else /* ! _MULTI_DATAMODEL */
3404 if (ddi_copyout(vdr, data, sizeof (*vdr), mode))
3405 err = EFAULT;
3406 #endif /* _MULTI_DATAMODEL */
3407 if (vdr->vdr_flags & DKV_DMR_ERROR)
3408 err = EIO;
3409 }
3410
3411 #ifdef _MULTI_DATAMODEL
3412 kmem_free(vdr32, sizeof (*vdr32));
3413 #endif /* _MULTI_DATAMODEL */
3414
3415 kmem_free(vdr, sizeof (*vdr));
3416
3417 return (err);
3418 }
3419
3420 default:
3421 return (ENOTTY);
3422 }
3423 }
3424
3425 /*
3426 * rename named service entry points and support functions
3427 */
3428
3429 /*
3430 * rename/exchange role swap functions
3431 *
3432 * most of these are handled by generic role swap functions
3433 */
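/*
 * In a rename/exchange transaction every affected device carries a
 * role relative to the renamed device (MDRR_SELF, MDRR_PARENT or
 * MDRR_CHILD); each named service below handles one
 * (old_role -> new_role) transition on the in-core structures.
 */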
3434
3435 /*
3436 * MDRNM_UPDATE_KIDS
3437 * rename/exchange of our child or grandchild
3438 */
3439 void
3440 mirror_renexch_update_kids(md_rendelta_t *delta, md_rentxn_t *rtxnp)
3441 {
3442 mm_submirror_t *sm;
3443 int smi;
3444
3445 ASSERT(rtxnp);
3446 ASSERT((MDRNOP_RENAME == rtxnp->op) || (rtxnp->op == MDRNOP_EXCHANGE));
3447 ASSERT(rtxnp->recids);
3448 ASSERT(delta);
3449 ASSERT(delta->unp);
3450 ASSERT(delta->old_role == MDRR_PARENT);
3451 ASSERT(delta->new_role == MDRR_PARENT);
3452
3453 /*
3454 * Since our role isn't changing (parent->parent),
3455 * one of our children must be changing.
3456 * Find the child being modified, and update
3457 * our notion of it.
3458 */
3459 for (smi = 0; smi < NMIRROR; smi++) {
3460 mm_unit_t *un = (mm_unit_t *)delta->unp;
3461
3462 if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) {
3463 continue;
3464 }
3465 sm = &un->un_sm[smi];
3466
3467 if (md_getminor(sm->sm_dev) == rtxnp->from.mnum) {
3468 sm->sm_dev = md_makedevice(md_major, rtxnp->to.mnum);
3469 sm->sm_key = rtxnp->to.key;
3470 break;
3471 }
3472 }
3473
3474 md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
3475 }
3476
3477 /*
3478 * exchange down (self->child)
3479 */
3480 void
3481 mirror_exchange_self_update_from_down(
3482 md_rendelta_t *delta,
3483 md_rentxn_t *rtxnp
3484 )
3485 {
3486 int smi;
3487 mm_submirror_t *found;
3488 minor_t from_min, to_min;
3489 sv_dev_t sv;
3490
3491 ASSERT(rtxnp);
3492 ASSERT(MDRNOP_EXCHANGE == rtxnp->op);
3493 ASSERT(rtxnp->recids);
3494 ASSERT(rtxnp->rec_idx >= 0);
3495 ASSERT(delta);
3496 ASSERT(delta->unp);
3497 ASSERT(delta->uip);
3498 ASSERT(delta->old_role == MDRR_SELF);
3499 ASSERT(delta->new_role == MDRR_CHILD);
3500 ASSERT(md_getminor(delta->dev) == rtxnp->from.mnum);
3501
3502 from_min = rtxnp->from.mnum;
3503 to_min = rtxnp->to.mnum;
3504
3505 /*
3506 * self id changes in our own unit struct
3507 */
3508
3509 MD_SID(delta->unp) = to_min;
3510
3511 /*
3512 * parent identifier need not change
3513 */
3514
3515 /*
3516 * point the set array pointers at the "new" unit and unit in-cores
3517 * Note: the other half of this transfer is done in the "update_to"
3518 * exchange named service.
3519 */
3520
3521 MDI_VOIDUNIT(to_min) = delta->uip;
3522 MD_VOIDUNIT(to_min) = delta->unp;
3523
3524 /*
3525 * transfer kstats
3526 */
3527
3528 delta->uip->ui_kstat = rtxnp->to.kstatp;
3529
3530 /*
3531 * the unit in-core reference to the get next link's id changes
3532 */
3533
3534 delta->uip->ui_link.ln_id = to_min;
3535
3536 /*
3537 * find the child whose identity we're assuming
3538 */
3539
3540 for (found = NULL, smi = 0; !found && smi < NMIRROR; smi++) {
3541 mm_submirror_t *sm;
3542 mm_unit_t *un = (mm_unit_t *)delta->unp;
3543
3544 if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) {
3545 continue;
3546 }
3547 sm = &un->un_sm[smi];
3548
3549 if (md_getminor(sm->sm_dev) == to_min) {
3550 found = sm;
3551 }
3552 }
3553 ASSERT(found);
3554
3555 /*
3556 * Update the sub-mirror's identity
3557 */
3558 found->sm_dev = md_makedevice(md_major, rtxnp->from.mnum);
3559 sv.key = found->sm_key;
3560
3561 ASSERT(rtxnp->from.key != MD_KEYWILD);
3562 ASSERT(rtxnp->from.key != MD_KEYBAD);
3563
3564 found->sm_key = rtxnp->from.key;
3565
3566 /*
3567 * delete the key for the old sub-mirror from the name space
3568 */
3569
3570 sv.setno = MD_MIN2SET(from_min);
3571 md_rem_names(&sv, 1);
3572
3573 /*
3574 * and store the record id (from the unit struct) into recids
3575 */
3576
3577 md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
3578 }
3579
3580 /*
3581 * exchange down (parent->self)
3582 */
3583 void
3584 mirror_exchange_parent_update_to(
3585 md_rendelta_t *delta,
3586 md_rentxn_t *rtxnp
3587 )
3588 {
3589 int smi;
3590 mm_submirror_t *found;
3591 minor_t from_min, to_min;
3592 sv_dev_t sv;
3593
3594 ASSERT(rtxnp);
3595 ASSERT(MDRNOP_EXCHANGE == rtxnp->op);
3596 ASSERT(rtxnp->recids);
3597 ASSERT(rtxnp->rec_idx >= 0);
3598 ASSERT(delta);
3599 ASSERT(delta->unp);
3600 ASSERT(delta->uip);
3601 ASSERT(delta->old_role == MDRR_PARENT);
3602 ASSERT(delta->new_role == MDRR_SELF);
3603 ASSERT(md_getminor(delta->dev) == rtxnp->to.mnum);
3604
3605 from_min = rtxnp->from.mnum;
3606 to_min = rtxnp->to.mnum;
3607
3608 /*
3609 * self id changes in our own unit struct
3610 */
3611
3612 MD_SID(delta->unp) = from_min;
3613
3614 /*
3615 * parent identifier need not change
3616 */
3617
3618 /*
3619 * point the set array pointers at the "new" unit and unit in-cores
3620 * Note: the other half of this transfer is done in the
3621 * "update_from_down" exchange named service.
3622 */
3623
3624 MDI_VOIDUNIT(from_min) = delta->uip;
3625 MD_VOIDUNIT(from_min) = delta->unp;
3626
3627 /*
3628 * transfer kstats
3629 */
3630
3631 delta->uip->ui_kstat = rtxnp->from.kstatp;
3632
3633 /*
3634 * the unit in-core reference to the get next link's id changes
3635 */
3636
3637 delta->uip->ui_link.ln_id = from_min;
3638
3639 /*
3640 * find the child whose identity we're assuming
3641 */
3642
3643 for (found = NULL, smi = 0; !found && smi < NMIRROR; smi++) {
3644 mm_submirror_t *sm;
3645 mm_unit_t *un = (mm_unit_t *)delta->unp;
3646
3647 if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) {
3648 continue;
3649 }
3650 sm = &un->un_sm[smi];
3651
3652 if (md_getminor(sm->sm_dev) == from_min) {
3653 found = sm;
3654 }
3655 }
3656 ASSERT(found);
3657
3658 /*
3659 * Update the sub-mirror's identity
3660 */
3661 found->sm_dev = md_makedevice(md_major, rtxnp->to.mnum);
3662 sv.key = found->sm_key;
3663
3664 ASSERT(rtxnp->to.key != MD_KEYWILD);
3665 ASSERT(rtxnp->to.key != MD_KEYBAD);
3666
3667 found->sm_key = rtxnp->to.key;
3668
3669 /*
3670 * delete the key for the old sub-mirror from the name space
3671 */
3672
3673 sv.setno = MD_MIN2SET(to_min);
3674 md_rem_names(&sv, 1);
3675
3676 /*
3677 * and store the record id (from the unit struct) into recids
3678 */
3679
3680 md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
3681 }
3682
3683 /*
3684 * MDRNM_LIST_URKIDS: named svc entry point
3685 * Add all delta entries appropriate for our children onto the
3686 * delta list pointed to by dlpp.
3687 */
3688 int
3689 mirror_rename_listkids(md_rendelta_t **dlpp, md_rentxn_t *rtxnp)
3690 {
3691 minor_t from_min, to_min;
3692 mm_unit_t *from_un;
3693 md_rendelta_t *new, *p;
3694 int smi;
3695 int n_children;
3696 mm_submirror_t *sm;
3697
3698 ASSERT(rtxnp);
3699 ASSERT(dlpp);
3700 ASSERT((rtxnp->op == MDRNOP_EXCHANGE) || (rtxnp->op == MDRNOP_RENAME));
3701
3702 from_min = rtxnp->from.mnum;
3703 to_min = rtxnp->to.mnum;
3704 n_children = 0;
3705
3706 if (!MDI_UNIT(from_min) || !(from_un = MD_UNIT(from_min))) {
3707 (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, from_min);
3708 return (-1);
3709 }
3710
3711 for (p = *dlpp; p && p->next != NULL; p = p->next) {
3712 /* NULL */
3713 }
3714
3715 for (smi = 0; smi < NMIRROR; smi++) {
3716 minor_t child_min;
3717
3718 if (!SMS_BY_INDEX_IS(from_un, smi, SMS_INUSE)) {
3719 continue;
3720 }
3721
3722 sm = &from_un->un_sm[smi];
3723 child_min = md_getminor(sm->sm_dev);
3724
3725 p = new = md_build_rendelta(MDRR_CHILD,
3726 to_min == child_min ? MDRR_SELF : MDRR_CHILD,
3727 sm->sm_dev, p,
3728 MD_UNIT(child_min), MDI_UNIT(child_min),
3729 &rtxnp->mde);
3730
3731 if (!new) {
3732 if (mdisok(&rtxnp->mde)) {
3733 (void) mdsyserror(&rtxnp->mde, ENOMEM);
3734 }
3735 return (-1);
3736 }
3737 ++n_children;
3738 }
3739
3740 return (n_children);
3741 }
3742
3743 /*
3744 * support routine for MDRNM_CHECK
3745 */
3746 static int
3747 mirror_may_renexch_self(
3748 mm_unit_t *un,
3749 mdi_unit_t *ui,
3750 md_rentxn_t *rtxnp)
3751 {
3752 minor_t from_min;
3753 minor_t to_min;
3754 bool_t toplevel;
3755 bool_t related;
3756 int smi;
3757 mm_submirror_t *sm;
3758
3759 from_min = rtxnp->from.mnum;
3760 to_min = rtxnp->to.mnum;
3761
3762 if (!un || !ui) {
3763 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
3764 from_min);
3765 return (EINVAL);
3766 }
3767
3768 ASSERT(MD_CAPAB(un) & MD_CAN_META_CHILD);
3769 if (!(MD_CAPAB(un) & MD_CAN_META_CHILD)) {
3770 (void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min);
3771 return (EINVAL);
3772 }
3773
3774 if (MD_PARENT(un) == MD_MULTI_PARENT) {
3775 (void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min);
3776 return (EINVAL);
3777 }
3778
3779 toplevel = !MD_HAS_PARENT(MD_PARENT(un));
3780
3781 /* we're related if trying to swap with our parent */
3782 related = (!toplevel) && (MD_PARENT(un) == to_min);
3783
3784 switch (rtxnp->op) {
3785 case MDRNOP_EXCHANGE:
3786 /*
3787 * check for a swap with our child
3788 */
3789 for (smi = 0; smi < NMIRROR; smi++) {
3790
3791 if (!SMS_BY_INDEX_IS(un, smi, SMS_INUSE)) {
3792 continue;
3793 }
3794
3795 sm = &un->un_sm[smi];
3796 if (md_getminor(sm->sm_dev) == to_min) {
3797 related |= TRUE;
3798 }
3799 }
3800 if (!related) {
3801 (void) mdmderror(&rtxnp->mde,
3802 MDE_RENAME_TARGET_UNRELATED, to_min);
3803 return (EINVAL);
3804 }
3805
3806 break;
3807
3808 case MDRNOP_RENAME:
3809 /*
3810 * if from is top-level and is open, then the kernel is using
3811 * the md_dev64_t.
3812 */
3813
3814 if (toplevel && md_unit_isopen(ui)) {
3815 (void) mdmderror(&rtxnp->mde, MDE_RENAME_BUSY,
3816 from_min);
3817 return (EBUSY);
3818 }
3819 break;
3820
3821 default:
3822 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
3823 from_min);
3824 return (EINVAL);
3825 }
3826
3827 return (0); /* ok */
3828 }
3829
3830 /*
3831 * Named service entry point: MDRNM_CHECK
3832 */
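/*
 * The checks below require every in-use submirror component to be
 * CS_OKAY, with no hotspare attached (ms_hs_id == 0) and no resync
 * in progress, before the rename/exchange may proceed.
 */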
3833 intptr_t
3834 mirror_rename_check(
3835 md_rendelta_t *delta,
3836 md_rentxn_t *rtxnp)
3837 {
3838 mm_submirror_t *sm;
3839 mm_submirror_ic_t *smic;
3840 md_m_shared_t *shared;
3841 int ci;
3842 int i;
3843 int compcnt;
3844 mm_unit_t *un;
3845 int err = 0;
3846
3847 ASSERT(delta);
3848 ASSERT(rtxnp);
3849 ASSERT(delta->unp);
3850 ASSERT(delta->uip);
3851 ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE));
3852
3853 if (!delta || !rtxnp || !delta->unp || !delta->uip) {
3854 (void) mdsyserror(&rtxnp->mde, EINVAL);
3855 return (EINVAL);
3856 }
3857
3858 un = (mm_unit_t *)delta->unp;
3859
3860 for (i = 0; i < NMIRROR; i++) {
3861 sm = &un->un_sm[i];
3862 smic = &un->un_smic[i];
3863
3864 if (!SMS_IS(sm, SMS_INUSE))
3865 continue;
3866
3867 ASSERT(smic->sm_get_component_count);
3868 if (!smic->sm_get_component_count) {
3869 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
3870 md_getminor(delta->dev));
3871 return (ENXIO);
3872 }
3873
3874 compcnt = (*(smic->sm_get_component_count))(sm->sm_dev, un);
3875
3876 for (ci = 0; ci < compcnt; ci++) {
3877
3878 ASSERT(smic->sm_shared_by_indx);
3879 if (!smic->sm_shared_by_indx) {
3880 (void) mdmderror(&rtxnp->mde,
3881 MDE_RENAME_CONFIG_ERROR,
3882 md_getminor(delta->dev));
3883 return (ENXIO);
3884 }
3885
3886 shared = (md_m_shared_t *)(*(smic->sm_shared_by_indx))
3887 (sm->sm_dev, sm, ci);
3888
3889 ASSERT(shared);
3890 if (!shared) {
3891 (void) mdmderror(&rtxnp->mde,
3892 MDE_RENAME_CONFIG_ERROR,
3893 md_getminor(delta->dev));
3894 return (ENXIO);
3895 }
3896
3897 if (shared->ms_hs_id != 0) {
3898 (void) mdmderror(&rtxnp->mde,
3899 MDE_SM_FAILED_COMPS,
3900 md_getminor(delta->dev));
3901 return (EIO);
3902 }
3903
3904 switch (shared->ms_state) {
3905 case CS_OKAY:
3906 break;
3907
3908 case CS_RESYNC:
3909 (void) mdmderror(&rtxnp->mde,
3910 MDE_RESYNC_ACTIVE,
3911 md_getminor(delta->dev));
3912 return (EBUSY);
3913
3914 default:
3915 (void) mdmderror(&rtxnp->mde,
3916 MDE_SM_FAILED_COMPS,
3917 md_getminor(delta->dev));
3918 return (EINVAL);
3919 }
3920
3921 }
3922 }
3923
3924 /* self does additional checks */
3925 if (delta->old_role == MDRR_SELF) {
3926 err = mirror_may_renexch_self(un, delta->uip, rtxnp);
3927 }
3928
3929 return (err);
3930 }
3931
3932 /* end of rename/exchange */
3933