1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #include <sys/param.h>
28 #include <sys/systm.h>
29 #include <sys/conf.h>
30 #include <sys/file.h>
31 #include <sys/user.h>
32 #include <sys/uio.h>
33 #include <sys/t_lock.h>
34 #include <sys/buf.h>
35 #include <sys/dkio.h>
36 #include <sys/vtoc.h>
37 #include <sys/kmem.h>
38 #include <vm/page.h>
39 #include <sys/sysmacros.h>
40 #include <sys/types.h>
41 #include <sys/mkdev.h>
42 #include <sys/stat.h>
43 #include <sys/open.h>
44 #include <sys/lvm/mdvar.h>
45 #include <sys/lvm/md_stripe.h>
46 #include <sys/lvm/md_notify.h>
47 #include <sys/modctl.h>
48 #include <sys/ddi.h>
49 #include <sys/sunddi.h>
50 #include <sys/debug.h>
51 #include <sys/model.h>
52
53 #include <sys/sysevent/eventdefs.h>
54 #include <sys/sysevent/svm.h>
55
56 extern int md_status;
57
58 extern unit_t md_nunits;
59 extern set_t md_nsets;
60 extern md_set_t md_set[];
61
62 extern md_ops_t stripe_md_ops;
63 extern md_krwlock_t md_unit_array_rw;
64 extern major_t md_major;
65
66 static int
stripe_replace(replace_params_t * params)67 stripe_replace(replace_params_t *params)
68 {
69 minor_t mnum = params->mnum;
70 ms_unit_t *un;
71 mddb_recid_t recids[6];
72 ms_new_dev_t nd;
73 ms_cd_info_t cd;
74 int ci;
75 int cmpcnt;
76 void *repl_data;
77 md_dev64_t fake_devt;
78 void (*repl_done)();
79
80 mdclrerror(¶ms->mde);
81
82 un = (ms_unit_t *)MD_UNIT(mnum);
83
84 if (MD_STATUS(un) & MD_UN_RESYNC_ACTIVE) {
85 return (mdmderror(¶ms->mde, MDE_RESYNC_ACTIVE, mnum));
86 }
87
88 nd.nd_dev = params->new_dev;
89 nd.nd_key = params->new_key;
90 nd.nd_nblks = params->number_blks;
91 nd.nd_start_blk = params->start_blk;
92 nd.nd_labeled = params->has_label;
93 nd.nd_hs_id = 0;
94
95 /*
96 * stripe_component_count and stripe_get_dev only care about the
97 * minor number associated with the first argument which is a
98 * md_dev64_t
99 *
100 * The comments section for these two routines have been updated
101 * to indicate that this routine calls with fake major numbers.
102 */
103 fake_devt = md_makedevice(0, mnum);
104 cmpcnt = stripe_component_count(fake_devt, NULL);
105 for (ci = 0; ci < cmpcnt; ci++) {
106 (void) stripe_get_dev(fake_devt, NULL, ci, &cd);
107 if ((cd.cd_dev == params->old_dev) ||
108 (cd.cd_orig_dev == params->old_dev))
109 break;
110 }
111 if (ci == cmpcnt) {
112 return (EINVAL);
113 }
114
115 /* In case of a dryrun we're done here */
116 if (params->options & MDIOCTL_DRYRUN) {
117 return (0);
118 }
119
120 (void) stripe_replace_dev(fake_devt, 0, ci, &nd, recids, 6,
121 &repl_done, &repl_data);
122 mddb_commitrecs_wrapper(recids);
123 (*repl_done)(fake_devt, repl_data);
124
125 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_REPLACE, SVM_TAG_METADEVICE,
126 MD_UN2SET(un), MD_SID(un));
127 return (0);
128 }
129
130 static int
stripe_set(void * d,int mode)131 stripe_set(void *d, int mode)
132 {
133 minor_t mnum;
134 ms_unit_t *un;
135 void *p;
136 mddb_recid_t ms_recid;
137 mddb_recid_t *recids;
138 mddb_type_t typ1;
139 int err;
140 set_t setno;
141 md_error_t *mdep;
142 struct ms_comp *mdcomp;
143 int row;
144 int rid;
145 int num_recs;
146 int i, c;
147 md_set_params_t *msp = d;
148
149 mnum = msp->mnum;
150 setno = MD_MIN2SET(mnum);
151
152 mdep = &msp->mde;
153
154 mdclrerror(mdep);
155
156 if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) {
157 return (mdmderror(mdep, MDE_INVAL_UNIT, mnum));
158 }
159
160 if (md_get_setstatus(setno) & MD_SET_STALE)
161 return (mdmddberror(mdep, MDE_DB_STALE, mnum, setno));
162
163 un = MD_UNIT(mnum);
164 if (un != NULL) {
165 return (mdmderror(mdep, MDE_UNIT_ALREADY_SETUP, mnum));
166 }
167
168
169 typ1 = (mddb_type_t)md_getshared_key(setno,
170 stripe_md_ops.md_driver.md_drivername);
171
172 /* create the db record for this mdstruct */
173 if (msp->options & MD_CRO_64BIT) {
174 #if defined(_ILP32)
175 return (mdmderror(mdep, MDE_UNIT_TOO_LARGE, mnum));
176 #else
177 ms_recid = mddb_createrec((size_t)msp->size, typ1, 0,
178 MD_CRO_64BIT | MD_CRO_STRIPE | MD_CRO_FN, setno);
179 #endif
180 } else {
181 ms_recid = mddb_createrec((size_t)msp->size, typ1, 0,
182 MD_CRO_32BIT | MD_CRO_STRIPE | MD_CRO_FN, setno);
183 }
184 if (ms_recid < 0)
185 return (mddbstatus2error(mdep, ms_recid, mnum, setno));
186
187 /* get the address of the mdstruct */
188 p = (void *) mddb_getrecaddr(ms_recid);
189 /*
190 * It is okay that we muck with the mdstruct here,
191 * since no one else will know about the mdstruct
192 * until we commit it. If we crash, the record will
193 * be automatically purged, since we haven't
194 * committed it yet.
195 */
196
197 /* copy in the user's mdstruct */
198 if (err = ddi_copyin((caddr_t)(uintptr_t)msp->mdp, (caddr_t)p,
199 (size_t)msp->size, mode)) {
200 mddb_deleterec_wrapper(ms_recid);
201 return (EFAULT);
202 }
203
204 un = (ms_unit_t *)p;
205
206 /* All 64 bit metadevices only support EFI labels. */
207 if (msp->options & MD_CRO_64BIT) {
208 un->c.un_flag |= MD_EFILABEL;
209 }
210
211 /*
212 * allocate the real recids array. since we may have to commit
213 * underlying metadevice records, we need an array
214 * of size: total number of components in stripe + 3
215 * (1 for the stripe itself, one for the hotspare, one
216 * for the end marker).
217 */
218 num_recs = 3;
219 rid = 0;
220 for (row = 0; row < un->un_nrows; row++) {
221 struct ms_row *mdr = &un->un_row[row];
222 num_recs += mdr->un_ncomp;
223 }
224 recids = kmem_alloc(num_recs * sizeof (mddb_recid_t), KM_SLEEP);
225 recids[rid++] = ms_recid;
226
227 MD_SID(un) = mnum;
228 MD_RECID(un) = recids[0];
229 MD_CAPAB(un) = MD_CAN_PARENT | MD_CAN_SUB_MIRROR | MD_CAN_SP;
230 MD_PARENT(un) = MD_NO_PARENT;
231 un->c.un_revision |= MD_FN_META_DEV;
232
233 if (err = stripe_build_incore(p, 0)) {
234 md_nblocks_set(mnum, -1ULL);
235 MD_UNIT(mnum) = NULL;
236
237 mddb_deleterec_wrapper(recids[0]);
238 kmem_free(recids, num_recs * sizeof (mddb_recid_t));
239 return (err);
240 }
241
242 /*
243 * Update unit availability
244 */
245 md_set[setno].s_un_avail--;
246
247 recids[rid] = 0;
248 if (un->un_hsp_id != -1)
249 err = md_hot_spare_ifc(HSP_INCREF, un->un_hsp_id, 0, 0,
250 &recids[rid++], NULL, NULL, NULL);
251
252
253 if (err) {
254 md_nblocks_set(mnum, -1ULL);
255 MD_UNIT(mnum) = NULL;
256
257 mddb_deleterec_wrapper(recids[0]);
258 kmem_free(recids, num_recs * sizeof (mddb_recid_t));
259 return (mdhsperror(mdep, MDE_INVAL_HSP, un->un_hsp_id));
260 }
261
262 /*
263 * set the parent on any metadevice components.
264 * NOTE: currently soft partitions are the only metadevices
265 * which can appear within a stripe.
266 */
267 mdcomp = (ms_comp_t *)((void *)&((char *)un)[un->un_ocomp]);
268 for (row = 0; row < un->un_nrows; row++) {
269 struct ms_row *mdr = &un->un_row[row];
270 for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) {
271 ms_comp_t *mdc = &mdcomp[c++];
272 md_dev64_t comp_dev;
273 md_unit_t *comp_un;
274
275 comp_dev = mdc->un_dev;
276 if (md_getmajor(comp_dev) == md_major) {
277 /* set parent and disallow soft partitioning */
278 comp_un = MD_UNIT(md_getminor(comp_dev));
279 recids[rid++] = MD_RECID(comp_un);
280 md_set_parent(mdc->un_dev, MD_SID(un));
281 }
282 }
283 }
284
285 /* set end marker */
286 recids[rid] = 0;
287 mddb_commitrecs_wrapper(recids);
288
289 md_create_unit_incore(mnum, &stripe_md_ops, 0);
290 kmem_free(recids, (num_recs * sizeof (mddb_recid_t)));
291 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, SVM_TAG_METADEVICE,
292 MD_UN2SET(un), MD_SID(un));
293 return (0);
294 }
295
296
297 /*ARGSUSED*/
298 static int
stripe_get(void * d,int mode,IOLOCK * lock)299 stripe_get(void *d, int mode, IOLOCK *lock)
300 {
301 minor_t mnum;
302 mdi_unit_t *ui;
303 ms_unit_t *un;
304 md_error_t *mdep;
305 md_i_get_t *migp = d;
306
307
308 mnum = migp->id;
309 mdep = &migp->mde;
310
311 mdclrerror(mdep);
312
313 if ((MD_MIN2SET(mnum) >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits))
314 return (mdmderror(mdep, MDE_INVAL_UNIT, mnum));
315
316 if ((ui = MDI_UNIT(mnum)) == NULL) {
317 return (mdmderror(mdep, MDE_UNIT_NOT_SETUP, mnum));
318 }
319
320 un = (ms_unit_t *)md_ioctl_readerlock(lock, ui);
321
322 if (migp->size == 0) {
323 migp->size = un->c.un_size;
324 return (0);
325 }
326
327 if (migp->size < un->c.un_size) {
328 return (EFAULT);
329 }
330
331 if (ddi_copyout(un, (void *)(uintptr_t)migp->mdp,
332 un->c.un_size, mode))
333 return (EFAULT);
334 return (0);
335 }
336
337 static int
stripe_reset(md_i_reset_t * mirp)338 stripe_reset(md_i_reset_t *mirp)
339 {
340 minor_t mnum = mirp->mnum;
341 ms_unit_t *un;
342 mdi_unit_t *ui;
343 set_t setno = MD_MIN2SET(mnum);
344
345 mdclrerror(&mirp->mde);
346
347 if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits))
348 return (mdmderror(&mirp->mde, MDE_INVAL_UNIT, mnum));
349
350 if (md_get_setstatus(setno) & MD_SET_STALE)
351 return (mdmddberror(&mirp->mde, MDE_DB_STALE, mnum, setno));
352
353 un = MD_UNIT(mnum);
354 if (un == NULL) {
355 return (mdmderror(&mirp->mde, MDE_UNIT_NOT_SETUP, mnum));
356 }
357
358 /* This prevents new opens */
359 rw_enter(&md_unit_array_rw.lock, RW_WRITER);
360
361 if (MD_HAS_PARENT(un->c.un_parent)) {
362 rw_exit(&md_unit_array_rw.lock);
363 return (mdmderror(&mirp->mde, MDE_IN_USE, mnum));
364 }
365
366 /* single thread */
367 ui = MDI_UNIT(mnum);
368 un = md_unit_openclose_enter(ui);
369
370 if (md_unit_isopen(ui)) {
371 md_unit_openclose_exit(ui);
372 rw_exit(&md_unit_array_rw.lock);
373 return (mdmderror(&mirp->mde, MDE_IS_OPEN, mnum));
374 }
375
376 md_unit_openclose_exit(ui);
377 reset_stripe(un, mnum, 1);
378
379 /*
380 * Update unit availability
381 */
382 md_set[setno].s_un_avail++;
383
384 /*
385 * If MN set, reset s_un_next so all nodes can have
386 * the same view of the next available slot when
387 * nodes are -w and -j
388 */
389 if (MD_MNSET_SETNO(setno)) {
390 (void) md_upd_set_unnext(setno, MD_MIN2UNIT(mnum));
391 }
392
393 rw_exit(&md_unit_array_rw.lock);
394 return (0);
395 }
396
397 static int
stripe_grow(void * d,int mode,IOLOCK * lockp)398 stripe_grow(void *d, int mode, IOLOCK *lockp)
399 {
400 minor_t mnum;
401 ms_unit_t *un, *new_un;
402 mdi_unit_t *ui;
403 minor_t *par = NULL;
404 IOLOCK *plock = NULL;
405 ms_comp_t *mdcomp, *new_comp;
406 int row, i, c;
407 mddb_recid_t ms_recid;
408 mddb_recid_t old_vtoc = 0;
409 mddb_recid_t *recids;
410 md_create_rec_option_t options;
411 mddb_type_t typ1;
412 int err;
413 int64_t tb, atb;
414 uint_t nr, oc;
415 int opened;
416 int rval = 0;
417 set_t setno;
418 md_error_t *mdep;
419 int npar;
420 int rid;
421 int num_recs;
422 u_longlong_t rev;
423 md_grow_params_t *mgp = d;
424
425
426 mnum = mgp->mnum;
427 mdep = &mgp->mde;
428 setno = MD_MIN2SET(mnum);
429 npar = mgp->npar;
430
431 mdclrerror(mdep);
432
433 if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits))
434 return (mdmderror(mdep, MDE_INVAL_UNIT, mnum));
435
436 if (md_get_setstatus(setno) & MD_SET_STALE)
437 return (mdmddberror(mdep, MDE_DB_STALE, mnum, setno));
438
439 ui = MDI_UNIT(mnum);
440 if (ui == NULL) {
441 return (mdmderror(mdep, MDE_UNIT_NOT_SETUP, mnum));
442 }
443
444 if (npar >= 1) {
445 ASSERT((minor_t *)(uintptr_t)mgp->par != NULL);
446 par = kmem_alloc(npar * sizeof (*par), KM_SLEEP);
447 plock = kmem_alloc(npar * sizeof (*plock), KM_SLEEP);
448 if (ddi_copyin((caddr_t)(uintptr_t)mgp->par, (caddr_t)par,
449 (npar * sizeof (*par)), mode) != 0) {
450 kmem_free(par, npar * sizeof (*par));
451 kmem_free(plock, npar * sizeof (*plock));
452 return (EFAULT);
453 }
454 }
455
456 /*
457 * we grab unit reader/writer first, then parent locks,
458 * then our own.
459 * we expect parent units to be sorted to avoid deadlock
460 */
461 rw_enter(&md_unit_array_rw.lock, RW_WRITER);
462 for (i = 0; i < npar; ++i) {
463 (void) md_ioctl_writerlock(&plock[i],
464 MDI_UNIT(par[i]));
465 }
466 un = (ms_unit_t *)md_ioctl_writerlock(lockp, ui);
467
468 if (un->un_nrows != mgp->nrows) {
469 rval = EINVAL;
470 goto out;
471 }
472
473 typ1 = (mddb_type_t)md_getshared_key(setno,
474 stripe_md_ops.md_driver.md_drivername);
475
476 /*
477 * Preserve the friendly name nature of growing device.
478 */
479 options = MD_CRO_STRIPE;
480 if (un->c.un_revision & MD_FN_META_DEV)
481 options |= MD_CRO_FN;
482 if (mgp->options & MD_CRO_64BIT) {
483 #if defined(_ILP32)
484 rval = mdmderror(mdep, MDE_UNIT_TOO_LARGE, mnum);
485 goto out;
486 #else
487 ms_recid = mddb_createrec((size_t)mgp->size, typ1, 0,
488 MD_CRO_64BIT | options, setno);
489 #endif
490 } else {
491 ms_recid = mddb_createrec((size_t)mgp->size, typ1, 0,
492 MD_CRO_32BIT | options, setno);
493 }
494
495
496 if (ms_recid < 0) {
497 rval = mddbstatus2error(mdep, (int)ms_recid, mnum, setno);
498 goto out;
499 }
500
501 /* get the address of the new unit */
502 new_un = (ms_unit_t *)mddb_getrecaddr(ms_recid);
503
504 /*
505 * It is okay that we muck with the new unit here,
506 * since no one else will know about the unit struct
507 * until we commit it. If we crash, the record will
508 * be automatically purged, since we haven't
509 * committed it yet and the old unit struct will be found.
510 */
511
512 /* copy in the user's unit struct */
513 err = ddi_copyin((caddr_t)(uintptr_t)mgp->mdp, (caddr_t)new_un,
514 (size_t)mgp->size, mode);
515 if (err) {
516 mddb_deleterec_wrapper(ms_recid);
517 rval = EFAULT;
518 goto out;
519 }
520 if (options & MD_CRO_FN)
521 new_un->c.un_revision |= MD_FN_META_DEV;
522
523 /*
524 * allocate the real recids array. since we may have to
525 * commit underlying metadevice records, we need an
526 * array of size: total number of new components being
527 * attached + 2 (one for the stripe itself, one for the
528 * end marker).
529 */
530 num_recs = 2;
531 rid = 0;
532 for (row = 0; row < new_un->un_nrows; row++) {
533 struct ms_row *mdr = &new_un->un_row[row];
534 num_recs += mdr->un_ncomp;
535 }
536 recids = kmem_alloc(num_recs * sizeof (mddb_recid_t), KM_SLEEP);
537 recids[rid++] = ms_recid;
538
539 /*
540 * Save a few of the new unit structs fields.
541 * Before they get clobbered.
542 */
543 tb = new_un->c.un_total_blocks;
544 atb = new_un->c.un_actual_tb;
545 nr = new_un->un_nrows;
546 oc = new_un->un_ocomp;
547 rev = new_un->c.un_revision;
548
549 /*
550 * Copy the old unit struct (static stuff)
551 * into new unit struct
552 */
553 bcopy((caddr_t)un, (caddr_t)new_un,
554 sizeof (ms_unit_t) + ((nr - 2) * (sizeof (struct ms_row))));
555
556 /*
557 * Restore the saved stuff.
558 */
559 new_un->c.un_total_blocks = tb;
560 md_nblocks_set(mnum, new_un->c.un_total_blocks);
561 new_un->c.un_actual_tb = atb;
562 new_un->un_nrows = nr;
563 new_un->un_ocomp = oc;
564 new_un->c.un_revision = rev;
565
566 new_un->c.un_record_id = ms_recid;
567 new_un->c.un_size = mgp->size;
568
569 /* All 64 bit metadevices only support EFI labels. */
570 if (mgp->options & MD_CRO_64BIT) {
571 new_un->c.un_flag |= MD_EFILABEL;
572 /*
573 * If the device was previously smaller than a terabyte,
574 * and had a vtoc record attached to it, we remove the
575 * vtoc record, because the layout has changed completely.
576 */
577 if (((un->c.un_revision & MD_64BIT_META_DEV) == 0) &&
578 (un->c.un_vtoc_id != 0)) {
579 old_vtoc = un->c.un_vtoc_id;
580 new_un->c.un_vtoc_id =
581 md_vtoc_to_efi_record(old_vtoc, setno);
582 }
583 }
584
585 /*
586 * Copy the old component structs into the new unit struct.
587 */
588 mdcomp = (ms_comp_t *)((void *)&((char *)un)[un->un_ocomp]);
589 new_comp = (ms_comp_t *)((void *)&((char *)new_un)[new_un->un_ocomp]);
590 for (row = 0; row < un->un_nrows; row++) {
591 struct ms_row *mdr = &un->un_row[row];
592 for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++, c++) {
593 bcopy((caddr_t)&mdcomp[c], (caddr_t)&new_comp[c],
594 sizeof (ms_comp_t));
595 }
596 }
597
598 opened = md_unit_isopen(ui);
599
600 /*
601 * Set parent on metadevices being added.
602 * Open the new devices being added.
603 * NOTE: currently soft partitions are the only metadevices
604 * which can appear within a stripe.
605 */
606 for (row = un->un_nrows; row < new_un->un_nrows; row++) {
607 struct ms_row *mdr = &new_un->un_row[row];
608 for (i = 0, c = mdr->un_icomp; i < mdr->un_ncomp; i++) {
609 struct ms_comp *mdc = &new_comp[c++];
610 md_dev64_t comp_dev;
611 md_unit_t *comp_un;
612
613 comp_dev = mdc->un_dev;
614 /* set parent on any metadevices */
615 if (md_getmajor(comp_dev) == md_major) {
616 comp_un = MD_UNIT(md_getminor(comp_dev));
617 recids[rid++] = MD_RECID(comp_un);
618 md_set_parent(comp_dev, MD_SID(new_un));
619 }
620
621 if (opened) {
622 md_dev64_t tmpdev = mdc->un_dev;
623 /*
624 * Open by device id
625 * Check if this comp is hotspared and
626 * if it is then use the key for hotspare
627 */
628 tmpdev = md_resolve_bydevid(mnum, tmpdev,
629 mdc->un_mirror.ms_hs_id ?
630 mdc->un_mirror.ms_hs_key : mdc->un_key);
631 (void) md_layered_open(mnum, &tmpdev,
632 MD_OFLG_NULL);
633 mdc->un_dev = tmpdev;
634 mdc->un_mirror.ms_flags |= MDM_S_ISOPEN;
635 }
636 }
637 }
638
639 /* set end marker */
640 recids[rid] = 0;
641 /* commit new unit struct */
642 mddb_commitrecs_wrapper(recids);
643
644 /* delete old unit struct */
645 mddb_deleterec_wrapper(un->c.un_record_id);
646
647 /* place new unit in in-core array */
648 md_nblocks_set(mnum, new_un->c.un_total_blocks);
649 MD_UNIT(mnum) = new_un;
650
651 /*
652 * If old_vtoc has a non zero value, we know:
653 * - This unit crossed the border from smaller to larger one TB
654 * - There was a vtoc record for the unit,
655 * - This vtoc record is no longer needed, because
656 * a new efi record has been created for this un.
657 */
658 if (old_vtoc != 0) {
659 mddb_deleterec_wrapper(old_vtoc);
660 }
661
662 /* free recids array */
663 kmem_free(recids, num_recs * sizeof (mddb_recid_t));
664
665 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_GROW, SVM_TAG_METADEVICE,
666 MD_UN2SET(new_un), MD_SID(new_un));
667
668 /* release locks, return success */
669 out:
670 for (i = npar - 1; (i >= 0); --i)
671 md_ioctl_writerexit(&plock[i]);
672 rw_exit(&md_unit_array_rw.lock);
673 if (plock != NULL)
674 kmem_free(plock, npar * sizeof (*plock));
675 if (par != NULL)
676 kmem_free(par, npar * sizeof (*par));
677 return (rval);
678 }
679
680 static int
stripe_get_geom(ms_unit_t * un,struct dk_geom * geomp)681 stripe_get_geom(
682 ms_unit_t *un,
683 struct dk_geom *geomp
684 )
685 {
686 md_get_geom((md_unit_t *)un, geomp);
687
688 return (0);
689 }
690
691 static int
stripe_get_vtoc(ms_unit_t * un,struct vtoc * vtocp)692 stripe_get_vtoc(
693 ms_unit_t *un,
694 struct vtoc *vtocp
695 )
696 {
697 md_get_vtoc((md_unit_t *)un, vtocp);
698
699 return (0);
700 }
701
702 static int
stripe_set_vtoc(ms_unit_t * un,struct vtoc * vtocp)703 stripe_set_vtoc(
704 ms_unit_t *un,
705 struct vtoc *vtocp
706 )
707 {
708 return (md_set_vtoc((md_unit_t *)un, vtocp));
709 }
710
711 static int
stripe_get_extvtoc(ms_unit_t * un,struct extvtoc * vtocp)712 stripe_get_extvtoc(
713 ms_unit_t *un,
714 struct extvtoc *vtocp
715 )
716 {
717 md_get_extvtoc((md_unit_t *)un, vtocp);
718
719 return (0);
720 }
721
722 static int
stripe_set_extvtoc(ms_unit_t * un,struct extvtoc * vtocp)723 stripe_set_extvtoc(
724 ms_unit_t *un,
725 struct extvtoc *vtocp
726 )
727 {
728 return (md_set_extvtoc((md_unit_t *)un, vtocp));
729 }
730
731 static int
stripe_get_cgapart(ms_unit_t * un,struct dk_map * dkmapp)732 stripe_get_cgapart(
733 ms_unit_t *un,
734 struct dk_map *dkmapp
735 )
736 {
737 md_get_cgapart((md_unit_t *)un, dkmapp);
738 return (0);
739 }
740
741 static int
stripe_getdevs(void * d,int mode,IOLOCK * lock)742 stripe_getdevs(
743 void *d,
744 int mode,
745 IOLOCK *lock
746 )
747 {
748 minor_t mnum;
749 mdi_unit_t *ui;
750 ms_unit_t *un;
751 struct ms_row *mdr;
752 ms_comp_t *mdcomp, *mdc;
753 int r, c, i;
754 int cnt;
755 md_error_t *mdep;
756 md_dev64_t *devsp;
757 md_dev64_t unit_dev;
758 md_getdevs_params_t *mgdp = d;
759
760
761 mnum = mgdp->mnum;
762 mdep = &mgdp->mde;
763
764 /* check out unit */
765 mdclrerror(mdep);
766
767 if ((MD_MIN2SET(mnum) >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits))
768 return (mdmderror(mdep, MDE_INVAL_UNIT, mnum));
769
770 if ((ui = MDI_UNIT(mnum)) == NULL) {
771 return (mdmderror(mdep, MDE_UNIT_NOT_SETUP, mnum));
772 }
773
774 un = (ms_unit_t *)md_ioctl_readerlock(lock, ui);
775
776 mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]);
777 devsp = (md_dev64_t *)(uintptr_t)mgdp->devs;
778
779 for (cnt = 0, r = 0; (r < un->un_nrows); ++r) {
780 mdr = &un->un_row[r];
781 for (c = 0, i = mdr->un_icomp; (c < mdr->un_ncomp); ++c) {
782 mdc = &mdcomp[i++];
783 if (cnt < mgdp->cnt) {
784 unit_dev = mdc->un_dev;
785 if (md_getmajor(unit_dev) != md_major) {
786 if ((unit_dev = md_xlate_mini_2_targ
787 (unit_dev)) == NODEV64)
788 return (ENODEV);
789 }
790
791 if (ddi_copyout((caddr_t)&unit_dev, devsp,
792 sizeof (*devsp), mode) != 0)
793 return (EFAULT);
794 ++devsp;
795 }
796 ++cnt;
797 }
798 }
799 mgdp->cnt = cnt;
800 return (0);
801 }
802
803 static int
stripe_change(md_stripe_params_t * msp,IOLOCK * lock)804 stripe_change(
805 md_stripe_params_t *msp,
806 IOLOCK *lock
807 )
808 {
809 ms_params_t *pp = &msp->params;
810 minor_t mnum = msp->mnum;
811 ms_unit_t *un;
812 mdi_unit_t *ui;
813 int r, c, i;
814 struct ms_row *mdr;
815 ms_comp_t *mdcomp, *mdc;
816 mddb_recid_t recids[4];
817 int irecid;
818 int inc_new_hsp = 0;
819 int err;
820 set_t setno = MD_MIN2SET(mnum);
821
822 mdclrerror(&msp->mde);
823
824 if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits))
825 return (mdmderror(&msp->mde, MDE_INVAL_UNIT, mnum));
826
827 if (md_get_setstatus(setno) & MD_SET_STALE)
828 return (mdmddberror(&msp->mde, MDE_DB_STALE, mnum, setno));
829
830 if ((ui = MDI_UNIT(mnum)) == NULL) {
831 return (mdmderror(&msp->mde, MDE_UNIT_NOT_SETUP, mnum));
832 }
833
834 if (!pp->change_hsp_id)
835 return (0);
836
837 un = (ms_unit_t *)md_ioctl_writerlock(lock, ui);
838
839 /* verify that no hot spares are in use */
840 mdcomp = (struct ms_comp *)((void *)&((char *)un)[un->un_ocomp]);
841 for (r = 0; r < un->un_nrows; r++) {
842 mdr = &un->un_row[r];
843 for (c = 0, i = mdr->un_icomp; c < mdr->un_ncomp; c++) {
844 mdc = &mdcomp[i++];
845 if (mdc->un_mirror.ms_hs_id != 0) {
846 return (mdmderror(&msp->mde, MDE_HS_IN_USE,
847 mnum));
848 }
849 }
850 }
851
852 recids[1] = 0;
853 recids[2] = 0;
854 irecid = 1;
855 if (pp->hsp_id != -1) {
856 /* increment the reference count of the new hsp */
857 err = md_hot_spare_ifc(HSP_INCREF, pp->hsp_id, 0, 0,
858 &recids[1], NULL, NULL, NULL);
859 if (err) {
860 return (mdhsperror(&msp->mde, MDE_INVAL_HSP,
861 pp->hsp_id));
862 }
863 inc_new_hsp = 1;
864 irecid++;
865 }
866
867 if (un->un_hsp_id != -1) {
868 /* decrement the reference count of the old hsp */
869 err = md_hot_spare_ifc(HSP_DECREF, un->un_hsp_id, 0, 0,
870 &recids[irecid], NULL, NULL, NULL);
871 if (err) {
872 err = mdhsperror(&msp->mde, MDE_INVAL_HSP,
873 pp->hsp_id);
874 if (inc_new_hsp) {
875 (void) md_hot_spare_ifc(HSP_DECREF,
876 pp->hsp_id, 0, 0,
877 &recids[1], NULL, NULL, NULL);
878 /*
879 * Don't need to commit the record,
880 * cause it never got commit before
881 */
882 }
883 return (err);
884 }
885 }
886
887 un->un_hsp_id = pp->hsp_id;
888
889 recids[0] = un->c.un_record_id;
890 recids[3] = 0;
891 mddb_commitrecs_wrapper(recids);
892 SE_NOTIFY(EC_SVM_STATE, ESC_SVM_CHANGE, SVM_TAG_METADEVICE,
893 MD_UN2SET(un), MD_SID(un));
894
895 return (0);
896 }
897
898 static int
stripe_admin_ioctl(int cmd,void * data,int mode,IOLOCK * lockp)899 stripe_admin_ioctl(int cmd, void *data, int mode, IOLOCK *lockp)
900 {
901 size_t sz = 0;
902 void *d = NULL;
903 int err = 0;
904
905 /* We can only handle 32-bit clients for internal commands */
906 if ((mode & DATAMODEL_MASK) != DATAMODEL_ILP32) {
907 return (EINVAL);
908 }
909
910 /* handle ioctl */
911 switch (cmd) {
912
913 case MD_IOCSET:
914 {
915 if (! (mode & FWRITE))
916 return (EACCES);
917
918 sz = sizeof (struct md_set_params);
919 d = kmem_alloc(sz, KM_SLEEP);
920
921 if (ddi_copyin(data, d, sz, mode)) {
922 err = EFAULT;
923 break;
924 }
925
926 err = stripe_set(d, mode);
927 break;
928 }
929
930 case MD_IOCGET:
931 {
932 if (! (mode & FREAD))
933 return (EACCES);
934
935 sz = sizeof (struct md_i_get);
936 d = kmem_alloc(sz, KM_SLEEP);
937
938 if (ddi_copyin(data, d, sz, mode)) {
939 err = EFAULT;
940 break;
941 }
942
943 err = stripe_get(d, mode, lockp);
944 break;
945 }
946
947 case MD_IOCRESET:
948 {
949 if (! (mode & FWRITE))
950 return (EACCES);
951
952 sz = sizeof (md_i_reset_t);
953 d = kmem_alloc(sz, KM_SLEEP);
954
955 if (ddi_copyin(data, d, sz, mode)) {
956 err = EFAULT;
957 break;
958 }
959
960 err = stripe_reset((md_i_reset_t *)d);
961 break;
962 }
963
964 case MD_IOCGROW:
965 {
966 if (! (mode & FWRITE))
967 return (EACCES);
968
969 sz = sizeof (struct md_grow_params);
970 d = kmem_alloc(sz, KM_SLEEP);
971
972 if (ddi_copyin(data, d, sz, mode)) {
973 err = EFAULT;
974 break;
975 }
976
977 err = stripe_grow(d, mode, lockp);
978 break;
979 }
980
981 case MD_IOCGET_DEVS:
982 {
983 if (! (mode & FREAD))
984 return (EACCES);
985
986 sz = sizeof (struct md_getdevs_params);
987 d = kmem_alloc(sz, KM_SLEEP);
988
989 if (ddi_copyin(data, d, sz, mode)) {
990 err = EFAULT;
991 break;
992 }
993
994 err = stripe_getdevs(d, mode, lockp);
995 break;
996 }
997
998 case MD_IOCCHANGE:
999 {
1000 if (! (mode & FWRITE))
1001 return (EACCES);
1002
1003 sz = sizeof (md_stripe_params_t);
1004 d = kmem_alloc(sz, KM_SLEEP);
1005
1006 if (ddi_copyin(data, d, sz, mode)) {
1007 err = EFAULT;
1008 break;
1009 }
1010
1011 err = stripe_change((md_stripe_params_t *)d, lockp);
1012 break;
1013 }
1014
1015 case MD_IOCREPLACE:
1016 {
1017 if (! (mode & FWRITE))
1018 return (EACCES);
1019
1020 sz = sizeof (replace_params_t);
1021 d = kmem_alloc(sz, KM_SLEEP);
1022
1023 if (ddi_copyin(data, d, sz, mode)) {
1024 err = EFAULT;
1025 break;
1026 }
1027
1028 err = stripe_replace((replace_params_t *)d);
1029 break;
1030 }
1031
1032 case MD_IOCPROBE_DEV:
1033 {
1034 /*
1035 * Ignore the request since stripe is not
1036 * a type of 'redundant' metadevice
1037 */
1038 break;
1039 }
1040
1041 default:
1042 return (ENOTTY);
1043 }
1044
1045 /*
1046 * copyout and free any args
1047 */
1048 if (sz != 0) {
1049 if (err == 0) {
1050 if (ddi_copyout(d, data, sz, mode) != 0) {
1051 err = EFAULT;
1052 }
1053 }
1054 kmem_free(d, sz);
1055 }
1056 return (err);
1057 }
1058
1059 /*
1060 * The parameters of md_stripe_ioctl are defined by the ddi and so
1061 * dev is of type dev_t and not md_dev64_t
1062 */
1063 int
md_stripe_ioctl(dev_t dev,int cmd,void * data,int mode,IOLOCK * lockp)1064 md_stripe_ioctl(dev_t dev, int cmd, void *data, int mode, IOLOCK *lockp)
1065 {
1066 minor_t mnum = getminor(dev);
1067 ms_unit_t *un;
1068 int err = 0;
1069
1070 /* handle admin ioctls */
1071 if (mnum == MD_ADM_MINOR)
1072 return (stripe_admin_ioctl(cmd, data, mode, lockp));
1073
1074 /* check unit */
1075 if ((MD_MIN2SET(mnum) >= md_nsets) ||
1076 (MD_MIN2UNIT(mnum) >= md_nunits) ||
1077 ((un = MD_UNIT(mnum)) == NULL))
1078 return (ENXIO);
1079
1080 /* is this a supported ioctl? */
1081 err = md_check_ioctl_against_unit(cmd, un->c);
1082 if (err != 0) {
1083 return (err);
1084 }
1085
1086 /* handle ioctl */
1087 switch (cmd) {
1088
1089 case DKIOCINFO:
1090 {
1091 struct dk_cinfo *p;
1092
1093 if (! (mode & FREAD))
1094 return (EACCES);
1095
1096 p = kmem_alloc(sizeof (*p), KM_SLEEP);
1097
1098 get_info(p, mnum);
1099 if (ddi_copyout((caddr_t)p, data, sizeof (*p), mode) != 0)
1100 err = EFAULT;
1101
1102 kmem_free(p, sizeof (*p));
1103 return (err);
1104 }
1105
1106 case DKIOCGMEDIAINFO:
1107 {
1108 struct dk_minfo p;
1109
1110 if (! (mode & FREAD))
1111 return (EACCES);
1112
1113 get_minfo(&p, mnum);
1114 if (ddi_copyout(&p, data, sizeof (struct dk_minfo), mode) != 0)
1115 err = EFAULT;
1116
1117 return (err);
1118 }
1119
1120 case DKIOCGGEOM:
1121 {
1122 struct dk_geom *p;
1123
1124 if (! (mode & FREAD))
1125 return (EACCES);
1126
1127 p = kmem_alloc(sizeof (*p), KM_SLEEP);
1128
1129 if ((err = stripe_get_geom(un, p)) == 0) {
1130 if (ddi_copyout((caddr_t)p, data, sizeof (*p),
1131 mode) != 0)
1132 err = EFAULT;
1133 }
1134
1135 kmem_free(p, sizeof (*p));
1136 return (err);
1137 }
1138
1139 case DKIOCGVTOC:
1140 {
1141 struct vtoc *vtoc;
1142
1143 if (! (mode & FREAD))
1144 return (EACCES);
1145
1146 vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP);
1147 if ((err = stripe_get_vtoc(un, vtoc)) != 0) {
1148 kmem_free(vtoc, sizeof (*vtoc));
1149 return (err);
1150 }
1151
1152 if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
1153 if (ddi_copyout(vtoc, data, sizeof (*vtoc), mode))
1154 err = EFAULT;
1155 }
1156 #ifdef _SYSCALL32
1157 else {
1158 struct vtoc32 *vtoc32;
1159
1160 vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP);
1161
1162 vtoctovtoc32((*vtoc), (*vtoc32));
1163 if (ddi_copyout(vtoc32, data, sizeof (*vtoc32), mode))
1164 err = EFAULT;
1165 kmem_free(vtoc32, sizeof (*vtoc32));
1166 }
1167 #endif /* _SYSCALL32 */
1168
1169 kmem_free(vtoc, sizeof (*vtoc));
1170 return (err);
1171 }
1172
1173 case DKIOCSVTOC:
1174 {
1175 struct vtoc *vtoc;
1176
1177 if (! (mode & FWRITE))
1178 return (EACCES);
1179
1180 vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP);
1181 if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
1182 if (ddi_copyin(data, vtoc, sizeof (*vtoc), mode)) {
1183 err = EFAULT;
1184 }
1185 }
1186 #ifdef _SYSCALL32
1187 else {
1188 struct vtoc32 *vtoc32;
1189
1190 vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP);
1191
1192 if (ddi_copyin(data, vtoc32, sizeof (*vtoc32), mode)) {
1193 err = EFAULT;
1194 } else {
1195 vtoc32tovtoc((*vtoc32), (*vtoc));
1196 }
1197 kmem_free(vtoc32, sizeof (*vtoc32));
1198 }
1199 #endif /* _SYSCALL32 */
1200
1201 if (err == 0) {
1202 err = stripe_set_vtoc(un, vtoc);
1203 }
1204
1205 kmem_free(vtoc, sizeof (*vtoc));
1206 return (err);
1207 }
1208
1209
1210 case DKIOCGEXTVTOC:
1211 {
1212 struct extvtoc *extvtoc;
1213
1214 if (! (mode & FREAD))
1215 return (EACCES);
1216
1217 extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP);
1218 if ((err = stripe_get_extvtoc(un, extvtoc)) != 0) {
1219 kmem_free(extvtoc, sizeof (*extvtoc));
1220 return (err);
1221 }
1222
1223 if (ddi_copyout(extvtoc, data, sizeof (*extvtoc), mode))
1224 err = EFAULT;
1225
1226 kmem_free(extvtoc, sizeof (*extvtoc));
1227 return (err);
1228 }
1229
1230 case DKIOCSEXTVTOC:
1231 {
1232 struct extvtoc *extvtoc;
1233
1234 if (! (mode & FWRITE))
1235 return (EACCES);
1236
1237 extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP);
1238 if (ddi_copyin(data, extvtoc, sizeof (*extvtoc), mode)) {
1239 err = EFAULT;
1240 }
1241
1242 if (err == 0) {
1243 err = stripe_set_extvtoc(un, extvtoc);
1244 }
1245
1246 kmem_free(extvtoc, sizeof (*extvtoc));
1247 return (err);
1248 }
1249
1250 case DKIOCGAPART:
1251 {
1252 struct dk_map dmp;
1253
1254 if ((err = stripe_get_cgapart(un, &dmp)) != 0) {
1255 return (err);
1256 }
1257
1258 if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
1259 if (ddi_copyout((caddr_t)&dmp, data, sizeof (dmp),
1260 mode) != 0)
1261 err = EFAULT;
1262 }
1263 #ifdef _SYSCALL32
1264 else {
1265 struct dk_map32 dmp32;
1266
1267 dmp32.dkl_cylno = dmp.dkl_cylno;
1268 dmp32.dkl_nblk = dmp.dkl_nblk;
1269
1270 if (ddi_copyout((caddr_t)&dmp32, data, sizeof (dmp32),
1271 mode) != 0)
1272 err = EFAULT;
1273 }
1274 #endif /* _SYSCALL32 */
1275
1276 return (err);
1277 }
1278 case DKIOCGETEFI:
1279 {
1280 /*
1281 * This one can be done centralized,
1282 * no need to put in the same code for all types of metadevices
1283 */
1284 return (md_dkiocgetefi(mnum, data, mode));
1285 }
1286 case DKIOCSETEFI:
1287 {
1288 /*
1289 * This one can be done centralized,
1290 * no need to put in the same code for all types of metadevices
1291 */
1292 return (md_dkiocsetefi(mnum, data, mode));
1293 }
1294 case DKIOCPARTITION:
1295 {
1296 return (md_dkiocpartition(mnum, data, mode));
1297 }
1298
1299 default:
1300 return (ENOTTY);
1301 }
1302 }
1303
1304 /*
1305 * rename named service entry points and support functions
1306 */
1307
1308 /*
1309 * rename/exchange role swap functions are handled generically
1310 */
1311
1312 /*
1313 * support routine for MDRNM_CHECK
1314 */
1315 static int
stripe_may_renexch_self(ms_unit_t * un,mdi_unit_t * ui,md_rentxn_t * rtxnp)1316 stripe_may_renexch_self(
1317 ms_unit_t *un,
1318 mdi_unit_t *ui,
1319 md_rentxn_t *rtxnp)
1320 {
1321 minor_t from_min;
1322 minor_t to_min;
1323 bool_t toplevel;
1324 bool_t related;
1325
1326 ASSERT(rtxnp);
1327 ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE));
1328
1329 from_min = rtxnp->from.mnum;
1330 to_min = rtxnp->to.mnum;
1331
1332 if (!un || !ui) {
1333 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
1334 from_min);
1335 return (EINVAL);
1336 }
1337
1338 ASSERT(!(MD_CAPAB(un) & MD_CAN_META_CHILD));
1339 if (MD_CAPAB(un) & MD_CAN_META_CHILD) {
1340 (void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min);
1341 return (EINVAL);
1342 }
1343
1344 if (MD_PARENT(un) == MD_MULTI_PARENT) {
1345 (void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min);
1346 return (EINVAL);
1347 }
1348
1349 toplevel = !MD_HAS_PARENT(MD_PARENT(un));
1350
1351 /* we're related if trying to swap with our parent */
1352 related = (!toplevel) && (MD_PARENT(un) == to_min);
1353
1354 switch (rtxnp->op) {
1355 case MDRNOP_EXCHANGE:
1356
1357 if (!related) {
1358 (void) mdmderror(&rtxnp->mde,
1359 MDE_RENAME_TARGET_UNRELATED, to_min);
1360 return (EINVAL);
1361 }
1362
1363 break;
1364
1365 case MDRNOP_RENAME:
1366 /*
1367 * if from is top-level and is open, then the kernel is using
1368 * the device and we return EBUSY.
1369 */
1370
1371 if (toplevel && md_unit_isopen(ui)) {
1372 (void) mdmderror(&rtxnp->mde, MDE_RENAME_BUSY,
1373 from_min);
1374 return (EBUSY);
1375 }
1376 break;
1377
1378 default:
1379 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
1380 from_min);
1381 return (EINVAL);
1382 }
1383
1384 return (0); /* ok */
1385 }
1386
1387 /*
1388 * Named service entry point: MDRNM_CHECK
1389 */
1390 intptr_t
stripe_rename_check(md_rendelta_t * delta,md_rentxn_t * rtxnp)1391 stripe_rename_check(
1392 md_rendelta_t *delta,
1393 md_rentxn_t *rtxnp)
1394 {
1395 int err = 0;
1396
1397 ASSERT(delta);
1398 ASSERT(rtxnp);
1399 ASSERT(delta->unp);
1400 ASSERT(delta->uip);
1401 ASSERT((rtxnp->op == MDRNOP_RENAME) || (MDRNOP_EXCHANGE == rtxnp->op));
1402
1403 if (!delta || !rtxnp || !delta->uip || !delta->unp) {
1404 (void) mdsyserror(&rtxnp->mde, EINVAL);
1405 return (EINVAL);
1406 }
1407
1408 /* self does additional checks */
1409 if (delta->old_role == MDRR_SELF) {
1410 err = stripe_may_renexch_self((ms_unit_t *)delta->unp,
1411 delta->uip, rtxnp);
1412 }
1413 out:
1414 return (err);
1415 }
1416 /* end of rename/exchange */
1417