1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21 /*
22 * Copyright 2006 Sun Microsystems, Inc. All rights reserved.
23 * Use is subject to license terms.
24 */
25
26 #pragma ident "%Z%%M% %I% %E% SMI"
27
28
29 /*
30 * rename or exchange identities of virtual device nodes
31 */
32
33 #include <sys/param.h>
34 #include <sys/systm.h>
35 #include <sys/debug.h>
36 #include <sys/sysmacros.h>
37 #include <sys/types.h>
38 #include <sys/ddi.h>
39 #include <sys/sunddi.h>
40
41 #include <sys/lvm/mdvar.h>
42 #include <sys/lvm/md_rename.h>
43
44 #include <sys/sysevent/eventdefs.h>
45 #include <sys/sysevent/svm.h>
46
47 extern major_t md_major;
48 extern unit_t md_nunits;
49 extern set_t md_nsets;
50 extern md_set_t md_set[];
51
/*
 * Map role and operation codes to printable strings for use in
 * diagnostic messages.  Any value that is not one of the known
 * codes is reported as "<garbage>".
 */
#define	ROLE(r)						\
	(((r) == MDRR_PARENT) ? "parent" :		\
	((r) == MDRR_SELF) ? "self" :			\
	((r) == MDRR_CHILD) ? "child" :			\
	((r) == MDRR_UNK) ? "<unknown>" : "<garbage>")

#define	OP_STR(op)					\
	(((op) == MDRNOP_RENAME) ? "rename" :		\
	((op) == MDRNOP_EXCHANGE) ? "exchange" :	\
	((op) == MDRNOP_UNK) ? "<unknown>" : "<garbage>")
63 int md_rename_debug = 0;
64
65 /* delta guard rails */
66 const unsigned long long DELTA_BEG = (0xDad08888a110beefull);
67 const unsigned long long DELTA_END = (0xa110Beef88880Dadull);
68
69 const unsigned long long DELTA_BEG_FREED = (0xBad0c0ed0fed0dadull);
70 const unsigned long long DELTA_END_FREED = (0x0Fed0dadbad0c0edull);
71
72 /* transaction guard rails */
73 const unsigned long long TXN_BEG = (0xDad01eadc0ed2badull);
74 const unsigned long long TXN_END = (0xc0ed2badDad01eadull);
75
76 const unsigned long long TXNUN_BEG = (0xcafe0fedbad0beefull);
77 const unsigned long long TXNUN_END = (0xbad0beefcafe0fedull);
78
79 const unsigned int guard_shift = (sizeof (u_longlong_t) - 3);
80 const md_stackcap_t MD_CAN_DO_ANYTHING = (md_stackcap_t)0;
81
82 typedef struct role_change_mapping_tab_t {
83 const int ord;
84 const md_renrole_t old_role;
85 const md_renrole_t new_role;
86 const char *svc_name;
87 md_ren_roleswap_svc_t * const default_svc;
88 } role_change_tab_t;
89
90 /*
91 * The actual table is at the end of the file, so we don't need
92 * many forward references
93 */
94 static role_change_tab_t role_swap_tab[];
95
96 #define ILLEGAL_ROLESWAP_SVC ((md_ren_roleswap_svc_t *)(0xA1100BAD))
97 #define NO_DEFAULT_ROLESWAP_SVC ((md_ren_roleswap_svc_t *)(NULL))
98 #define ILLEGAL_SVC_NAME (NULL)
99
100 /*
101 *
102 * Role swap rule table:
103 *
104 * New Role
105 * +---------------------------------------------------------------|
106 * | | Parent | Self | Child |
107 * +--------+-----------------+----------------+-------------------+
108 * | Parent | no default | ...no default | illegal |
109 * | | 1 (update kids) | 2 (update to) | 3 |
110 * Old +--------+-----------------+----------------+-------------------+
111 * Role | Self | ...self update | ...rename self | no default (down |
112 * | | 4 update up | 5 | 6 update from) |
113 * +--------+-----------------+----------------+-------------------+
114 * | Child | illegal | ...child | ...update |
115 * | | 7 | 8 update to | 9 parent |
116 * +---------------------------------------------------------------+
117 *
118 * and notes:
119 *
120 * - Boxes 1, 4 and 6 are the most interesting. They are responsible
121 * for updating the from unit's data structures. These may involve
122 * finding (former or future) children, resetting name keys and the like.
123 *
124 * - The "rename" operation is boxes 1, 5 and 9. Most of the work
125 * is done in box 5, since that contains both the "from" and "to"
126 * unit struct for rename.
127 *
128 * (There's got to be an eigen function for this; that diagonal
129 * axis is a role identity operation searching for an expression.)
130 *
131 * - Almost every transaction will call more than one of these.
132 * (Only a rename of a unit with no relatives will only call
133 * a single box.)
134 *
135 * - Box 4 "...update from" is the generic self->parent modifier.
136 * - Box 8 "...update to" is the generic child->self modifier.
137 * These can be generic because all of the information which
138 * needs to be updated is in the common portion of the unit
139 * structure when changing from their respective roles.
140 *
141 * - Boxes 1, 2 and 6 ("no default") indicate that per-metadevice
142 * information must be updated. For example, in box 1, children
143 * identities must be updated. Since different metadevice types
144 * detect and manipulate their children differently, there can
145 * be no generic "md_rename" function in this box.
146 *
147 * In addition to the named services in the table above, there
148 * are other named services used by rename/exchange.
149 * MDRNM_LIST_URFOLKS, MDRNM_LIST_URSELF, MDRNM_LIST_URKIDS
150 * list a device's parents, self and children, respectively.
151 * In most cases the default functions can be used for parents
152 * and self. Top-level devices, are not required to have a
153 * "list folks" named service. Likewise, devices which can
154 * not have metadevice children, are not required to have the
155 * "list kids" named service. The LIST_UR* functions call back into
156 * the base driver (md_build_rendelta()) to package the changes to
157 * a device for addition onto the tree. The LIST_UR* named service
158 * then adds this "rename delta" onto the delta tree itself.
159 * This keeps private knowledge appropriately encapsulated.
160 * They return the number of devices which will need to be changed,
161 * and hence the number of elements they've added to the delta list
162 * or -1 for error.
163 *
164 * Other named services used by rename/exchange are:
165 * "lock" (MDRNM_LOCK), "unlock" (MDRNM_UNLOCK) and "check" (MDRNM_CHECK).
166 * These (un) write-lock all of the relevant in-core structs,
167 * including the unit structs for the device and quiesce i/o as necessary.
168 * The "check" named service verifies that this device
169 * is in a state where rename could and may occur at this time.
170 * Since the role_swap functions themselves cannot be undone
171 * (at least in this implementation), it is check()'s job to
172 * verify that the device is renamable (sic) or, if not, abort.
173 * The check function for the device participating in the role
174 * of "self" is usually where rename or exchange validity is verified.
175 *
176 * All of these functions take two arguments which may be thought
177 * of as the collective state changes of the tree of devices
178 * (md_rendelta_t *family) and the rename transaction state
179 * (md_rentxn_t rtxn or rtxnp).
180 *
181 */
182
183
184 /*
185 * rename unit lock
186 * (default name service routine MDRNM_LOCK)
187 */
188 static intptr_t
md_rename_lock(md_rendelta_t * delta,md_rentxn_t * rtxnp)189 md_rename_lock(md_rendelta_t *delta, md_rentxn_t *rtxnp)
190 {
191 minor_t mnum;
192 md_renop_t op;
193
194 ASSERT(delta);
195 ASSERT(rtxnp);
196
197 if (!delta || !rtxnp) {
198 (void) mdsyserror(&rtxnp->mde, EINVAL);
199 return (EINVAL);
200 }
201 mnum = md_getminor(delta->dev);
202 op = rtxnp->op;
203
204 /*
205 * target doesn't exist if renaming (by definition),
206 * so it need not be locked
207 */
208 if (op == MDRNOP_RENAME && mnum == rtxnp->to.mnum) {
209 return (0);
210 }
211
212 ASSERT(delta->uip);
213 if (!delta->uip) {
214 (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, mnum);
215 return (ENODEV);
216 }
217
218 ASSERT(delta->unp);
219 if (!delta->unp) {
220 (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, mnum);
221 return (ENODEV);
222 }
223
224 ASSERT(!UNIT_WRITER_HELD(delta->unp));
225
226 (void) md_unit_writerlock(delta->uip);
227
228 ASSERT(UNIT_WRITER_HELD(delta->unp));
229
230 return (0);
231 }
232
233 /*
234 * (default name service routine MDRNM_UNLOCK)
235 */
236 /* ARGSUSED */
237 static void
md_rename_unlock(md_rendelta_t * delta,md_rentxn_t * rtxnp)238 md_rename_unlock(
239 md_rendelta_t *delta,
240 md_rentxn_t *rtxnp)
241 {
242 ASSERT(delta);
243 ASSERT(delta->uip);
244 ASSERT(delta->unp);
245
246 ASSERT(UNIT_WRITER_HELD(delta->unp));
247
248 (void) md_unit_writerexit(delta->uip);
249
250 ASSERT(!UNIT_WRITER_HELD(delta->unp));
251 }
252
253 /*
254 * This is used by the various MDRNM_LIST* named services.
255 */
256 md_rendelta_t *
md_build_rendelta(md_renrole_t old_role,md_renrole_t new_role,md_dev64_t dev,md_rendelta_t * prev,md_unit_t * unp,mdi_unit_t * uip,md_error_t * ep)257 md_build_rendelta(
258 md_renrole_t old_role,
259 md_renrole_t new_role,
260 md_dev64_t dev,
261 md_rendelta_t *prev,
262 md_unit_t *unp,
263 mdi_unit_t *uip,
264 md_error_t *ep)
265 {
266 int err = 0;
267 md_rendelta_t *new;
268
269 new = (md_rendelta_t *)kmem_alloc(sizeof (md_rendelta_t), KM_SLEEP);
270
271 new->beginning = DELTA_BEG;
272 new->dev = dev;
273 new->new_role = new_role;
274 new->old_role = old_role;
275 new->next = NULL;
276 new->prev = prev;
277 new->unp = unp;
278 new->uip = uip;
279 bzero((void *) &new->txn_stat, sizeof (md_rendstat_t));
280
281 /*
282 * For non-meta devices that are being renamed (in the future,
283 * that is) we would need to pass in default functions to
284 * accommodate them, provided the default function is
285 * truly capable of performing the lock/check/unlock function
286 * on opaque devices.
287 */
288
289 new->lock = md_get_named_service(dev, /* modindex */ 0,
290 MDRNM_LOCK, md_rename_lock);
291
292 new->unlock = (md_ren_void_svc_t *)md_get_named_service(dev,
293 /* modindex */ 0, MDRNM_UNLOCK,
294 (intptr_t (*)()) md_rename_unlock);
295
296 new->check = md_get_named_service(dev, /* modindex */ 0,
297 MDRNM_CHECK, /* Default */ NULL);
298
299 new->role_swap = NULL; /* set this when the roles are determined */
300
301 if (!new->lock || !new->unlock || !new->check) {
302 (void) mdmderror(ep, MDE_RENAME_CONFIG_ERROR, md_getminor(dev));
303 err = EINVAL;
304 goto out;
305 }
306
307 new->end = DELTA_END;
308
309 out:
310 if (err != 0) {
311 if (new) {
312 new->beginning = DELTA_BEG_FREED;
313 new->end = DELTA_END_FREED;
314
315 kmem_free(new, sizeof (md_rendelta_t));
316 new = NULL;
317 }
318 }
319
320 if (prev) {
321 prev->next = new;
322 }
323
324 return (new);
325 }
326
327 /*
328 * md_store_recid()
329 * used by role swap functions
330 */
331 void
md_store_recid(int * prec_idx,mddb_recid_t * recid_list,md_unit_t * un)332 md_store_recid(
333 int *prec_idx,
334 mddb_recid_t *recid_list,
335 md_unit_t *un)
336 {
337 mddb_recid_t *rp;
338 bool_t add_recid;
339
340 ASSERT(prec_idx);
341 ASSERT(recid_list);
342 ASSERT(recid_list[*prec_idx] == 0);
343 ASSERT(*prec_idx >= 0);
344
345 for (add_recid = TRUE, rp = recid_list; add_recid && rp && *rp; rp++) {
346 if (MD_RECID(un) == *rp) {
347 add_recid = FALSE;
348 }
349 }
350
351 if (add_recid) {
352 recid_list[(*prec_idx)++] = MD_RECID(un);
353 }
354 }
355
356 /*
357 * MDRNM_LIST_URFOLKS: generic named svc entry point
358 * add all parents onto the list pointed to by dlpp
359 * (only weird multi-parented devices need to have their
360 * own named svc to do this.)
361 */
362 static int
md_rename_listfolks(md_rendelta_t ** dlpp,md_rentxn_t * rtxnp)363 md_rename_listfolks(md_rendelta_t **dlpp, md_rentxn_t *rtxnp)
364 {
365 md_rendelta_t *new;
366
367 ASSERT(rtxnp);
368 ASSERT(dlpp);
369 ASSERT(*dlpp == NULL);
370 ASSERT((rtxnp->op == MDRNOP_EXCHANGE) || (rtxnp->op == MDRNOP_RENAME));
371 ASSERT(rtxnp->from.uip);
372 ASSERT(rtxnp->from.unp);
373
374 if ((!rtxnp->from.uip) || (!rtxnp->from.unp)) {
375 (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP,
376 rtxnp->from.mnum);
377 return (-1);
378 }
379
380 if (!MD_HAS_PARENT(MD_PARENT(rtxnp->from.unp))) {
381 return (0);
382 }
383
384 /*
385 * If supporting log renaming (and other multiparented devices)
386 * callout to each misc module to claim this waif and return the
387 * md_dev64_t of its parents.
388 */
389 if (MD_PARENT(rtxnp->from.unp) == MD_MULTI_PARENT) {
390 (void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD,
391 rtxnp->from.mnum);
392 return (2);
393 }
394
395 if ((rtxnp->op == MDRNOP_RENAME) ||
396 (MD_PARENT(rtxnp->from.unp) != MD_SID(rtxnp->to.unp))) {
397
398 new = md_build_rendelta(
399 MDRR_PARENT,
400 MDRR_PARENT,
401 md_makedevice(md_major, MD_PARENT(rtxnp->from.unp)),
402 NULL,
403 MD_UNIT(MD_PARENT(rtxnp->from.unp)),
404 MDI_UNIT(MD_PARENT(rtxnp->from.unp)),
405 &rtxnp->mde);
406 } else {
407 /* parent is swapping roles with self */
408 new = md_build_rendelta(
409 MDRR_PARENT,
410 MDRR_SELF,
411 md_makedevice(md_major, MD_SID(rtxnp->to.unp)),
412 NULL,
413 rtxnp->to.unp,
414 rtxnp->to.uip,
415 &rtxnp->mde);
416 }
417
418 if (!new) {
419 if (mdisok(&rtxnp->mde)) {
420 (void) mdsyserror(&rtxnp->mde, ENOMEM);
421 }
422 return (-1);
423 }
424
425 *dlpp = new;
426
427 return (1);
428 }
429
430 /*
431 * MDRNM_LIST_URSELF: named svc entry point
432 * add all delta entries appropriate for ourselves onto the deltalist pointed
433 * to by dlpp
434 */
435 static int
md_rename_listself(md_rendelta_t ** dlpp,md_rentxn_t * rtxnp)436 md_rename_listself(md_rendelta_t **dlpp, md_rentxn_t *rtxnp)
437 {
438 md_rendelta_t *new, *p;
439 bool_t exchange_up = FALSE;
440
441 ASSERT(rtxnp);
442 ASSERT(dlpp);
443 ASSERT((rtxnp->op == MDRNOP_EXCHANGE) || (rtxnp->op == MDRNOP_RENAME));
444 ASSERT(rtxnp->from.unp);
445 ASSERT(rtxnp->from.uip);
446
447 if ((!rtxnp->from.uip) || (!rtxnp->from.unp)) {
448 (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP,
449 rtxnp->from.mnum);
450 return (-1);
451 }
452
453 for (p = *dlpp; p && p->next != NULL; p = p->next) {
454 /* NULL */
455 }
456
457 /*
458 * renaming or
459 * from's parent is not to and to's parent is not from
460 */
461 if (rtxnp->op == MDRNOP_RENAME) {
462 new = md_build_rendelta(
463 MDRR_SELF,
464 MDRR_SELF,
465 md_makedevice(md_major, rtxnp->from.mnum),
466 p,
467 rtxnp->from.unp,
468 rtxnp->from.uip,
469 &rtxnp->mde);
470 } else {
471
472 if (MD_PARENT(rtxnp->from.unp) == MD_SID(rtxnp->to.unp)) {
473 exchange_up = TRUE;
474 }
475
476 /* self and parent are flipping */
477 new = md_build_rendelta(
478 MDRR_SELF,
479 exchange_up? MDRR_PARENT: MDRR_CHILD,
480 md_makedevice(md_major, rtxnp->from.mnum),
481 p,
482 rtxnp->from.unp,
483 rtxnp->from.uip,
484 &rtxnp->mde);
485 }
486
487 if (!new) {
488 if (mdisok(&rtxnp->mde)) {
489 (void) mdsyserror(&rtxnp->mde, ENOMEM);
490 }
491 return (-1);
492 }
493
494 if (!*dlpp) {
495 *dlpp = new;
496 }
497
498 return (1);
499 }
500
501 /*
502 * free the tree of all deltas to devices involved in the rename transaction
503 */
504 static void
free_dtree(md_rendelta_t * family)505 free_dtree(md_rendelta_t *family)
506 {
507 md_rendelta_t *next = NULL;
508 int i = 0;
509 md_rendelta_t *r;
510
511 for (r = family; (NULL != r); r = next, i++) {
512
513 next = r->next;
514
515 /* shift << because it makes the resultant pattern readable */
516 r->beginning = DELTA_BEG_FREED ^ (i << guard_shift);
517 r->end = DELTA_END_FREED ^ (i << guard_shift);
518
519 kmem_free(r, sizeof (md_rendelta_t));
520 }
521 }
522
523 /*
524 * walk down family tree, calling lock service function
525 */
526 static int
lock_dtree(md_rendelta_t * family,md_rentxn_t * rtxnp)527 lock_dtree(md_rendelta_t *family, md_rentxn_t *rtxnp)
528 {
529 md_rendelta_t *r;
530 int rc;
531
532 ASSERT(family);
533 ASSERT(rtxnp);
534
535 if (!family || !rtxnp) {
536 return (EINVAL);
537 }
538
539 for (rc = 0, r = family; r; r = r->next) {
540
541 ASSERT(r->unp);
542 ASSERT(!UNIT_WRITER_HELD(r->unp));
543 ASSERT(r->lock);
544
545 if ((rc = (int)(*r->lock) (r, rtxnp)) != 0) {
546 return (rc);
547 }
548 r->txn_stat.locked = TRUE;
549 }
550
551 return (0);
552 }
553
554 /*
555 * We rely on check() (MDRNM_CHECK) to make exhaustive checks,
556 * since we don't attempt to undo role_swap() failures.
557 *
558 * To implement an undo() function would require each role_swap()
559 * to store a log of previous state of the structures it changes,
560 * presumably anchored by the rendelta.
561 *
562 */
563 static int
check_dtree(md_rendelta_t * family,md_rentxn_t * rtxnp)564 check_dtree(md_rendelta_t *family, md_rentxn_t *rtxnp)
565 {
566 md_rendelta_t *r;
567 int rc;
568
569 ASSERT(family);
570 ASSERT(rtxnp);
571
572 if (!family || !rtxnp) {
573 /* no error packet to set? */
574 return (EINVAL);
575 }
576
577 for (r = family, rc = 0; r; r = r->next) {
578
579 ASSERT(UNIT_WRITER_HELD(r->unp));
580 ASSERT(r->txn_stat.locked);
581
582 /*
583 * <to> doesn't exist for rename
584 */
585 if (!(rtxnp->op == MDRNOP_RENAME &&
586 md_getminor(r->dev) == rtxnp->to.mnum)) {
587 ASSERT(r->uip);
588 r->txn_stat.is_open = md_unit_isopen(r->uip);
589 }
590
591 /*
592 * if only allowing offline rename/exchanges, check
593 * for top being trans because it opens its sub-devices
594 */
595
596 switch (rtxnp->revision) {
597 case MD_RENAME_VERSION_OFFLINE:
598 if ((r->txn_stat.is_open) &&
599 (!rtxnp->stat.trans_in_stack)) {
600 (void) mdmderror(&rtxnp->mde, MDE_RENAME_BUSY,
601 md_getminor(r->dev));
602 return (EBUSY);
603 }
604 break;
605
606 case MD_RENAME_VERSION_ONLINE:
607 break;
608
609 default:
610 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
611 md_getminor(r->dev));
612 return (EINVAL);
613 }
614
615 /* MD_UN_MOD_INPROGRESS includes the MD_UN_RENAMING bit */
616
617 if (MD_STATUS(r->unp) & MD_UN_MOD_INPROGRESS) {
618 (void) mdmderror(&rtxnp->mde, MDE_RENAME_BUSY,
619 md_getminor(r->dev));
620 return (EBUSY);
621 }
622
623 MD_STATUS(r->unp) |= MD_UN_RENAMING;
624
625 if ((rc = (int)(*r->check)(r, rtxnp)) != 0) {
626 return (rc);
627 }
628
629 /* and be sure we can proceed */
630 if (!(r->role_swap)) {
631 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
632 md_getminor(r->dev));
633 return (EINVAL);
634 }
635 r->txn_stat.checked = TRUE;
636 }
637
638 return (0);
639 }
640
641
642 /*
643 * rename role_swap() functions are responsible for updating their
644 * own parent, self and children references in both on-disk
645 * and in-core structures, as well as storing the changed
646 * record ids into recids and incrementing rec_idx.
647 */
648
649 static void
role_swap_dtree(md_rendelta_t * family,md_rentxn_t * rtxnp)650 role_swap_dtree(md_rendelta_t *family, md_rentxn_t *rtxnp)
651 {
652 md_rendelta_t *r;
653
654 ASSERT(family);
655 ASSERT(rtxnp);
656
657 for (r = family; r; r = r->next) {
658 ASSERT(r->role_swap);
659 ASSERT(r->txn_stat.locked);
660 ASSERT(r->txn_stat.checked);
661
662 (*r->role_swap)(r, rtxnp);
663
664 r->txn_stat.role_swapped = TRUE;
665 }
666
667 /*
668 * there's some work to do, but not more than expected
669 */
670 ASSERT(rtxnp->rec_idx > 0);
671 ASSERT(rtxnp->rec_idx < rtxnp->n_recids);
672
673 if (rtxnp->rec_idx >= rtxnp->n_recids || rtxnp->rec_idx <= 0) {
674 /*
675 * There's no way to indicate error from here,
676 * and even if we could, there's no undo mechanism.
677 * We've already modified the in-core structs, so
678 * We can't continue w/o committing, but we
679 * don't appear to have anything to commit.
680 */
681 cmn_err(CE_PANIC,
682 "md_rename: role_swap_dtree(family:%p, rtxnp:%p)",
683 (void *) family, (void *) rtxnp);
684 return;
685 }
686 rtxnp->recids[rtxnp->rec_idx] = 0;
687
688 mddb_commitrecs_wrapper(rtxnp->recids);
689 }
690
691 /*
692 * walk down delta tree, calling the unlock service for each device,
693 * provided any of the devices appear to have been locked
694 */
695 static void
unlock_dtree(md_rendelta_t * family,md_rentxn_t * rtxnp)696 unlock_dtree(md_rendelta_t *family, md_rentxn_t *rtxnp)
697 {
698 md_rendelta_t *r;
699 uint_t any_locked = FALSE;
700
701 ASSERT(family);
702 ASSERT(rtxnp);
703
704 for (r = family; r; r = r->next) {
705
706 ASSERT(!(r->txn_stat.unlocked)); /* "has been unlocked" */
707 any_locked |= r->txn_stat.locked;
708 }
709
710 if (any_locked) {
711
712 /* unwind in reverse order */
713 for (r = family; NULL != r->next; r = r->next) {
714 /* NULL */
715 }
716
717 for (; NULL != r; r = r->prev) {
718 MD_STATUS(r->unp) &= ~MD_UN_RENAMING;
719 ASSERT(r->unlock);
720 r->unlock(r, rtxnp);
721 r->txn_stat.unlocked = TRUE;
722 }
723 }
724 }
725
726 /*
727 * MDRNM_UPDATE_SELF
728 * This role swap function is identical for all unit types,
729 * so keep it here. It's also the best example because it
730 * touches all the modified portions of the relevant
731 * in-common structures.
732 */
733 static void
md_rename_update_self(md_rendelta_t * delta,md_rentxn_t * rtxnp)734 md_rename_update_self(
735 md_rendelta_t *delta,
736 md_rentxn_t *rtxnp)
737 {
738 minor_t from_min, to_min;
739 sv_dev_t sv;
740 mddb_de_ic_t *dep;
741 mddb_rb32_t *rbp;
742
743 ASSERT(rtxnp);
744 ASSERT(rtxnp->op == MDRNOP_RENAME);
745 ASSERT(delta);
746 ASSERT(delta->unp);
747 ASSERT(delta->uip);
748 ASSERT(rtxnp->rec_idx >= 0);
749 ASSERT(rtxnp->recids);
750 ASSERT(delta->old_role == MDRR_SELF);
751 ASSERT(delta->new_role == MDRR_SELF);
752 ASSERT(md_getminor(delta->dev) == rtxnp->from.mnum);
753
754 from_min = rtxnp->from.mnum;
755 to_min = rtxnp->to.mnum;
756
757 /*
758 * self id changes in our own unit struct
759 */
760 MD_SID(delta->unp) = to_min;
761
762 /*
763 * make sure that dest always has correct un_revision
764 * and rb_revision
765 */
766 delta->unp->c.un_revision |= MD_FN_META_DEV;
767 dep = mddb_getrecdep(MD_RECID(delta->unp));
768 ASSERT(dep);
769 rbp = dep->de_rb;
770 if (rbp->rb_revision & MDDB_REV_RB) {
771 rbp->rb_revision = MDDB_REV_RBFN;
772 } else if (rbp->rb_revision & MDDB_REV_RB64) {
773 rbp->rb_revision = MDDB_REV_RB64FN;
774 }
775
776 /*
777 * clear old array pointers to unit in-core and unit
778 */
779
780 MDI_VOIDUNIT(from_min) = NULL;
781 MD_VOIDUNIT(from_min) = NULL;
782
783 /*
784 * and point the new slots at the unit in-core and unit structs
785 */
786
787 MDI_VOIDUNIT(to_min) = delta->uip;
788 MD_VOIDUNIT(to_min) = delta->unp;
789
790 /*
791 * recreate kstats
792 * - destroy the ones associated with our former identity
793 * - reallocate and associate them with our new identity
794 */
795 md_kstat_destroy_ui(delta->uip);
796 md_kstat_init_ui(to_min, delta->uip);
797
798 /*
799 * the unit in-core reference to the get next link's id changes
800 */
801
802 delta->uip->ui_link.ln_id = to_min;
803
804 /*
805 * name space addition of new key was done from user-level
806 * remove the old name's key here
807 */
808
809 sv.setno = MD_MIN2SET(from_min);
810 sv.key = rtxnp->from.key;
811
812 md_rem_names(&sv, 1);
813
814 /*
815 * Remove associated device node as well
816 */
817 md_remove_minor_node(from_min);
818
819 /*
820 * and store the record id (from the unit struct) into recids
821 * for later commitment by md_rename()
822 */
823 md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
824 }
825
826 /*
827 * Either one of our siblings and/or our parent changed identities.
828 */
829 static void
md_renexch_update_parent(md_rendelta_t * delta,md_rentxn_t * rtxnp)830 md_renexch_update_parent(
831 md_rendelta_t *delta,
832 md_rentxn_t *rtxnp)
833 {
834 ASSERT(rtxnp);
835 ASSERT((MDRNOP_RENAME == rtxnp->op) || (rtxnp->op == MDRNOP_EXCHANGE));
836 ASSERT(rtxnp->rec_idx >= 0);
837 ASSERT(rtxnp->recids);
838 ASSERT(delta);
839 ASSERT(delta->unp);
840 ASSERT(delta->old_role == MDRR_CHILD);
841 ASSERT(delta->new_role == MDRR_CHILD);
842 ASSERT((MD_PARENT(delta->unp) == rtxnp->from.mnum) ||
843 (MD_PARENT(delta->unp) == rtxnp->to.mnum));
844
845 if (MD_PARENT(delta->unp) == rtxnp->from.mnum) {
846 MD_PARENT(delta->unp) = rtxnp->to.mnum;
847 }
848
849 md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
850 }
851
852 /*
853 * exchange up (child->self)
854 */
855 static void
md_exchange_child_update_to(md_rendelta_t * delta,md_rentxn_t * rtxnp)856 md_exchange_child_update_to(
857 md_rendelta_t *delta,
858 md_rentxn_t *rtxnp)
859 {
860 minor_t from_min, to_min;
861
862 ASSERT(rtxnp);
863 ASSERT(rtxnp->op == MDRNOP_EXCHANGE);
864 ASSERT(rtxnp->rec_idx >= 0);
865 ASSERT(rtxnp->recids);
866 ASSERT(delta);
867 ASSERT(delta->unp);
868 ASSERT(delta->uip);
869 ASSERT(delta->old_role == MDRR_CHILD);
870 ASSERT(delta->new_role == MDRR_SELF);
871 ASSERT(md_getminor(delta->dev) == rtxnp->to.mnum);
872
873 from_min = rtxnp->from.mnum;
874 to_min = rtxnp->to.mnum;
875
876 /*
877 * self id changes in our own unit struct
878 * Note:
879 * - Since we're assuming the identity of "from" we use its mnum even
880 * though we're updating the "to" structures.
881 */
882
883 MD_SID(delta->unp) = from_min;
884
885 /*
886 * our parent identifier becomes the new self, who was "to"
887 */
888
889 MD_PARENT(delta->unp) = to_min;
890
891 /*
892 * point the set array pointers at the "new" unit and unit in-cores
893 * Note:
894 * - The other half of this transfer is done in the "update from"
895 * rename/exchange named service.
896 */
897
898 MD_VOIDUNIT(from_min) = delta->unp;
899 MDI_VOIDUNIT(from_min) = delta->uip;
900
901 /*
902 * transfer kstats
903 */
904
905 delta->uip->ui_kstat = rtxnp->from.kstatp;
906
907 /*
908 * the unit in-core reference to the get next link's id changes
909 */
910
911 delta->uip->ui_link.ln_id = from_min;
912
913 /*
914 * name space additions, if necessary, were done from user-level.
915 * name space deletions, if necessary, were done in "exchange_from"
916 */
917
918 /*
919 * and store the record id (from the unit struct) into recids
920 * for later comitment by md_rename()
921 */
922
923 md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
924 }
925
926 /*
927 * exchange up (self->parent)
928 */
929 static void
md_exchange_self_update_from_up(md_rendelta_t * delta,md_rentxn_t * rtxnp)930 md_exchange_self_update_from_up(
931 md_rendelta_t *delta,
932 md_rentxn_t *rtxnp)
933 {
934 minor_t from_min, to_min;
935
936 ASSERT(rtxnp);
937 ASSERT(rtxnp->op == MDRNOP_EXCHANGE);
938 ASSERT(rtxnp->rec_idx >= 0);
939 ASSERT(rtxnp->recids);
940 ASSERT(delta);
941 ASSERT(delta->unp);
942 ASSERT(delta->uip);
943 ASSERT(delta->old_role == MDRR_SELF);
944 ASSERT(delta->new_role == MDRR_PARENT);
945 ASSERT(md_getminor(delta->dev) == rtxnp->from.mnum);
946
947 from_min = rtxnp->from.mnum;
948 to_min = rtxnp->to.mnum;
949
950 /*
951 * self id changes in our own unit struct
952 * Note:
953 * - Since we're assuming the identity of "to" we use its mnum
954 * while we're updating the "to" structures.
955 */
956
957 MD_SID(delta->unp) = to_min;
958
959 /*
960 * our parent identifier becomes the new parent, who was "from"
961 */
962
963 MD_PARENT(delta->unp) = from_min;
964
965 /*
966 * point the set array pointers at the "new" unit and unit in-cores
967 * Note:
968 * - The other half of this transfer is done in the "update from"
969 * rename/exchange named service.
970 */
971
972 MD_VOIDUNIT(to_min) = delta->unp;
973 MDI_VOIDUNIT(to_min) = delta->uip;
974
975 /*
976 * transfer kstats
977 */
978
979 delta->uip->ui_kstat = rtxnp->to.kstatp;
980
981 /*
982 * the unit in-core reference to the get next link's id changes
983 */
984
985 delta->uip->ui_link.ln_id = to_min;
986
987 /*
988 * name space additions, if necessary, were done from user-level.
989 * name space deletions, if necessary, were done in "exchange_from"
990 */
991
992 /*
993 * and store the record id (from the unit struct) into recids
994 * for later comitment by md_rename()
995 */
996
997 md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
998 }
999
1000 /*
1001 * The order of the called role swap functions is critical.
1002 * If they're not ordered as "all parents", then "all self"
1003 * then "all child" transitions, we will almost certainly
1004 * corrupt the data base and the in-core linkages. So,
1005 * verify that the list built by the individual drivers is
1006 * ok here.
1007 *
1008 * We could have done fancy bit encodings of the roles so
1009 * it all fit into a single word and we wouldn't need the
1010 * prev_ord field. But, since cpu power is cheaper than
1011 * than people power, they're all separate for easier
1012 * debugging and maintaining. (In the unlikely event that
1013 * rename/exchange ever becomes cpu-limited, and this
1014 * algorithm is the bottleneck, we should revisit this.)
1015 */
1016
1017 static bool_t
role_swap_is_valid(int previous,int current,md_rendelta_t * delta,md_rentxn_t * rtxnp)1018 role_swap_is_valid(
1019 int previous,
1020 int current,
1021 md_rendelta_t *delta,
1022 md_rentxn_t *rtxnp)
1023 {
1024 bool_t valid = FALSE;
1025
1026 /*
1027 * we've backed up in processing the role table
1028 */
1029 if ((previous > current) &&
1030 (delta->prev && (delta->old_role != delta->prev->old_role))) {
1031 goto out;
1032 }
1033
1034 /*
1035 * we're repeating the same role transition
1036 */
1037 if (previous == current) {
1038 switch (delta->old_role) {
1039 case MDRR_PARENT:
1040 /*
1041 * require at least one of the devices to
1042 * be multiparented for us to allow another
1043 * parent transition
1044 */
1045 if ((MD_MULTI_PARENT != MD_PARENT(rtxnp->from.unp)) &&
1046 (MD_MULTI_PARENT != MD_PARENT(rtxnp->to.unp))) {
1047 goto out;
1048 }
1049 break;
1050
1051 case MDRR_CHILD:
1052 /* it's ok to have multiple children */
1053 break;
1054
1055 case MDRR_SELF:
1056 /* it's never ok to have multiple self transitions */
1057 /* FALLTHROUGH */
1058 default:
1059 goto out;
1060 }
1061 }
1062
1063 valid = TRUE;
1064 out:
1065 if (!valid) {
1066 if (md_rename_debug != 0) {
1067 cmn_err(CE_NOTE, "previous: %d, current: %d, role: %s",
1068 previous, current,
1069 ROLE(delta->old_role));
1070 delay(3*drv_usectohz(1000000));
1071 ASSERT(FALSE);
1072 }
1073 }
1074
1075 return (valid);
1076 }
1077
1078 static role_change_tab_t *
lookup_role(md_renrole_t old_role,md_renrole_t new_role)1079 lookup_role(md_renrole_t old_role, md_renrole_t new_role)
1080 {
1081 role_change_tab_t *rp;
1082 role_change_tab_t *found = NULL;
1083
1084 for (rp = role_swap_tab; !found && (rp->old_role != MDRR_UNK); rp++) {
1085
1086 if (rp->old_role == old_role && rp->new_role == new_role) {
1087 found = rp;
1088 }
1089 }
1090 /*
1091 * we require a named svc if we've got two devices
1092 * claiming to be changing roles in this manner
1093 */
1094 ASSERT(found);
1095 ASSERT(found->default_svc != ILLEGAL_ROLESWAP_SVC);
1096 ASSERT(found->svc_name != ILLEGAL_SVC_NAME);
1097
1098 if (!found ||
1099 (found->default_svc == ILLEGAL_ROLESWAP_SVC) ||
1100 (found->svc_name == ILLEGAL_SVC_NAME)) {
1101 return (NULL);
1102 }
1103
1104 return (found);
1105 }
1106
1107 /*
1108 * fill in the role swap named svc., now that we know each device
1109 * and its changing role
1110 */
1111 static int
valid_roleswap_dtree(md_rendelta_t * family,md_rentxn_t * rtxnp)1112 valid_roleswap_dtree(
1113 md_rendelta_t *family,
1114 md_rentxn_t *rtxnp
1115 )
1116 {
1117 md_rendelta_t *r;
1118 role_change_tab_t *rolep;
1119 minor_t from_min, to_min;
1120 int prev_ord = -1;
1121 bool_t found_self = FALSE;
1122 int err = 0;
1123
1124 ASSERT(family);
1125 ASSERT(rtxnp);
1126
1127 from_min = rtxnp->from.mnum;
1128 to_min = rtxnp->to.mnum;
1129
1130 for (r = family; r; r = r->next, prev_ord = rolep->ord) {
1131
1132 if (!(rolep = lookup_role(r->old_role, r->new_role))) {
1133 (void) mdmderror(&rtxnp->mde,
1134 MDE_RENAME_CONFIG_ERROR, from_min);
1135 err = EOPNOTSUPP;
1136 goto out;
1137 }
1138 r->role_swap = (md_ren_roleswap_svc_t *)md_get_named_service(
1139 r->dev, /* modindex */ 0,
1140 (char *)rolep->svc_name,
1141 (intptr_t (*)()) rolep->default_svc);
1142
1143 /*
1144 * someone probably called the ioctl directly and
1145 * incorrectly, rather than via the libmeta wrappers
1146 */
1147 if (!(r->role_swap)) {
1148 (void) mdmderror(&rtxnp->mde,
1149 MDE_RENAME_TARGET_UNRELATED, to_min);
1150 err = EOPNOTSUPP;
1151 goto out;
1152 }
1153
1154 if (!role_swap_is_valid(prev_ord, rolep->ord, r, rtxnp)) {
1155 (void) mdmderror(&rtxnp->mde,
1156 MDE_RENAME_CONFIG_ERROR, from_min);
1157 err = EINVAL;
1158 goto out;
1159 }
1160
1161 if (rolep->old_role == MDRR_SELF) {
1162 found_self = TRUE;
1163 }
1164
1165 if (MD_PARENT(r->unp) == MD_MULTI_PARENT) {
1166 (void) mdmderror(&rtxnp->mde, MDE_RENAME_TARGET_BAD,
1167 md_getminor(r->dev));
1168 err = EINVAL;
1169 goto out;
1170 }
1171 }
1172
1173 /*
1174 * must be at least one selfish device
1175 */
1176 ASSERT(found_self);
1177 if (!found_self) {
1178 (void) mdmderror(&rtxnp->mde,
1179 MDE_RENAME_CONFIG_ERROR, from_min);
1180 err = EINVAL;
1181 goto out;
1182 }
1183
1184 out:
1185 return (err);
1186 }
1187
1188 /*
1189 * dump contents of rename transaction
1190 */
1191 static void
dump_txn(md_rentxn_t * rtxnp)1192 dump_txn(md_rentxn_t *rtxnp) {
1193
1194 if (md_rename_debug == 0) {
1195 return;
1196 }
1197
1198 cmn_err(CE_NOTE, "rtxnp: %p", (void *) rtxnp);
1199 if (rtxnp) {
1200 cmn_err(CE_NOTE, "beginning: %llx, op: %s",
1201 rtxnp->beginning, OP_STR(rtxnp->op));
1202
1203 cmn_err(CE_NOTE,
1204 "revision: %d, uflags: %d, rec_idx: %d, n_recids: %d, rec_ids: %p%s",
1205 rtxnp->revision, rtxnp->uflags,
1206 rtxnp->rec_idx, rtxnp->n_recids, (void *) rtxnp->recids,
1207 rtxnp->stat.trans_in_stack? " (trans in stack)": "");
1208 cmn_err(CE_NOTE, " from: beginning: %llx",
1209 rtxnp->from.beginning);
1210 cmn_err(CE_NOTE, " minor: %lX, key: %lX",
1211 (ulong_t)rtxnp->from.mnum, (ulong_t)rtxnp->from.key);
1212 cmn_err(CE_NOTE, " unp: %lX, uip: %lX",
1213 (ulong_t)rtxnp->from.unp, (ulong_t)rtxnp->from.uip);
1214 cmn_err(CE_NOTE, " end: %llx", rtxnp->from.end);
1215 cmn_err(CE_NOTE, " to: beginning: %llx", rtxnp->to.beginning);
1216 cmn_err(CE_NOTE, " minor: %lX, key: %lX",
1217 (ulong_t)rtxnp->to.mnum, (ulong_t)rtxnp->to.key);
1218 cmn_err(CE_NOTE, " unp: %lX, uip: %lX",
1219 (ulong_t)rtxnp->to.unp, (ulong_t)rtxnp->to.uip);
1220 cmn_err(CE_NOTE, " end: %llx", rtxnp->to.end);
1221 cmn_err(CE_NOTE, "end: %llx\n", rtxnp->end);
1222 }
1223 delay(drv_usectohz(1000000));
1224 }
1225
1226 /*
1227 * dump contents of all deltas
1228 */
1229 static void
dump_dtree(md_rendelta_t * family)1230 dump_dtree(md_rendelta_t *family)
1231 {
1232 md_rendelta_t *r;
1233 int i;
1234
1235 if (md_rename_debug == 0) {
1236 return;
1237 }
1238
1239 for (r = family, i = 0; r; r = r->next, i++) {
1240 cmn_err(CE_NOTE, "%d. beginning: %llx", i, r->beginning);
1241 cmn_err(CE_NOTE, " r: %lX, dev: %lX, next: %lx, prev: %lx",
1242 (ulong_t)r, (ulong_t)r->dev,
1243 (ulong_t)r->next, (ulong_t)r->prev);
1244
1245 cmn_err(CE_NOTE, " role: %s -> %s, unp: %lx, uip: %lx",
1246 ROLE(r->old_role), ROLE(r->new_role),
1247 (ulong_t)r->unp, (ulong_t)r->uip);
1248 cmn_err(CE_NOTE,
1249 " lock: %lx, unlock: %lx\n\t check: %lx, role_swap: %lx",
1250 (ulong_t)r->lock, (ulong_t)r->unlock,
1251 (ulong_t)r->check, (ulong_t)r->role_swap);
1252 if (*((uint_t *)(&r->txn_stat)) != 0) {
1253 cmn_err(CE_NOTE, "status: (0x%x) %s%s%s%s%s",
1254 *((uint_t *)(&r->txn_stat)),
1255 r->txn_stat.is_open? "is_open " : "",
1256 r->txn_stat.locked? "locked " : "",
1257 r->txn_stat.checked? "checked " : "",
1258 r->txn_stat.role_swapped? "role_swapped " : "",
1259 r->txn_stat.unlocked? "unlocked" : "");
1260 }
1261 cmn_err(CE_NOTE, "end: %llx\n", r->end);
1262 }
1263 delay(drv_usectohz(1000000));
1264 }
1265
1266 /*
1267 * validate the rename request parameters
1268 */
1269 static int
validate_txn_parms(md_rentxn_t * rtxnp)1270 validate_txn_parms(md_rentxn_t *rtxnp)
1271 {
1272 minor_t to_min, from_min;
1273
1274 ASSERT(rtxnp);
1275
1276 from_min = rtxnp->from.mnum;
1277 to_min = rtxnp->to.mnum;
1278
1279 switch (rtxnp->revision) {
1280 case MD_RENAME_VERSION_OFFLINE:
1281 if (rtxnp->uflags != 0) {
1282 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
1283 from_min);
1284 return (ENOTSUP);
1285 }
1286 break;
1287
1288 case MD_RENAME_VERSION_ONLINE:
1289 /* not supported until 5.0 */
1290 /* FALLTHROUGH */
1291
1292 default:
1293 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
1294 from_min);
1295 return (EPROTONOSUPPORT);
1296 }
1297
1298 if ((rtxnp->from.uip = MDI_UNIT(from_min)) == NULL) {
1299 (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, from_min);
1300 return (ENODEV);
1301 }
1302
1303 if (!md_dev_exists(md_makedevice(md_major, from_min))) {
1304 (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, from_min);
1305 return (ENODEV);
1306 }
1307
1308 if ((rtxnp->from.key == MD_KEYBAD) || (rtxnp->from.key == MD_KEYWILD)) {
1309 (void) mdmderror(&rtxnp->mde, MDE_INVAL_UNIT, from_min);
1310 return (EINVAL);
1311 }
1312
1313 rtxnp->from.kstatp = rtxnp->from.uip->ui_kstat;
1314 rtxnp->from.unp = MD_UNIT(from_min);
1315
1316 if (MD_MIN2SET(to_min) != MD_MIN2SET(from_min)) {
1317 (void) mdmderror(&rtxnp->mde, MDE_INVAL_UNIT, to_min);
1318 return (EINVAL);
1319 }
1320
1321 switch (rtxnp->op) {
1322 case MDRNOP_EXCHANGE:
1323 rtxnp->to.unp = MD_UNIT(to_min);
1324 rtxnp->to.uip = MDI_UNIT(to_min);
1325
1326 /*
1327 * exchange requires target to exist
1328 */
1329
1330 if ((rtxnp->to.uip == NULL) ||
1331 (md_dev_exists(md_makedevice(md_major, to_min)) == NULL)) {
1332 (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP,
1333 to_min);
1334 return (ENODEV);
1335 }
1336
1337 if ((rtxnp->to.key == MD_KEYBAD) ||
1338 (rtxnp->to.key == MD_KEYWILD)) {
1339 (void) mdmderror(&rtxnp->mde, MDE_INVAL_UNIT, to_min);
1340 return (EINVAL);
1341 }
1342
1343 /*
1344 * <from> is not in the role of <self>,
1345 * that is,
1346 * <from> has a parent, which is <to> and <to> has a parent too
1347 * or
1348 * <to> has a parent, which is <from> and <to> can have a child
1349 */
1350 if ((MD_HAS_PARENT(MD_PARENT(rtxnp->from.unp))) &&
1351 (MD_PARENT(rtxnp->from.unp) == to_min) &&
1352 MD_HAS_PARENT(MD_PARENT(rtxnp->to.unp))) {
1353 (void) mdmderror(&rtxnp->mde, MDE_RENAME_ORDER,
1354 from_min);
1355 return (EINVAL);
1356 }
1357
1358 if ((MD_HAS_PARENT(MD_PARENT(rtxnp->to.unp))) &&
1359 (MD_PARENT(rtxnp->to.unp) == from_min) &&
1360 (MD_CAPAB(rtxnp->to.unp) & MD_CAN_META_CHILD)) {
1361 (void) mdmderror(&rtxnp->mde, MDE_RENAME_ORDER,
1362 from_min);
1363 return (EINVAL);
1364 }
1365
1366 rtxnp->to.kstatp = rtxnp->to.uip->ui_kstat;
1367 break;
1368
1369 case MDRNOP_RENAME:
1370
1371 /*
1372 * rename requires <to> not to exist
1373 */
1374
1375 if (MDI_UNIT(to_min) ||
1376 md_dev_exists(md_makedevice(md_major, to_min))) {
1377
1378 (void) mdmderror(&rtxnp->mde, MDE_UNIT_ALREADY_SETUP,
1379 to_min);
1380 return (EEXIST);
1381 }
1382
1383 /*
1384 * and to be within valid ranges for the current
1385 * limits on number of sets and metadevices
1386 */
1387 if ((MD_MIN2SET(to_min) >= md_nsets) ||
1388 (MD_MIN2UNIT(to_min) >= md_nunits)) {
1389 (void) mdmderror(&rtxnp->mde, MDE_INVAL_UNIT, to_min);
1390 return (EINVAL);
1391 }
1392
1393 rtxnp->to.unp = NULL;
1394 rtxnp->to.uip = NULL;
1395 rtxnp->to.kstatp = NULL;
1396 break;
1397
1398 default:
1399 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
1400 from_min);
1401 return (EINVAL);
1402 }
1403
1404 /*
1405 * install guard rails
1406 */
1407 rtxnp->beginning = TXN_BEG;
1408
1409 rtxnp->from.beginning = TXNUN_BEG;
1410 rtxnp->from.end = TXNUN_END;
1411
1412 rtxnp->to.beginning = TXNUN_BEG;
1413 rtxnp->to.end = TXNUN_END;
1414
1415 rtxnp->end = TXN_END;
1416
1417 return (0);
1418 }
1419
1420 /*
1421 * If the device being changed exhibits this capability, set the list
1422 * relatives function pointer to the named service that lists the
1423 * appropriate relatives for this capability.
1424 */
1425 static int
set_list_rels_funcp(md_rentxn_t * rtxnp,md_stackcap_t capability,char * svc_name,md_ren_list_svc_t default_svc_func,md_ren_list_svc_t ** list_relatives_funcp)1426 set_list_rels_funcp(
1427 md_rentxn_t *rtxnp,
1428 md_stackcap_t capability,
1429 char *svc_name,
1430 md_ren_list_svc_t default_svc_func,
1431 md_ren_list_svc_t **list_relatives_funcp
1432 )
1433 {
1434 int err;
1435 minor_t from_min;
1436 md_dev64_t from_dev;
1437 md_unit_t *from_un;
1438 mdi_unit_t *from_ui;
1439
1440 ASSERT(rtxnp);
1441 ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE));
1442 ASSERT(list_relatives_funcp);
1443
1444 from_min = rtxnp->from.mnum;
1445 from_dev = md_makedevice(md_major, from_min);
1446 from_un = MD_UNIT(from_min);
1447 from_ui = MDI_UNIT(from_min);
1448 err = 0;
1449
1450 if (!from_ui || !from_un) {
1451 (void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, from_min);
1452 err = EINVAL;
1453 goto out;
1454 }
1455
1456 if ((capability == MD_CAN_DO_ANYTHING) ||
1457 ((MD_CAPAB(from_un) & capability) == capability)) {
1458
1459 *list_relatives_funcp = (md_ren_list_svc_t *)
1460 md_get_named_service(from_dev,
1461 /* modindex */ 0, svc_name,
1462 (intptr_t (*)()) default_svc_func);
1463
1464 ASSERT(*list_relatives_funcp);
1465 if (!(*list_relatives_funcp)) {
1466 (void) mdmderror(&rtxnp->mde,
1467 MDE_RENAME_CONFIG_ERROR, from_min);
1468 err = EINVAL;
1469 goto out;
1470 }
1471 } else {
1472 *list_relatives_funcp = (md_ren_list_svc_t *)NULL;
1473 }
1474
1475 out:
1476 return (err);
1477 }
1478
1479 /*
1480 * call list relations function, bump recid counter
1481 * by number of members added to the delta list.
1482 * Validate that the number of members added is within bounds.
1483 */
1484 static int
list_relations(md_rendelta_t ** family,md_rentxn_t * rtxnp,md_ren_list_svc_t * add_relatives_funcp,int valid_min,int valid_max)1485 list_relations(
1486 md_rendelta_t **family,
1487 md_rentxn_t *rtxnp,
1488 md_ren_list_svc_t *add_relatives_funcp,
1489 int valid_min,
1490 int valid_max
1491 )
1492 {
1493 int n_added;
1494 int err = 0;
1495
1496 ASSERT(family);
1497 ASSERT(rtxnp);
1498
1499 if (!family || !rtxnp) {
1500 err = EINVAL;
1501 goto out;
1502 }
1503
1504 n_added = 0;
1505
1506 /* no relations of this type */
1507 if (!add_relatives_funcp) {
1508 goto out;
1509 }
1510
1511 n_added = (*add_relatives_funcp) (family, rtxnp);
1512
1513 if ((n_added < valid_min) || (n_added > valid_max)) {
1514 if (mdisok(&rtxnp->mde)) {
1515 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
1516 rtxnp->from.mnum);
1517 }
1518 err = EINVAL;
1519 goto out;
1520 }
1521
1522 rtxnp->n_recids += n_added;
1523
1524 out:
1525 return (err);
1526 }
1527
1528 /*
1529 * build recid array
1530 */
1531 static int
alloc_recids(md_rendelta_t * family,md_rentxn_t * rtxnp)1532 alloc_recids(md_rendelta_t *family, md_rentxn_t *rtxnp)
1533 {
1534 int err = 0;
1535
1536 if (!family || !rtxnp) {
1537 err = ENOMEM;
1538 goto out;
1539 }
1540
1541 rtxnp->rec_idx = 0;
1542
1543 if (rtxnp->n_recids == 0) {
1544 err = EINVAL;
1545 goto out;
1546 }
1547
1548 rtxnp->n_recids += 1; /* terminator */
1549
1550 rtxnp->recids = kmem_alloc(sizeof (mddb_recid_t) * rtxnp->n_recids,
1551 KM_SLEEP);
1552 if (!(rtxnp->recids)) {
1553 err = ENOMEM;
1554 goto out;
1555 }
1556
1557 bzero((void *) rtxnp->recids,
1558 (sizeof (mddb_recid_t) * rtxnp->n_recids));
1559 out:
1560 if (err != 0) {
1561 (void) mdsyserror(&rtxnp->mde, err);
1562 }
1563
1564 return (err);
1565 }
1566
1567 /*
1568 * build family tree (parent(s), self, children)
1569 * The order of the resultant list is important, as it governs
1570 * the order of locking, checking and changing the unit structures.
1571 * Since we'll be changing them, we may not use the MD_UNIT, MDI_UNIT,
1572 * and other pointer which depend on the array being correct.
1573 * Use only the cached pointers (in rtxnp.)
1574 */
1575 static md_rendelta_t *
build_dtree(md_rentxn_t * rtxnp)1576 build_dtree(md_rentxn_t *rtxnp)
1577 {
1578 md_ren_list_svc_t *add_folks, *add_self, *add_kids;
1579 int err;
1580 md_rendelta_t *family = NULL;
1581
1582 ASSERT(rtxnp);
1583 ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE));
1584
1585 err = set_list_rels_funcp(rtxnp, MD_CAN_PARENT, MDRNM_LIST_URFOLKS,
1586 md_rename_listfolks, &add_folks);
1587
1588 if (err) {
1589 goto out;
1590 }
1591
1592 err = set_list_rels_funcp(rtxnp, MD_CAN_DO_ANYTHING, MDRNM_LIST_URSELF,
1593 md_rename_listself, &add_self);
1594 if (err) {
1595 goto out;
1596 }
1597
1598 err = set_list_rels_funcp(rtxnp, MD_CAN_META_CHILD, MDRNM_LIST_URKIDS,
1599 /* no default list func */ ((int (*)()) NULL),
1600 &add_kids);
1601 if (err) {
1602 goto out;
1603 }
1604
1605 rtxnp->n_recids = 0; /* accumulated by list_relations() */
1606
1607 if ((err = list_relations(&family, rtxnp, add_folks, 0, 1)) != 0) {
1608 goto out;
1609 }
1610
1611 if ((err = list_relations(&family, rtxnp, add_self, 1, 1)) != 0) {
1612 goto out;
1613 }
1614
1615 err = list_relations(&family, rtxnp, add_kids, 0, md_nunits);
1616 if (err != 0) {
1617 goto out;
1618 }
1619
1620 /*
1621 * delta tree is still empty?
1622 */
1623 if ((!family) || (rtxnp->n_recids == 0)) {
1624 (void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
1625 rtxnp->from.mnum);
1626 err = EINVAL;
1627 goto out;
1628 }
1629
1630 /*
1631 * verify role change interactions
1632 */
1633 if ((err = valid_roleswap_dtree(family, rtxnp)) != 0) {
1634 goto out;
1635 }
1636
1637 if ((err = alloc_recids(family, rtxnp)) != 0) {
1638 goto out;
1639 }
1640
1641 out:
1642 if (err != 0) {
1643 free_dtree(family);
1644 dump_dtree(family); /* yes, after freeing it */
1645 family = NULL;
1646 }
1647
1648 return (family);
1649 }
1650
1651
1652 /*
1653 * (MD_IOCRENAME) rename/exchange ioctl entry point
1654 * calls individual driver named service entry points
1655 * to build a list of devices which need state changed,
1656 * to verify that they're in a state where renames may occur,
1657 * and to modify themselves into their new identities
1658 */
1659
1660 int
md_rename(md_rename_t * mrp,IOLOCK * iolockp)1661 md_rename(
1662 md_rename_t *mrp,
1663 IOLOCK *iolockp)
1664 {
1665 md_rendelta_t *family = NULL;
1666 md_rentxn_t rtxn;
1667 int err = 0;
1668 set_t setno;
1669 mdc_unit_t *mdc;
1670
1671 ASSERT(iolockp);
1672 if (mrp == NULL)
1673 return (EINVAL);
1674
1675 setno = MD_MIN2SET(mrp->from.mnum);
1676 if (setno >= md_nsets) {
1677 return (EINVAL);
1678 }
1679
1680 /*
1681 * Early exit if top is eof trans
1682 */
1683 mdc = (mdc_unit_t *)md_set[setno].s_un[MD_MIN2UNIT(mrp->from.mnum)];
1684 while (mdc != NULL) {
1685 if (!MD_HAS_PARENT(mdc->un_parent)) {
1686 break;
1687 } else {
1688 mdc = (mdc_unit_t *)md_set[setno].s_un[MD_MIN2UNIT
1689 (mdc->un_parent)];
1690 }
1691 }
1692
1693 if (mdc && mdc->un_type == MD_METATRANS) {
1694 return (EINVAL);
1695 }
1696
1697
1698 mdclrerror(&mrp->mde);
1699
1700 bzero((void *) &rtxn, sizeof (md_rentxn_t));
1701 mdclrerror(&rtxn.mde);
1702
1703 /*
1704 * encapsulate user parameters
1705 */
1706 rtxn.from.key = mrp->from.key;
1707 rtxn.to.key = mrp->to.key;
1708 rtxn.from.mnum = mrp->from.mnum;
1709 rtxn.to.mnum = mrp->to.mnum;
1710 rtxn.op = mrp->op;
1711 rtxn.uflags = mrp->flags;
1712 rtxn.revision = mrp->revision;
1713
1714 if (MD_MIN2UNIT(mrp->to.mnum) >= md_nunits) {
1715 err = EINVAL;
1716 goto cleanup;
1717 }
1718
1719 /*
1720 * catch this early, before taking any locks
1721 */
1722 if (md_get_setstatus(setno) & MD_SET_STALE) {
1723 (void) (mdmddberror(&rtxn.mde, MDE_DB_STALE, rtxn.from.mnum,
1724 MD_MIN2SET(rtxn.from.mnum)));
1725 err = EROFS;
1726 goto cleanup;
1727 }
1728
1729 /*
1730 * Locking and re-validation (of the per-unit state) is
1731 * done by the rename lock/unlock service, for now only take
1732 * the array lock.
1733 */
1734 md_array_writer(iolockp);
1735
1736 /*
1737 * validate the rename/exchange parameters
1738 * rtxn is filled in on succesful completion of validate_txn_parms()
1739 */
1740 if ((err = validate_txn_parms(&rtxn)) != 0) {
1741 goto cleanup;
1742 }
1743
1744 /*
1745 * build list of work to do, the "delta tree" for related devices
1746 */
1747 if (!(family = build_dtree(&rtxn))) {
1748 err = ENOMEM;
1749 goto cleanup;
1750 }
1751 dump_txn(&rtxn);
1752 dump_dtree(family);
1753
1754 if ((err = lock_dtree(family, &rtxn)) != 0) {
1755 goto cleanup;
1756 }
1757
1758 if ((err = check_dtree(family, &rtxn)) != 0) {
1759 goto cleanup;
1760 }
1761 dump_txn(&rtxn);
1762
1763 role_swap_dtree(family, &rtxn); /* commits the recids */
1764
1765 /*
1766 * let folks know
1767 */
1768 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_RENAME_SRC, SVM_TAG_METADEVICE,
1769 MD_MIN2SET(rtxn.from.mnum), rtxn.from.mnum);
1770 SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_RENAME_DST, SVM_TAG_METADEVICE,
1771 MD_MIN2SET(rtxn.from.mnum), rtxn.from.mnum);
1772
1773 cleanup:
1774
1775 if (err != 0 && mdisok(&rtxn.mde)) {
1776 (void) mdsyserror(&rtxn.mde, EINVAL);
1777 }
1778
1779 if (family) {
1780 unlock_dtree(family, &rtxn);
1781 free_dtree(family);
1782 dump_dtree(family);
1783 family = NULL;
1784 }
1785
1786 if (rtxn.recids && (rtxn.n_recids > 0)) {
1787 kmem_free(rtxn.recids, sizeof (mddb_recid_t) * rtxn.n_recids);
1788 }
1789
1790 if (!mdisok(&rtxn.mde)) {
1791 (void) mdstealerror(&mrp->mde, &rtxn.mde);
1792 }
1793
1794 return (0); /* success/failure will be communicated via rtxn.mde */
1795 }
1796
1797 static role_change_tab_t
1798 role_swap_tab[] =
1799 {
1800 {
1801 1, /* ordinal */
1802 MDRR_PARENT, /* old role */
1803 MDRR_PARENT, /* new role */
1804 MDRNM_UPDATE_KIDS, /* named service */
1805 NO_DEFAULT_ROLESWAP_SVC /* default role swap function */
1806 },
1807 {
1808 2,
1809 MDRR_PARENT,
1810 MDRR_SELF,
1811 MDRNM_PARENT_UPDATE_TO,
1812 NO_DEFAULT_ROLESWAP_SVC
1813 },
1814 {
1815 3,
1816 MDRR_PARENT,
1817 MDRR_CHILD,
1818 ILLEGAL_SVC_NAME,
1819 ILLEGAL_ROLESWAP_SVC
1820 },
1821 {
1822 4,
1823 MDRR_SELF,
1824 MDRR_PARENT,
1825 MDRNM_SELF_UPDATE_FROM_UP,
1826 md_exchange_self_update_from_up
1827 },
1828 {
1829 5,
1830 MDRR_SELF,
1831 MDRR_SELF,
1832 MDRNM_UPDATE_SELF,
1833 md_rename_update_self
1834 },
1835 {
1836 6,
1837 MDRR_SELF,
1838 MDRR_CHILD,
1839 MDRNM_SELF_UPDATE_FROM_DOWN,
1840 NO_DEFAULT_ROLESWAP_SVC
1841 },
1842 {
1843 7,
1844 MDRR_CHILD,
1845 MDRR_PARENT,
1846 ILLEGAL_SVC_NAME,
1847 ILLEGAL_ROLESWAP_SVC
1848 },
1849 {
1850 8,
1851 MDRR_CHILD,
1852 MDRR_SELF,
1853 MDRNM_CHILD_UPDATE_TO,
1854 md_exchange_child_update_to
1855 },
1856 {
1857 9,
1858 MDRR_CHILD,
1859 MDRR_CHILD,
1860 MDRNM_UPDATE_FOLKS,
1861 md_renexch_update_parent
1862 },
1863
1864 /* terminator is old_role == MDRR_UNK */
1865 {
1866 0,
1867 MDRR_UNK,
1868 MDRR_UNK,
1869 ILLEGAL_SVC_NAME,
1870 NO_DEFAULT_ROLESWAP_SVC
1871 }
1872 };
1873