1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License (the "License").
6 * You may not use this file except in compliance with the License.
7 *
8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9 * or http://www.opensolaris.org/os/licensing.
10 * See the License for the specific language governing permissions
11 * and limitations under the License.
12 *
13 * When distributing Covered Code, include this CDDL HEADER in each
14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15 * If applicable, add the following below this CDDL HEADER, with the
16 * fields enclosed by brackets "[]" replaced with your own identifying
17 * information: Portions Copyright [yyyy] [name of copyright owner]
18 *
19 * CDDL HEADER END
20 */
21
22 /*
23 * Copyright 2009 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 * Copyright 2012 Milan Jurik. All rights reserved.
26 */
27
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/conf.h>
31 #include <sys/file.h>
32 #include <sys/user.h>
33 #include <sys/uio.h>
34 #include <sys/t_lock.h>
35 #include <sys/dkio.h>
36 #include <sys/vtoc.h>
37 #include <sys/kmem.h>
38 #include <vm/page.h>
39 #include <sys/cmn_err.h>
40 #include <sys/sysmacros.h>
41 #include <sys/types.h>
42 #include <sys/mkdev.h>
43 #include <sys/stat.h>
44 #include <sys/open.h>
45 #include <sys/lvm/md_trans.h>
46 #include <sys/modctl.h>
47 #include <sys/ddi.h>
48 #include <sys/sunddi.h>
49 #include <sys/debug.h>
50 #include <sys/filio.h>
51 #include <sys/lvm/md_notify.h>
52 #include <sys/callb.h>
53 #include <sys/disp.h>
54
55 #include <sys/sysevent/eventdefs.h>
56 #include <sys/sysevent/svm.h>
57
58 extern int md_status;
59 extern unit_t md_nunits;
60 extern set_t md_nsets;
61 extern md_set_t md_set[];
62 extern md_ops_t trans_md_ops;
63 extern md_krwlock_t md_unit_array_rw;
64 extern uint_t mt_debug;
65
66 extern major_t md_major;
67
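/*
 * Common unit lookup for the trans ioctl handlers. The flags argument
 * selects the behavior: STALE_OK permits operation on a stale set,
 * NO_OLD succeeds (returning a dummy non-NULL value) only when the
 * unit is not yet set up, ARRAY_WRITER/ARRAY_READER take
 * md_unit_array_rw, and WR_LOCK/RD_LOCK take the per-unit ioctl lock
 * unless NO_LOCK is set. Returns NULL and sets *mde on failure.
 */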
68 static mt_unit_t *
69 trans_getun(minor_t mnum, md_error_t *mde, int flags, IOLOCK *lock)
70 {
71 mt_unit_t *un;
72 mdi_unit_t *ui;
73 set_t setno = MD_MIN2SET(mnum);
74
75 if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) {
76 (void) mdmderror(mde, MDE_INVAL_UNIT, mnum);
77 return (NULL);
78 }
79
80 if (! (flags & STALE_OK)) {
81 if (md_get_setstatus(setno) & MD_SET_STALE) {
82 (void) mdmddberror(mde, MDE_DB_STALE, mnum, setno);
83 return (NULL);
84 }
85 }
86
87 ui = MDI_UNIT(mnum);
88 if (flags & NO_OLD) {
89 if (ui != NULL) {
90 (void) mdmderror(mde, MDE_UNIT_ALREADY_SETUP, mnum);
91 return (NULL);
92 }
93 return ((mt_unit_t *)1);
94 }
95
96 if (ui == NULL) {
97 (void) mdmderror(mde, MDE_UNIT_NOT_SETUP, mnum);
98 return (NULL);
99 }
100
101 if (flags & ARRAY_WRITER)
102 md_array_writer(lock);
103 else if (flags & ARRAY_READER)
104 md_array_reader(lock);
105
106 if (!(flags & NO_LOCK)) {
107 if (flags & WR_LOCK)
108 (void) md_ioctl_writerlock(lock, ui);
109 else /* RD_LOCK */
110 (void) md_ioctl_readerlock(lock, ui);
111 }
112 un = (mt_unit_t *)MD_UNIT(mnum);
113
114 if (un->c.un_type != MD_METATRANS) {
115 (void) mdmderror(mde, MDE_NOT_MT, mnum);
116 return (NULL);
117 }
118
119 return (un);
120 }
121
122 #ifdef DEBUG
123 /*
124 * DEBUG ROUTINES
125 * THESE ROUTINES ARE ONLY USED WHEN ASSERTS ARE ENABLED
126 */
127
128 extern int (*mdv_strategy_tstpnt)(buf_t *, int, void*);
129
130 /*
131 * return the global stats struct
132 */
133 static int
134 trans_get_transstats(void *d, int mode)
135 {
136 md_i_get_t *migp = d;
137
138 mdclrerror(&migp->mde);
139
140 if (migp->size == 0) {
141 migp->size = sizeof (struct transstats);
142 return (0);
143 }
144
145 if (migp->size < sizeof (struct transstats))
146 return (EFAULT);
147
148 if (ddi_copyout(&transstats, (caddr_t)(uintptr_t)migp->mdp,
149 sizeof (struct transstats), mode))
150 return (EFAULT);
151 return (0);
152 }
153
154 /*
155 * test ioctls
156 */
157 /*
158 * TEST TRYGETBLK
159 */
160 /*ARGSUSED1*/
161 static int
162 trans_test_trygetblk(void *d, int mode, IOLOCK *lock)
163 {
164 mt_unit_t *un;
165 int test;
166 dev_t dev;
167 struct buf *bp;
168 struct buf *trygetblk();
169
170 md_i_get_t *migp = d;
171
172 mdclrerror(&migp->mde);
173 migp->size = 0;
174
175 un = trans_getun(migp->id, &migp->mde,
176 RD_LOCK, lock);
177 if (un == NULL)
178 return (EINVAL);
179
180 dev = un->un_m_dev;
181
182 /*
183 * test 1 -- don't find nonexistent buf
184 */
185 test = 1;
186 if (bp = trygetblk(dev, 0))
187 goto errout;
188
189 /*
190 * test 2 - don't find stale buf
191 */
192 test = 2;
193 if ((bp = getblk(dev, 0, DEV_BSIZE)) == NULL)
194 goto errout;
195 bp->b_flags |= (B_STALE|B_DONE);
196 brelse(bp);
197 if (bp = trygetblk(dev, 0))
198 goto errout;
199
200 /*
201 * test 3 -- don't find busy buf
202 */
203 test = 3;
204 if ((bp = getblk(dev, 0, DEV_BSIZE)) == NULL)
205 goto errout;
206 if (trygetblk(dev, 0))
207 goto errout;
208 bp->b_flags |= B_STALE;
209 brelse(bp);
210
211 /*
212 * test 4 -- don't find not-done buf
213 */
214 test = 4;
215 if ((bp = getblk(dev, 0, DEV_BSIZE)) == NULL)
216 goto errout;
217 brelse(bp);
218 if (bp = trygetblk(dev, 0))
219 goto errout;
220
221 /*
222 * test 5 -- find an idle buf
223 */
224 test = 5;
225 if ((bp = bread(dev, 0, DEV_BSIZE)) == NULL)
226 goto errout;
227 brelse(bp);
228 if ((bp = trygetblk(dev, 0)) == NULL)
229 goto errout;
230 bp->b_flags |= B_STALE;
231 brelse(bp);
232 bp = 0;
233
234 test = 0; /* no test failed */
235 errout:
236 if (bp) {
237 bp->b_flags |= B_STALE;
238 brelse(bp);
239 }
240 migp->size = test;
241 if (test)
242 return (EINVAL);
243 return (0);
244 }
245 /*
246 * TEST TRYGETPAGE
247 */
248 static page_t *
249 trans_trypage(struct vnode *vp, uint_t off)
250 {
251 page_t *pp;
252
253 /*
254 * get a locked page
255 */
256 if ((pp = page_lookup_nowait(vp, off, SE_EXCL)) == NULL)
257 return (NULL);
258 /*
259 * get the iolock
260 */
261 if (!page_io_trylock(pp)) {
262 page_unlock(pp);
263 return (NULL);
264 }
265 return (pp);
266 }
267
268 /*ARGSUSED1*/
269 static int
270 trans_test_trypage(void *d, int mode, IOLOCK *lock)
271 {
272 mt_unit_t *un;
273 int test;
274 dev_t dev;
275 struct page *pp;
276 struct vnode *devvp;
277 struct vnode *cvp;
278 extern struct vnode *common_specvp(struct vnode *);
279 extern void pvn_io_done(struct page *);
280
281 md_i_get_t *migp = d;
282
283 mdclrerror(&migp->mde);
284 migp->size = 0;
285
286 un = trans_getun(migp->id, &migp->mde,
287 RD_LOCK, lock);
288 if (un == NULL)
289 return (EINVAL);
290
291 dev = un->un_m_dev;
292 devvp = makespecvp(dev, VBLK);
293 cvp = common_specvp(devvp);
294
295 /*
296 * get rid of the device's pages
297 */
298 (void) VOP_PUTPAGE(cvp, (offset_t)0, (uint_t)0, B_INVAL, CRED(), NULL);
299
300 /*
301 * test 1 -- don't find nonexistent page
302 */
303 test = 1;
304 if (pp = trans_trypage(cvp, 0))
305 goto errout;
306
307 /*
308 * test 2 -- don't find busy page
309 */
310 test = 2;
311 if ((pp = page_create(cvp, 0, 1, PG_WAIT)) == NULL)
312 goto errout;
313 if (trans_trypage(cvp, 0))
314 goto errout;
315 pvn_io_done(pp);
316 pp = 0;
317
318 /*
319 * test 3 - find an idle page
320 */
321 test = 3;
322 if ((pp = page_create(cvp, 0, 1, PG_WAIT)) == NULL)
323 goto errout;
324 pvn_io_done(pp);
325 if ((pp = trans_trypage(cvp, 0)) == NULL)
326 goto errout;
327 pvn_io_done(pp);
328 pp = 0;
329
330 test = 0; /* no test failed */
331 errout:
332 if (pp)
333 pvn_io_done(pp);
334 /*
335 * get rid of the file's pages
336 */
337 (void) VOP_PUTPAGE(cvp, (offset_t)0, (uint_t)0, B_INVAL, CRED(), NULL);
338 VN_RELE(devvp);
339
340 migp->size = test;
341 if (test)
342 return (EINVAL);
343 return (0);
344 }
345 /*
346 * TEST TSD
347 */
348 #define NKEYS (7)
349 #define NTSDTHREADS (3)
350 struct tothread {
351 int test;
352 int error;
353 int exits;
354 int step;
355 kmutex_t lock;
356 kcondvar_t cv;
357 };
358 static uint_t keys[NKEYS];
359 static struct tothread tta[NTSDTHREADS];
360 static int allocatorvalue;
361 static int okdestructoralloc;
362
363 static void
364 trans_test_stepwait(struct tothread *tp, int step)
365 {
366 /*
367 * wait for other thread
368 */
369 mutex_enter(&tp->lock);
370 while (tp->step < step)
371 cv_wait(&tp->cv, &tp->lock);
372 mutex_exit(&tp->lock);
373 }
374
375 static void
376 trans_test_step(struct tothread *tp, int step)
377 {
378 /*
379 * wakeup other threads
380 */
381 mutex_enter(&tp->lock);
382 tp->step = step;
383 cv_broadcast(&tp->cv);
384 mutex_exit(&tp->lock);
385 }
386
387 static void
388 trans_test_destructor(void *voidp)
389 {
390 int exits;
391 struct tothread *tp = voidp;
392
393 /*
394 * check that threads clean up *all* TSD at exit
395 */
396 mutex_enter(&tp->lock);
397 exits = ++tp->exits;
398 mutex_exit(&tp->lock);
399 if (exits >= NKEYS)
400 trans_test_step(tp, 3);
401 }
402
403 static void
404 trans_test_destructor_alloc(void *voidp)
405 {
406 int *value = voidp;
407
408 okdestructoralloc = 0;
409 if (value) {
410 if (*value == allocatorvalue)
411 okdestructoralloc = 1;
412 md_trans_free((caddr_t)value, sizeof (value));
413 }
414 }
415
416 static void *
417 trans_test_allocator(void)
418 {
419 int *value;
420
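/*
 * Note: sizeof (value) is the size of the pointer, not of the int it
 * points to; it is at least sizeof (int) and matches the
 * md_trans_free() size used in trans_test_destructor_alloc().
 */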
421 value = (int *)md_trans_zalloc(sizeof (value));
422 *value = allocatorvalue;
423 return ((void *)value);
424 }
425
426 /*
427 * thread used to test TSD destroy functionality
428 */
429 static void
430 trans_test_thread(struct tothread *tp)
431 {
432 int i;
433 callb_cpr_t cprinfo;
434
435 /*
436 * Register cpr callback
437 */
438 CALLB_CPR_INIT(&cprinfo, &tp->lock, callb_generic_cpr,
439 "trans_test_thread");
440
441 /*
442 * get some TSD
443 */
444 for (i = NKEYS - 1; i >= 0; --i)
445 if (tsd_set(keys[i], tp)) {
446 tp->error = 500;
447 goto errout;
448 }
449 /*
450 * tell parent that we have TSD
451 */
452 trans_test_step(tp, 1);
453
454 /*
455 * wait for parent to destroy some of our TSD
456 */
457 trans_test_stepwait(tp, 2);
458
459 /*
460 * make sure that the appropriate TSD was destroyed
461 */
462 if ((tsd_get(keys[0]) != NULL) ||
463 (tsd_get(keys[NKEYS-1]) != NULL) ||
464 (tsd_get(keys[NKEYS>>1]) != NULL)) {
465 tp->error = 510;
466 goto errout;
467 }
468 for (i = 0; i < NKEYS; ++i)
469 if (tsd_get(keys[i]) != tp)
470 if (i != 0 && i != NKEYS - 1 && i != NKEYS >> 1) {
471 tp->error = 520;
472 goto errout;
473 }
474
475 /*
476 * set up cpr exit
477 */
478 mutex_enter(&tp->lock);
479 CALLB_CPR_EXIT(&cprinfo);
480 thread_exit();
481 errout:
482 /*
483 * error -- make sure the parent will wake up (error code in tp)
484 */
485 trans_test_step(tp, 3);
486
487 /*
488 * set up cpr exit
489 */
490 mutex_enter(&tp->lock);
491 CALLB_CPR_EXIT(&cprinfo);
492 thread_exit();
493 }
494
495 static void
496 trans_test_threadcreate(struct tothread *tp)
497 {
498 /*
499 * initialize the per thread struct and make a thread
500 */
501 bzero((caddr_t)tp, sizeof (struct tothread));
502
503 mutex_init(&tp->lock, NULL, MUTEX_DEFAULT, NULL);
504 cv_init(&tp->cv, NULL, CV_DEFAULT, NULL);
505
506 (void) thread_create(NULL, 0, trans_test_thread, tp, 0, &p0,
507 TS_RUN, minclsyspri);
508 }
509 /*
510 * driver for TSD tests -- *NOT REENTRANT*
511 */
512 /*ARGSUSED1*/
513 static int
514 trans_test_tsd(void *d, int mode)
515 {
516 int test;
517 uint_t rekeys[NKEYS];
518 int i;
519 uint_t key;
520 int error;
521
522 md_i_get_t *migp = d;
523
524 mdclrerror(&migp->mde);
525 migp->size = 0;
526
527 /*
528 * destroy old keys, if any
529 */
530 for (i = 0; i < NKEYS; ++i)
531 tsd_destroy(&keys[i]);
532 /*
533 * test 1 -- simple create and destroy keys tests
534 */
535 test = 1;
536 error = 0;
537 for (i = 0; i < NKEYS; ++i) {
538 tsd_create(&keys[i], NULL);
539
540 /* get with no set should return NULL */
541 if (tsd_get(keys[i]) != NULL) {
542 error = 100;
543 goto errout;
544 }
545
546 /* destroyed key should be 0 */
547 key = keys[i];
548 tsd_destroy(&keys[i]);
549 if (keys[i]) {
550 error = 110;
551 goto errout;
552 }
553
554 /* destroy the key twice */
555 keys[i] = key;
556 tsd_destroy(&keys[i]);
557
558 /* destroyed key should be 0 */
559 if (keys[i]) {
560 error = 120;
561 goto errout;
562 }
563
564 /* getting a destroyed key should return NULL */
565 if (tsd_get(keys[i]) != NULL) {
566 error = 130;
567 goto errout;
568 }
569 /* recreate the key */
570 tsd_create(&keys[i], NULL);
571
572 /* should be the same key as before */
573 if (key != keys[i]) {
574 error = 140;
575 goto errout;
576 }
577
578 /* initial value should be NULL */
579 if (tsd_get(keys[i]) != NULL) {
580 error = 150;
581 goto errout;
582 }
583
584 /* cleanup */
585 tsd_destroy(&keys[i]);
586 }
587
588 /*
589 * test 2 -- recreate keys
590 */
591 test = 2;
592 error = 0;
593 for (i = 0; i < NKEYS; ++i)
594 tsd_create(&keys[i], NULL);
595 for (i = 0; i < NKEYS; ++i) {
596 /* make sure the keys were created */
597 if (keys[i] == 0) {
598 error = 200;
599 goto errout;
600 }
601
602 /* make sure that recreating key doesn't change it */
603 rekeys[i] = keys[i];
604 tsd_create(&rekeys[i], NULL);
605 if (rekeys[i] != keys[i]) {
606 error = 210;
607 goto errout;
608 }
609 }
610 for (i = 0; i < NKEYS; ++i)
611 tsd_destroy(&keys[i]);
612
613 /*
614 * test 3 -- check processing for unset and destroyed keys
615 */
616 test = 3;
617 error = 0;
618
619 /* getting a 0 key returns NULL */
620 if (tsd_get(0) != NULL) {
621 error = 300;
622 goto errout;
623 }
624
625 /* setting a 0 key returns error */
626 if (tsd_set(0, NULL) != EINVAL) {
627 error = 310;
628 goto errout;
629 }
630 tsd_create(&key, NULL);
631
632 /* setting a created key returns no error */
633 if (tsd_set(key, NULL) == EINVAL) {
634 error = 320;
635 goto errout;
636 }
637 tsd_destroy(&key);
638
639 /* setting a destroyed key returns error */
640 if (tsd_set(key, NULL) != EINVAL) {
641 error = 330;
642 goto errout;
643 }
644
645 /*
646 * test 4 -- make sure that set and get work
647 */
648 test = 4;
649 error = 0;
650
651 for (i = 0; i < NKEYS; ++i) {
652 tsd_create(&keys[i], NULL);
653
654 /* set a value */
655 (void) tsd_set(keys[i], &key);
656
657 /* get the value */
658 if (tsd_get(keys[i]) != &key) {
659 error = 400;
660 goto errout;
661 }
662
663 /* set the value to NULL */
664 (void) tsd_set(keys[i], NULL);
665
666 /* get the NULL */
667 if (tsd_get(keys[i]) != NULL) {
668 error = 410;
669 goto errout;
670 }
671 }
672 /* cleanup */
673 for (i = 0; i < NKEYS; ++i)
674 tsd_destroy(&keys[i]);
675
676 /*
677 * test 5 -- destroying keys w/multiple threads
678 */
679 test = 5;
680 error = 0;
681
682 /* create the keys */
683 for (i = 0; i < NKEYS; ++i)
684 tsd_create(&keys[i], trans_test_destructor);
685
686 /* create some threads */
687 for (i = 0; i < NTSDTHREADS; ++i)
688 trans_test_threadcreate(&tta[i]);
689
690 /* wait for the threads to assign TSD */
691 for (i = 0; i < NTSDTHREADS; ++i)
692 trans_test_stepwait(&tta[i], 1);
693
694 /* destroy some of the keys */
695 tsd_destroy(&keys[0]);
696 tsd_destroy(&keys[NKEYS - 1]);
697 tsd_destroy(&keys[NKEYS >> 1]);
698 tsd_destroy(&keys[NKEYS >> 1]);
699
700 /* wakeup the threads -- they check that the destroy took */
701 for (i = 0; i < NTSDTHREADS; ++i)
702 trans_test_step(&tta[i], 2);
703
704 /* wait for the threads to exit (also checks for TSD cleanup) */
705 for (i = 0; i < NTSDTHREADS; ++i)
706 trans_test_stepwait(&tta[i], 3);
707
708 /* destroy the rest of the keys */
709 for (i = 0; i < NKEYS; ++i)
710 tsd_destroy(&keys[i]);
711
712 /* check for error */
713 for (i = 0; i < NTSDTHREADS; ++i) {
714 if (!error)
715 error = tta[i].error;
716 mutex_destroy(&tta[i].lock);
717 cv_destroy(&tta[i].cv);
718 }
719
720 /*
721 * test 6 -- test getcreate
722 */
723 test = 6;
724 error = 0;
725
726 /* make sure the keys are destroyed */
727 for (i = 0; i < NKEYS; ++i)
728 tsd_destroy(&keys[i]);
729
730 /* get w/create */
731 for (i = 0; i < NKEYS; ++i) {
732 allocatorvalue = i;
733 if (*(int *)tsd_getcreate(&keys[i], trans_test_destructor_alloc,
734 trans_test_allocator) != allocatorvalue) {
735 error = 600;
736 goto errout;
737 }
738 }
739 for (i = 0; i < NKEYS; ++i) {
740 allocatorvalue = i;
741 if (*(int *)tsd_get(keys[i]) != allocatorvalue) {
742 error = 610;
743 goto errout;
744 }
745 }
746 /* make sure destructor gets called when we destroy the keys */
747 for (i = 0; i < NKEYS; ++i) {
748 allocatorvalue = i;
749 okdestructoralloc = 0;
750 tsd_destroy(&keys[i]);
751 if (okdestructoralloc == 0) {
752 error = 620;
753 goto errout;
754 }
755 }
756
757 errout:
758 /* make sure the keys are destroyed */
759 for (i = 0; i < NKEYS; ++i)
760 tsd_destroy(&keys[i]);
761
762 /* return test # and error code (if any) */
763 migp->size = test;
764 return (error);
765 }
766
767 /*
768 * Error Injection Structures, Data, and Functions:
769 *
770 * Error injection is used to test the Harpy error recovery system. The
771 * MD_IOC_INJECTERRORS ioctl is used to start or continue error injection on a
772 * unit, and MD_IOC_STOPERRORS turns it off. An mt_error structure is
773 * associated with every trans device for which we are injecting errors. When
774 * MD_IOC_INJECTERRORS is issued, mdv_strategy_tstpnt is set to point to
775 * trans_error_injector(), so that it gets called for every MDD I/O operation.
776 *
777 * The trans unit can be in one of three states:
778 *
779 * count down - Each I/O causes er_count_down to be decremented.
780 * When er_count_down reaches 0, an error is injected
781 * and the block number is remembered. Without making
782 * special provisions, the log area would receive only a
783 * small percentage of the injected errors. Thus,
784 * trans_check_error() is written so that every
785 * other error is injected on the log.
786 *
787 * suspend - No errors are generated and the counters are not
788 * modified. This is so that fsck/mkfs can do their thing
789 * (we're not testing them) and so that the test script can
790 * set up another test. The transition back to the count
791 * down state occurs when MD_IOC_INJECTERRORS is invoked
792 * again.
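 *
 * watch block - any further I/O to the device and block recorded when
 * the last error was injected (er_bad_unit, er_bad_block) is
 * failed with EIO; the counters are not modified (see the
 * mte_watch_block case in trans_check_error()).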
793 */
794
795 typedef enum {
796 mte_count_down,
797 mte_suspend,
798 mte_watch_block
799 } mte_state;
800
801 typedef struct mt_error {
802 struct mt_error *er_next; /* next error unit in list. */
803 mte_state er_state;
804 mt_unit_t *er_unitp; /* unit to force errors on. */
805 size_t er_count_down; /* i/o transactions until error. */
806 size_t er_increment; /* increment for reset_count. */
807 size_t er_reset_count; /* used to reset er_count_down */
808 size_t er_total_errors; /* count generated errors. */
809 /* Following fields describe error we are injecting. */
810 dev_t er_bad_unit; /* Unit associated with block in */
811 /* error. */
812 off_t er_bad_block; /* Block in error. */
813 } mt_error_t;
814
815 #define ERROR_INCREMENT (1)
816 #define INITIAL_COUNT (1)
817
818 static int default_increment = ERROR_INCREMENT;
819 static kmutex_t error_mutex; /* protects error_list */
820 static mt_error_t error_list_head;
821 static int initial_count = INITIAL_COUNT;
822 static int (*tstpnt_save)(buf_t *, int, void*) = NULL;
823
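/*
 * Find the error injection structure for a given trans unit. Returns
 * NULL if none exists; *pred_errp is set to the list element preceding
 * the match (used for removal). Caller must hold error_mutex.
 */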
824 static mt_error_t *
825 find_by_mtunit(mt_unit_t *un, mt_error_t **pred_errp)
826 {
827 mt_error_t *errp = (mt_error_t *)NULL;
828
829 ASSERT(mutex_owned(&error_mutex) != 0);
830 *pred_errp = &error_list_head;
831 while ((errp = (*pred_errp)->er_next) != (mt_error_t *)NULL) {
832 if (errp->er_unitp == un)
833 break;
834 *pred_errp = errp;
835 }
836 return (errp);
837 }
838
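/*
 * Find the error injection structure whose master or log device
 * matches dev; returns NULL if there is none. Caller must hold
 * error_mutex.
 */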
839 static mt_error_t *
840 find_by_dev(md_dev64_t dev)
841 {
842 mt_error_t *errp = &error_list_head;
843
844 ASSERT(mutex_owned(&error_mutex) != 0);
845 while ((errp = errp->er_next) != (mt_error_t *)NULL) {
846 if ((errp->er_unitp->un_m_dev == dev) ||
847 (errp->er_unitp->un_l_dev == dev))
848 break;
849 }
850 return (errp);
851 }
852
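/*
 * Decide whether to inject an error into this buf based on the state
 * of the error structure (see the state description above). Returns 1
 * if an error was injected, 0 otherwise. Caller must hold error_mutex.
 */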
853 static int
854 trans_check_error(buf_t *bp, mt_error_t *errp)
855 {
856 int rv = 0;
857 md_dev64_t target = md_expldev(bp->b_edev);
858
859 ASSERT(mutex_owned(&error_mutex) != 0);
860 switch (errp->er_state) {
861 case mte_count_down:
862 errp->er_count_down--;
863 if (errp->er_count_down == 0) {
864 /*
865 * Every other error that we inject should be on
866 * the log device. Errors will be injected on the
867 * log device when errp->er_total_errors is even
868 * and on the master device when it is odd. If
869 * this I/O is not for the appropriate device, we
870 * will set errp->er_count_down to 1, so that we
871 * can try again later.
872 */
873 if ((((errp->er_total_errors % 2) == 0) &&
874 (errp->er_unitp->un_l_dev == target)) ||
875 (((errp->er_total_errors % 2) != 0) &&
876 (errp->er_unitp->un_m_dev == target))) {
877 /* simulate an error */
878 bp->b_flags |= B_ERROR;
879 bp->b_error = EIO;
880 /* remember the error. */
881 errp->er_total_errors++;
882 errp->er_bad_unit = bp->b_edev;
883 errp->er_bad_block = bp->b_blkno;
884 /* reset counters. */
885 errp->er_count_down = errp->er_reset_count;
886 errp->er_reset_count += errp->er_increment;
887 rv = 1;
888 } else {
889 /* Try again next time. */
890 errp->er_count_down = 1;
891 }
892 }
893 break;
894
895 case mte_suspend:
896 /* No errors while suspended. */
897 break;
898
899 case mte_watch_block:
900 if ((bp->b_edev == errp->er_bad_unit) &&
901 (bp->b_blkno == errp->er_bad_block)) {
902 bp->b_flags |= B_ERROR;
903 bp->b_error = EIO;
904 rv = 1;
905 }
906 break;
907 }
908 return (rv);
909 }
910
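/*
 * mdv_strategy_tstpnt hook, called for every MDD strategy request
 * while error injection is armed. Injects errors on the master device
 * or on the log block range of an armed unit, then chains to any
 * previously installed test point. Returns non-zero when the I/O has
 * been completed here (or by the chained test point).
 */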
911 static int
912 trans_error_injector(buf_t *bp, int flag, void* private)
913 {
914 mt_error_t *errp = (mt_error_t *)NULL;
915 int (*tstpnt)(buf_t *, int, void*) = NULL;
916 int rv = 0;
917 md_dev64_t target = md_expldev(bp->b_edev);
918 int trv = 0;
919 mt_unit_t *un;
920
921 mutex_enter(&error_mutex);
922 errp = find_by_dev(target);
923 if (errp != (mt_error_t *)NULL) {
924 un = errp->er_unitp;
925 if (target == un->un_m_dev) {
926 /* Target is our master device. */
927 rv = trans_check_error(bp, errp);
928 }
929 if (target == un->un_l_dev) {
930 /*
931 * Target is our log device. Unfortunately, the same
932 * device may also be used for the MDD database.
933 * Therefore, we need to make sure that the I/O is for
934 * the range of blocks designated as our log.
935 */
936 if ((bp->b_blkno >= un->un_l_pwsblk) &&
937 ((bp->b_blkno + btodb(bp->b_bcount)) <=
938 (un->un_l_sblk + un->un_l_tblks))) {
939 rv = trans_check_error(bp, errp);
940 }
941 }
942 }
943 tstpnt = tstpnt_save;
944 mutex_exit(&error_mutex);
945
946 if (tstpnt != NULL)
947 trv = (*tstpnt)(bp, flag, private);
948
949 /*
950 * If we are producing an error (rv != 0) we need to make sure that
951 * biodone gets called. If the tstpnt returned non-zero,
952 * we'll assume that it called biodone.
953 */
954 if ((rv != 0) && (trv == 0)) {
955 md_biodone(bp);
956 }
957 rv = ((rv == 0) && (trv == 0)) ? 0 : 1;
958 return (rv);
959 }
960
961 /*
962 * Prepare to inject errors on the master and log devices associated with the
963 * unit specified in migp. The first time that trans_inject_errors() is called
964 * for a unit, an mt_error_t structure is allocated and initialized for the
965 * unit. Subsequent calls for the unit just ensure that the unit is in the
966 * count down state.
967 *
968 * If an mt_error structure is allocated and it is the first one to be put in
969 * the list, mdv_strategy_tstpnt (which is referenced in md_call_strategy()) is
970 * set to trans_error_injector so that it will be called to see if an I/O
971 * request should be treated as an error.
972 */
973
974 /*ARGSUSED1*/
975 static int
976 trans_inject_errors(void *d, int mode, IOLOCK *lock)
977 {
978 mt_error_t *errp;
979 mt_error_t *do_not_care;
980 mt_unit_t *un;
981 int rv = 0;
982
983 md_i_get_t *migp = d;
984
985 mdclrerror(&migp->mde);
986
987 un = trans_getun(migp->id, &migp->mde,
988 RD_LOCK, lock);
989 if (un == NULL)
990 return (EINVAL);
991
992 /*
993 * If there is already an error structure for the unit, make sure that
994 * it is in count down mode.
995 */
996
997 mutex_enter(&error_mutex);
998 errp = find_by_mtunit(un, &do_not_care);
999 if (errp != (mt_error_t *)NULL) {
1000 errp->er_state = mte_count_down;
1001 } else {
1002
1003 /*
1004 * Initialize error structure.
1005 */
1006
1007 errp = (mt_error_t *)md_trans_zalloc(sizeof (mt_error_t));
1008 errp->er_state = mte_count_down;
1009 errp->er_unitp = un;
1010 errp->er_count_down = initial_count;
1011 errp->er_increment = default_increment;
1012 errp->er_reset_count = initial_count;
1013 errp->er_total_errors = 0;
1014 errp->er_bad_unit = 0;
1015 errp->er_bad_block = 0;
1016
1017 /* Insert it into the list. */
1018
1019 errp->er_next = error_list_head.er_next;
1020 error_list_head.er_next = errp;
1021
1022 /*
1023 * Set up md_call_strategy to call our error injector.
1024 */
1025
1026 if (mdv_strategy_tstpnt != trans_error_injector) {
1027 tstpnt_save = mdv_strategy_tstpnt;
1028 mdv_strategy_tstpnt = trans_error_injector;
1029 }
1030 }
1031 mutex_exit(&error_mutex);
1032 return (rv);
1033 }
1034
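/*
 * MD_IOC_STOPERRORS: stop injecting errors on a unit. Removes the
 * unit's mt_error structure from the list (restoring the saved
 * mdv_strategy_tstpnt once the list is empty) and frees it. Returns
 * ENXIO if the unit was never set up for error injection.
 */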
1035 /*ARGSUSED1*/
1036 static int
1037 trans_stop_errors(void *d, int mode, IOLOCK *lock)
1038 {
1039 mt_error_t *errp = (mt_error_t *)NULL;
1040 mt_error_t *pred_errp;
1041 mt_unit_t *un;
1042 int rv = 0;
1043
1044 md_i_get_t *migp = d;
1045
1046 mdclrerror(&migp->mde);
1047
1048 un = trans_getun(migp->id, &migp->mde,
1049 RD_LOCK, lock);
1050 if (un == NULL)
1051 return (EINVAL);
1052
1053 mutex_enter(&error_mutex);
1054 errp = find_by_mtunit(un, &pred_errp);
1055 if (errp != (mt_error_t *)NULL) {
1056 /* Remove from list. */
1057 pred_errp->er_next = errp->er_next;
1058 if ((error_list_head.er_next == (mt_error_t *)NULL) &&
1059 (mdv_strategy_tstpnt == trans_error_injector)) {
1060 mdv_strategy_tstpnt = tstpnt_save;
1061 }
1062 } else {
1063 /* unit not set up for errors. */
1064 rv = ENXIO;
1065 }
1066 mutex_exit(&error_mutex);
1067
1068 /* Free memory. */
1069
1070 if (errp != (mt_error_t *)NULL) {
1071 md_trans_free((void *)errp, sizeof (*errp));
1072 }
1073 return (rv);
1074 }
1075
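/*
 * Set up and tear down the DEBUG-only ioctl state: the error_mutex
 * that protects the error injection list.
 */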
1076 int
1077 _init_ioctl()
1078 {
1079 mutex_init(&error_mutex, NULL, MUTEX_DRIVER, (void *)NULL);
1080 return (1);
1081 }
1082
1083 int
1084 _fini_ioctl()
1085 {
1086 mutex_destroy(&error_mutex);
1087 return (1);
1088 }
1089
1090 /*
1091 * END OF DEBUG ROUTINES
1092 */
1093 #endif /* DEBUG */
1094 /*
1095 * BEGIN RELEASE DEBUG
1096 * The following routines remain in the released product for testability
1097 */
1098
1099 /*
1100 * ufs error injection remains in the released product
1101 */
1102 /*ARGSUSED1*/
1103 static int
1104 trans_ufserror(void *d, int mode, IOLOCK *lock)
1105 {
1106 mt_unit_t *un;
1107
1108 md_i_get_t *migp = d;
1109
1110 mdclrerror(&migp->mde);
1111
1112 un = trans_getun(migp->id, &migp->mde,
1113 RD_LOCK, lock);
1114 if (un == NULL || un->un_ut == NULL)
1115 return (EINVAL);
1116
1117 return (0);
1118 }
1119 /*
1120 * shadow test remains in the released product
1121 */
1122 static int
1123 trans_set_shadow(void *d, int mode, IOLOCK *lock)
1124 {
1125 dev32_t device; /* shadow device */
1126 mt_unit_t *un;
1127
1128 md_i_get_t *migp = d;
1129
1130 mdclrerror(&migp->mde);
1131
1132 un = trans_getun(migp->id, &migp->mde,
1133 WR_LOCK, lock);
1134 if (un == NULL)
1135 return (EINVAL);
1136
1137 if ((un->un_debug & MT_SHADOW) == 0)
1138 return (EINVAL);
1139
1140 /* Get shadow device. User always passes down 32 bit devt */
1141
1142 if (ddi_copyin((caddr_t)(uintptr_t)migp->mdp,
1143 &device, sizeof (device), mode)) {
1144 return (EFAULT);
1145 }
1146
1147 /* Save shadow device designator. */
1148 un->un_s_dev = md_expldev((md_dev64_t)device);
1149 return (0);
1150 }
1151
1152 /*
1153 * END RELEASE DEBUG
1154 */
1155
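/*
 * MD_IOCGET: copy the trans unit structure out to the caller. If
 * migp->size is zero, only the required size is returned. Log-related
 * fields are refreshed from the log unit, and a log dynconcat is
 * picked up when the log metadevice has grown.
 */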
1156 static int
1157 trans_get(void *d, int mode, IOLOCK *lock)
1158 {
1159 mt_unit_t *un;
1160 ml_unit_t *ul;
1161
1162 md_i_get_t *migp = d;
1163
1164 mdclrerror(&migp->mde);
1165
1166 un = trans_getun(migp->id, &migp->mde,
1167 RD_LOCK, lock);
1168 if (un == NULL)
1169 return (0);
1170
1171 if (migp->size == 0) {
1172 migp->size = un->c.un_size;
1173 return (0);
1174 }
1175
1176 if (migp->size < un->c.un_size)
1177 return (EFAULT);
1178
1179 log:
1180 ul = un->un_l_unit;
1181 if (ul == NULL)
1182 goto master;
1183
1184 /*
1185 * refresh log fields in case log was metattach'ed
1186 */
1187 un->un_l_head = (daddr32_t)btodb(ul->un_head_lof);
1188 un->un_l_sblk = un->un_l_head;
1189 un->un_l_pwsblk = ul->un_pwsblk;
1190 un->un_l_maxtransfer = (uint_t)btodb(ul->un_maxtransfer);
1191 un->un_l_nblks = ul->un_nblks;
1192 un->un_l_tblks = ul->un_tblks;
1193 un->un_l_tail = (daddr32_t)btodb(ul->un_tail_lof);
1194 un->un_l_resv = ul->un_resv;
1195 un->un_l_maxresv = ul->un_maxresv;
1196 un->un_l_error = ul->un_error;
1197 un->un_l_timestamp = ul->un_timestamp;
1198
1199 /*
1200 * check for log dev dynconcat; can only pick up extra space when the
1201 * tail physically follows the head in the circular log
1202 */
1203 if (un->un_l_head <= un->un_l_tail)
1204 if (ul->un_status & LDL_METADEVICE) {
1205 struct mdc_unit *c = MD_UNIT(md_getminor(ul->un_dev));
1206
1207 if (c->un_total_blocks > un->un_l_tblks) {
1208 un->un_l_tblks = c->un_total_blocks;
1209 un->un_l_nblks = un->un_l_tblks - un->un_l_sblk;
1210 if (un->un_l_nblks > btodb(LDL_MAXLOGSIZE))
1211 un->un_l_nblks = btodb(LDL_MAXLOGSIZE);
1212 un->un_l_maxresv = (uint_t)(un->un_l_nblks *
1213 LDL_USABLE_BSIZE);
1214 }
1215 }
1216
1217 master:
1218
1219 if (ddi_copyout(un, (void *)(uintptr_t)migp->mdp, un->c.un_size, mode))
1220 return (EFAULT);
1221 return (0);
1222 }
1223
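/*
 * MD_IOCREPLACE: replace either the master or the log component of the
 * trans unit with a new device and key, then commit the unit record.
 * Fails with MDE_RESYNC_ACTIVE if a resync is in progress.
 */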
1224 static int
1225 trans_replace(replace_params_t *params)
1226 {
1227 minor_t mnum = params->mnum;
1228 mt_unit_t *un;
1229 mdi_unit_t *ui;
1230 md_dev64_t cmp_dev;
1231 md_dev64_t ldev;
1232 md_dev64_t mdev;
1233
1234 mdclrerror(¶ms->mde);
1235
1236 ui = MDI_UNIT(mnum);
1237 un = md_unit_writerlock(ui);
1238
1239 if (MD_STATUS(un) & MD_UN_RESYNC_ACTIVE) {
/* release the writer lock taken above before returning */
md_unit_writerexit(ui);
1240 return (mdmderror(&params->mde, MDE_RESYNC_ACTIVE, mnum));
1241 }
1242
1243 cmp_dev = params->old_dev;
1244 mdev = un->un_m_dev;
1245 ldev = un->un_l_dev;
1246 if (cmp_dev == mdev) {
1247 un->un_m_key = params->new_key;
1248 un->un_m_dev = params->new_dev;
1249 } else if (cmp_dev == ldev) {
1250 un->un_l_key = params->new_key;
1251 un->un_l_dev = params->new_dev;
1252 }
1253
1254 trans_commit(un, 1);
1255 md_unit_writerexit(ui);
1256 return (0);
1257 }
1258
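/*
 * MD_IOCGROW: refresh the trans size from the master metadevice (for
 * master dev dynconcat), capped at MD_MAX_BLKS_FOR_SMALL_DEVS.
 */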
1259 /*ARGSUSED1*/
1260 static int
1261 trans_grow(void *d, int mode, IOLOCK *lock)
1262 {
1263 mt_unit_t *un;
1264
1265 md_grow_params_t *mgp = d;
1266
1267 mdclrerror(&mgp->mde);
1268
1269 un = trans_getun(mgp->mnum, &mgp->mde,
1270 RD_LOCK, lock);
1271 if (un == NULL)
1272 return (0);
1273
1274 /*
1275 * check for master dev dynconcat
1276 */
1277 if (md_getmajor(un->un_m_dev) == md_major) {
1278 struct mdc_unit *c;
1279
1280 c = MD_UNIT(md_getminor(un->un_m_dev));
1281 if (c->un_total_blocks > MD_MAX_BLKS_FOR_SMALL_DEVS) {
1282 un->c.un_total_blocks = MD_MAX_BLKS_FOR_SMALL_DEVS;
1283 } else {
1284 un->c.un_total_blocks = c->un_total_blocks;
1285 }
1286 md_nblocks_set(MD_SID(un), un->c.un_total_blocks);
1287 }
1288
1289 return (0);
1290 }
1291
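/*
 * MD_IOC_TRANS_DETACH: detach the log from the trans unit; migp->size
 * carries the force argument for trans_detach(). Not allowed while a
 * debug mode is set.
 */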
1292 /*ARGSUSED1*/
1293 static int
1294 trans_detach_ioctl(void *d, int mode, IOLOCK *lock)
1295 {
1296 mt_unit_t *un;
1297 int error;
1298
1299 md_i_get_t *migp = d;
1300
1301 mdclrerror(&migp->mde);
1302
1303 /* acquire both md_unit_array_rw, and unit_reader lock */
1304 un = trans_getun(migp->id, &migp->mde,
1305 READERS, lock);
1306 if (un == NULL)
1307 return (0);
1308
1309 /*
1310 * simply too much work to make debug modes w/out a log
1311 */
1312 if (un->un_debug)
1313 return (EACCES);
1314
1315 /*
1316 * detach the log
1317 */
1318 error = trans_detach(un, migp->size);
1319
1320 return (error);
1321 }
1322
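/*
 * MD_IOCGET_LOG: copy the on-disk portion (ML_UNIT_ONDSZ bytes) of the
 * log unit structure out to the caller.
 */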
1323 static int
1324 trans_get_log(void *d, int mode, IOLOCK *lock)
1325 {
1326 mt_unit_t *un;
1327 ml_unit_t *ul;
1328
1329 md_i_get_t *migp = d;
1330
1331 mdclrerror(&migp->mde);
1332
1333 un = trans_getun(migp->id, &migp->mde, RD_LOCK, lock);
1334
1335 if (un == NULL)
1336 return (0);
1337
1338 ul = un->un_l_unit;
1339
1340 if (migp->size == 0) {
1341 migp->size = ML_UNIT_ONDSZ;
1342 return (0);
1343 }
1344
1345 if (migp->size < ML_UNIT_ONDSZ)
1346 return (EFAULT);
1347
1348 if (ddi_copyout(ul, (void *)(uintptr_t)migp->mdp, ML_UNIT_ONDSZ,
1349 mode))
1350 return (EFAULT);
1351 return (0);
1352 }
1353
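/*
 * MD_IOCGET_DEVS: return the underlying devices (master and, if
 * attached, log), translating non-metadevice dev_ts to their targets.
 */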
1354 static int
1355 trans_getdevs(void *d, int mode, IOLOCK *lock)
1356 {
1357 int ndev;
1358 mt_unit_t *un;
1359 md_dev64_t *udevs;
1360 md_dev64_t unit_dev;
1361
1362 md_getdevs_params_t *mgdp = d;
1363
1364 mdclrerror(&mgdp->mde);
1365
1366 un = trans_getun(mgdp->mnum, &mgdp->mde, RD_LOCK, lock);
1367 if (un == NULL)
1368 return (0);
1369
1370 ndev = (un->un_flags & (TRANS_DETACHED | TRANS_ATTACHING)) ? 1 : 2;
1371
1372 if (mgdp->cnt == 0) {
1373 mgdp->cnt = ndev;
1374 return (0);
1375 }
1376
1377 if (mgdp->cnt > 2)
1378 mgdp->cnt = ndev;
1379
1380 udevs = (md_dev64_t *)(uintptr_t)mgdp->devs;
1381 unit_dev = un->un_m_dev;
1382
1383 if (md_getmajor(unit_dev) != md_major) {
1384 if ((unit_dev = md_xlate_mini_2_targ(unit_dev)) == NODEV64)
1385 return (ENODEV);
1386 }
1387
1388 if (mgdp->cnt >= 1)
1389 if (ddi_copyout(&unit_dev, (caddr_t)&udevs[0],
1390 sizeof (*udevs), mode) != 0)
1391 return (EFAULT);
1392
1393 unit_dev = un->un_l_dev;
1394 if (md_getmajor(unit_dev) != md_major) {
1395 if ((unit_dev = md_xlate_mini_2_targ(unit_dev)) == NODEV64)
1396 return (ENODEV);
1397 }
1398
1399 if (mgdp->cnt >= 2)
1400 if (ddi_copyout(&unit_dev, (caddr_t)&udevs[1],
1401 sizeof (*udevs), mode) != 0)
1402 return (EFAULT);
1403
1404 return (0);
1405 }
1406
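/*
 * MD_IOCRESET: delete (reset) the trans device. Fails if the unit has
 * a parent or is open; otherwise the log is detached and the unit is
 * removed, honoring the force flag.
 */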
1407 static int
1408 trans_reset_ioctl(md_i_reset_t *mirp, IOLOCK *lock)
1409 {
1410 minor_t mnum = mirp->mnum;
1411 mt_unit_t *un;
1412 int error;
1413
1414 mdclrerror(&mirp->mde);
1415
1416 un = trans_getun(mnum, &mirp->mde, NO_LOCK, lock);
1417 if (un == NULL)
1418 return (0);
1419
1420
1421 /* This prevents new opens */
1422 rw_enter(&md_unit_array_rw.lock, RW_WRITER);
1423
1424 if (MD_HAS_PARENT(MD_PARENT(un))) {
1425 rw_exit(&md_unit_array_rw.lock);
1426 return (mdmderror(&mirp->mde, MDE_IN_USE, mnum));
1427 }
1428
1429 if (md_unit_isopen(MDI_UNIT(mnum))) {
1430 rw_exit(&md_unit_array_rw.lock);
1431 return (mdmderror(&mirp->mde, MDE_IS_OPEN, mnum));
1432 }
1433 /*
1434 * detach the log
1435 */
1436 error = trans_detach(un, mirp->force);
1437
1438 /*
1439 * reset (aka remove; aka delete) the trans device
1440 */
1441 if (error == 0)
1442 error = trans_reset(un, mnum, 1, mirp->force);
1443
1444 rw_exit(&md_unit_array_rw.lock);
1445 return (error);
1446 }
1447
1448 static int
1449 trans_get_geom(mt_unit_t *un, struct dk_geom *geomp)
1450 {
1451 md_get_geom((md_unit_t *)un, geomp);
1452
1453 return (0);
1454 }
1455
1456 static int
1457 trans_get_vtoc(mt_unit_t *un, struct vtoc *vtocp)
1458 {
1459 md_get_vtoc((md_unit_t *)un, vtocp);
1460
1461 return (0);
1462 }
1463
1464 static int
1465 trans_get_extvtoc(mt_unit_t *un, struct extvtoc *vtocp)
1466 {
1467 md_get_extvtoc((md_unit_t *)un, vtocp);
1468
1469 return (0);
1470 }
1471
1472 static int
1473 trans_islog(mt_unit_t *un)
1474 {
1475 if (un->un_l_unit == NULL)
1476 return (ENXIO);
1477 return (0);
1478 }
1479
1480 static int
1481 trans_set_vtoc(
1482 mt_unit_t *un,
1483 struct vtoc *vtocp
1484 )
1485 {
1486 return (md_set_vtoc((md_unit_t *)un, vtocp));
1487 }
1488
1489 static int
1490 trans_set_extvtoc(mt_unit_t *un, struct extvtoc *vtocp)
1491 {
1492 return (md_set_extvtoc((md_unit_t *)un, vtocp));
1493 }
1494
1495 static int
1496 trans_get_cgapart(
1497 mt_unit_t *un,
1498 struct dk_map *dkmapp
1499 )
1500 {
1501 md_get_cgapart((md_unit_t *)un, dkmapp);
1502 return (0);
1503 }
1504
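/*
 * Handle ioctls issued on the administrative device (MD_ADM_MINOR):
 * copy the argument structure in, dispatch to the handler for the
 * command, and on success copy the structure back out.
 */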
1505 static int
1506 trans_admin_ioctl(int cmd, void *data, int mode, IOLOCK *lockp)
1507 {
1508 size_t sz = 0;
1509 void *d = NULL;
1510 int err = 0;
1511
1512 /* We can only handle 32-bit clients for internal commands */
1513 if ((mode & DATAMODEL_MASK) != DATAMODEL_ILP32) {
1514 return (EINVAL);
1515 }
1516
1517 switch (cmd) {
1518
1519 case MD_IOCGET:
1520 {
1521 if (! (mode & FREAD))
1522 return (EACCES);
1523
1524 sz = sizeof (md_i_get_t);
1525
1526 if ((d = md_trans_zalloc(sz)) == NULL)
1527 return (ENOMEM);
1528
1529 if (ddi_copyin(data, d, sz, mode)) {
1530 err = EFAULT;
1531 break;
1532 }
1533
1534 err = trans_get(d, mode, lockp);
1535 break;
1536 }
1537
1538 case MD_IOCGET_LOG:
1539 {
1540 if (! (mode & FREAD))
1541 return (EACCES);
1542
1543 sz = sizeof (md_i_get_t);
1544
1545 if ((d = md_trans_zalloc(sz)) == NULL)
1546 return (ENOMEM);
1547
1548 if (ddi_copyin(data, d, sz, mode)) {
1549 err = EFAULT;
1550 break;
1551 }
1552
1553 err = trans_get_log(d, mode, lockp);
1554 break;
1555 }
1556
1557 case MD_IOCRESET:
1558 {
1559 md_i_reset_t *p;
1560
1561 if (! (mode & FWRITE))
1562 return (EACCES);
1563
1564 if ((d = p = md_trans_zalloc((sz = sizeof (*p)))) == NULL)
1565 return (ENOMEM);
1566
1567 if (ddi_copyin(data, d, sz, mode)) {
1568 err = EFAULT;
1569 break;
1570 }
1571
1572 err = trans_reset_ioctl(p, lockp);
1573 break;
1574 }
1575
1576 case MD_IOCGROW:
1577 {
1578 if (! (mode & FWRITE))
1579 return (EACCES);
1580
1581 sz = sizeof (md_grow_params_t);
1582
1583 if ((d = md_trans_zalloc(sz)) == NULL)
1584 return (ENOMEM);
1585
1586 if (ddi_copyin(data, d, sz, mode)) {
1587 err = EFAULT;
1588 break;
1589 }
1590
1591 err = trans_grow(d, mode, lockp);
1592 break;
1593 }
1594
1595 case MD_IOC_TRANS_DETACH:
1596 {
1597 if (! (mode & FWRITE))
1598 return (EACCES);
1599
1600 sz = sizeof (md_i_get_t);
1601
1602 if ((d = md_trans_zalloc(sz)) == NULL)
1603 return (ENOMEM);
1604
1605 if (ddi_copyin(data, d, sz, mode)) {
1606 err = EFAULT;
1607 break;
1608 }
1609
1610 err = trans_detach_ioctl(d, mode, lockp);
1611 break;
1612 }
1613
1614 case MD_IOCREPLACE:
1615 {
1616 replace_params_t *p;
1617
1618 if (! (mode & FWRITE))
1619 return (EACCES);
1620
1621 if ((d = p = kmem_alloc((sz = sizeof (*p)), KM_SLEEP)) == NULL)
1622 return (ENOMEM);
1623
1624 if (ddi_copyin(data, d, sz, mode)) {
1625 err = EFAULT;
1626 break;
1627 }
1628
1629 err = trans_replace(p);
1630 break;
1631 }
1632
1633
1634 case MD_IOCGET_DEVS:
1635 {
1636 if (! (mode & FREAD))
1637 return (EACCES);
1638
1639 sz = sizeof (md_getdevs_params_t);
1640
1641 if ((d = md_trans_zalloc(sz)) == NULL)
1642 return (ENOMEM);
1643
1644 if (ddi_copyin(data, d, sz, mode)) {
1645 err = EFAULT;
1646 break;
1647 }
1648
1649 err = trans_getdevs(d, mode, lockp);
1650 break;
1651 }
1652
1653 /*
1654 * debug ioctls
1655 */
1656 #ifdef DEBUG
1657
1658
1659 case MD_IOCGET_TRANSSTATS:
1660 {
1661 if (! (mode & FREAD))
1662 return (EACCES);
1663
1664 sz = sizeof (md_i_get_t);
1665
1666 if ((d = md_trans_zalloc(sz)) == NULL)
1667 return (ENOMEM);
1668
1669 if (ddi_copyin(data, d, sz, mode)) {
1670 err = EFAULT;
1671 break;
1672 }
1673
1674 err = trans_get_transstats(d, mode);
1675 break;
1676 }
1677
1678 case MD_IOC_DEBUG:
1679 {
1680 md_i_get_t *mdigp;
1681
1682 if (! (mode & FWRITE))
1683 return (EACCES);
1684
1685 sz = sizeof (md_i_get_t);
1686
1687 if ((d = md_trans_zalloc(sz)) == NULL)
1688 return (ENOMEM);
1689
1690 if (ddi_copyin(data, d, sz, mode)) {
1691 err = EFAULT;
1692 break;
1693 }
1694
1695 mdigp = d;
1696
1697 mdclrerror(&mdigp->mde);
1698 mt_debug = mdigp->size;
1699 break;
1700 }
1701
1702 case MD_IOC_TSD:
1703 {
1704 if (! (mode & FWRITE))
1705 return (EACCES);
1706
1707
1708 sz = sizeof (md_i_get_t);
1709
1710 if ((d = md_trans_zalloc(sz)) == NULL)
1711 return (ENOMEM);
1712
1713 if (ddi_copyin(data, d, sz, mode)) {
1714 err = EFAULT;
1715 break;
1716 }
1717
1718 err = trans_test_tsd(d, mode);
1719 break;
1720 }
1721
1722 case MD_IOC_TRYGETBLK:
1723 {
1724 if (! (mode & FWRITE))
1725 return (EACCES);
1726
1727
1728 sz = sizeof (md_i_get_t);
1729
1730 if ((d = md_trans_zalloc(sz)) == NULL)
1731 return (ENOMEM);
1732
1733 if (ddi_copyin(data, d, sz, mode)) {
1734 err = EFAULT;
1735 break;
1736 }
1737
1738 err = trans_test_trygetblk(d, mode, lockp);
1739 break;
1740 }
1741
1742 case MD_IOC_TRYPAGE:
1743 {
1744 if (! (mode & FWRITE))
1745 return (EACCES);
1746
1747
1748 sz = sizeof (md_i_get_t);
1749
1750 if ((d = md_trans_zalloc(sz)) == NULL)
1751 return (ENOMEM);
1752
1753 if (ddi_copyin(data, d, sz, mode)) {
1754 err = EFAULT;
1755 break;
1756 }
1757
1758 err = trans_test_trypage(d, mode, lockp);
1759 break;
1760 }
1761
1762
1763 case MD_IOC_INJECTERRORS:
1764 {
1765 if (! (mode & FWRITE))
1766 return (EACCES);
1767
1768
1769 sz = sizeof (md_i_get_t);
1770
1771 if ((d = md_trans_zalloc(sz)) == NULL)
1772 return (ENOMEM);
1773
1774 if (ddi_copyin(data, d, sz, mode)) {
1775 err = EFAULT;
1776 break;
1777 }
1778
1779 err = trans_inject_errors(d, mode, lockp);
1780 break;
1781 }
1782
1783 case MD_IOC_STOPERRORS:
1784 {
1785 if (! (mode & FWRITE))
1786 return (EACCES);
1787
1788
1789 sz = sizeof (md_i_get_t);
1790
1791 if ((d = md_trans_zalloc(sz)) == NULL)
1792 return (ENOMEM);
1793
1794 if (ddi_copyin(data, d, sz, mode)) {
1795 err = EFAULT;
1796 break;
1797 }
1798
1799 err = trans_stop_errors(d, mode, lockp);
1800 break;
1801 }
1802
1803 case MD_IOC_ISDEBUG:
1804 break;
1805
1806 #else /* ! DEBUG */
1807
1808 case MD_IOC_ISDEBUG:
1809 case MD_IOCGET_TRANSSTATS:
1810 case MD_IOC_STOPERRORS:
1811 case MD_IOC_TSD:
1812 case MD_IOC_TRYGETBLK:
1813 case MD_IOC_TRYPAGE:
1814 break;
1815
1816 /*
1817 * error injection behaves like MD_IOC_UFSERROR in released product
1818 */
1819 case MD_IOC_INJECTERRORS:
1820 {
1821 if (! (mode & FWRITE))
1822 return (EACCES);
1823
1824
1825 sz = sizeof (md_i_get_t);
1826
1827 if ((d = md_trans_zalloc(sz)) == NULL)
1828 return (ENOMEM);
1829
1830 if (ddi_copyin(data, d, sz, mode)) {
1831 err = EFAULT;
1832 break;
1833 }
1834
1835 err = trans_ufserror(d, mode, lockp);
1836 break;
1837 }
1838
1839 /*
1840 * only the shadow test is allowed in the released product
1841 */
1842 case MD_IOC_DEBUG:
1843 {
1844 md_i_get_t *mdigp;
1845
1846 if (! (mode & FWRITE))
1847 return (EACCES);
1848
1849 sz = sizeof (md_i_get_t);
1850
1851 if ((d = md_trans_zalloc(sz)) == NULL)
1852 return (ENOMEM);
1853
1854 if (ddi_copyin(data, d, sz, mode)) {
1855 err = EFAULT;
1856 break;
1857 }
1858
1859 mdigp = d;
1860
1861 mdclrerror(&mdigp->mde);
1862 mt_debug = mdigp->size & MT_SHADOW;
1863 break;
1864 }
1865
1866 #endif /* ! DEBUG */
1867
1868 /*
1869 * BEGIN RELEASE DEBUG
1870 * The following routines remain in the released product for testability
1871 */
1872
1873 case MD_IOC_UFSERROR:
1874 {
1875 if (! (mode & FWRITE))
1876 return (EACCES);
1877
1878 sz = sizeof (md_i_get_t);
1879
1880 if ((d = md_trans_zalloc(sz)) == NULL)
1881 return (ENOMEM);
1882
1883 if (ddi_copyin(data, d, sz, mode)) {
1884 err = EFAULT;
1885 break;
1886 }
1887
1888 err = trans_ufserror(d, mode, lockp);
1889 break;
1890 }
1891
1892 case MD_IOC_SETSHADOW:
1893 {
1894 if (! (mode & FWRITE))
1895 return (EACCES);
1896
1897 sz = sizeof (md_i_get_t);
1898
1899 if ((d = md_trans_zalloc(sz)) == NULL)
1900 return (ENOMEM);
1901
1902 if (ddi_copyin(data, d, sz, mode)) {
1903 err = EFAULT;
1904 break;
1905 }
1906
1907 err = trans_set_shadow(d, mode, lockp);
1908 break;
1909 }
1910
1911 /*
1912 * END RELEASE DEBUG
1913 */
1914
1915
1916 default:
1917 return (ENOTTY);
1918 }
1919
1920 /*
1921 * copyout and free any args
1922 */
1923 if (sz != 0) {
1924 if (err == 0) {
1925 if (ddi_copyout(d, data, sz, mode) != 0) {
1926 err = EFAULT;
1927 }
1928 }
1929 md_trans_free(d, sz);
1930 }
1931 return (err);
1932 }
1933
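/*
 * Trans ioctl entry point. Admin device requests are passed to
 * trans_admin_ioctl(); requests on a trans unit handle the standard
 * disk ioctls (DKIOC*) plus _FIOISLOG, which reports whether a log is
 * attached.
 */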
1934 int
1935 md_trans_ioctl(dev_t dev, int cmd, void *data, int mode, IOLOCK *lockp)
1936 {
1937 minor_t mnum = getminor(dev);
1938 mt_unit_t *un;
1939 md_error_t mde = mdnullerror;
1940 int err = 0;
1941
1942 /* handle admin ioctls */
1943 if (mnum == MD_ADM_MINOR)
1944 return (trans_admin_ioctl(cmd, data, mode, lockp));
1945
1946 /* check unit */
1947 if ((MD_MIN2SET(mnum) >= md_nsets) ||
1948 (MD_MIN2UNIT(mnum) >= md_nunits) ||
1949 ((un = trans_getun(mnum, &mde, RD_LOCK, lockp)) == NULL))
1950 return (ENXIO);
1951
1952 /* dispatch ioctl */
1953 switch (cmd) {
1954
1955 case DKIOCINFO:
1956 {
1957 struct dk_cinfo *p;
1958
1959 if (! (mode & FREAD))
1960 return (EACCES);
1961
1962 if ((p = md_trans_zalloc(sizeof (*p))) == NULL)
1963 return (ENOMEM);
1964
1965 get_info(p, mnum);
1966 if (ddi_copyout((caddr_t)p, data, sizeof (*p), mode) != 0)
1967 err = EFAULT;
1968
1969 md_trans_free(p, sizeof (*p));
1970 return (err);
1971 }
1972
1973 case DKIOCGGEOM:
1974 {
1975 struct dk_geom *p;
1976
1977 if (! (mode & FREAD))
1978 return (EACCES);
1979
1980 if ((p = md_trans_zalloc(sizeof (*p))) == NULL)
1981 return (ENOMEM);
1982
1983 if ((err = trans_get_geom(un, p)) == 0) {
1984 if (ddi_copyout((caddr_t)p, data, sizeof (*p),
1985 mode) != 0)
1986 err = EFAULT;
1987 }
1988
1989 md_trans_free(p, sizeof (*p));
1990 return (err);
1991 }
1992
1993 case DKIOCGVTOC:
1994 {
1995 struct vtoc *vtoc;
1996
1997 if (! (mode & FREAD))
1998 return (EACCES);
1999
2000 vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP);
2001 if ((err = trans_get_vtoc(un, vtoc)) != 0) {
2002 kmem_free(vtoc, sizeof (*vtoc));
2003 return (err);
2004 }
2005
2006 if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
2007 if (ddi_copyout(vtoc, data, sizeof (*vtoc), mode))
2008 err = EFAULT;
2009 }
2010 #ifdef _SYSCALL32
2011 else {
2012 struct vtoc32 *vtoc32;
2013
2014 vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP);
2015
2016 vtoctovtoc32((*vtoc), (*vtoc32));
2017 if (ddi_copyout(vtoc32, data, sizeof (*vtoc32), mode))
2018 err = EFAULT;
2019 kmem_free(vtoc32, sizeof (*vtoc32));
2020 }
2021 #endif /* _SYSCALL32 */
2022
2023 kmem_free(vtoc, sizeof (*vtoc));
2024 return (err);
2025 }
2026
2027 case DKIOCSVTOC:
2028 {
2029 struct vtoc *vtoc;
2030
2031 if (! (mode & FWRITE))
2032 return (EACCES);
2033
2034 vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP);
2035 if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
2036 if (ddi_copyin(data, vtoc, sizeof (*vtoc), mode)) {
2037 err = EFAULT;
2038 }
2039 }
2040 #ifdef _SYSCALL32
2041 else {
2042 struct vtoc32 *vtoc32;
2043
2044 vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP);
2045
2046 if (ddi_copyin(data, vtoc32, sizeof (*vtoc32), mode)) {
2047 err = EFAULT;
2048 } else {
2049 vtoc32tovtoc((*vtoc32), (*vtoc));
2050 }
2051 kmem_free(vtoc32, sizeof (*vtoc32));
2052 }
2053 #endif /* _SYSCALL32 */
2054
2055 if (err == 0)
2056 err = trans_set_vtoc(un, vtoc);
2057
2058 kmem_free(vtoc, sizeof (*vtoc));
2059 return (err);
2060 }
2061
2062
2063 case DKIOCGEXTVTOC:
2064 {
2065 struct extvtoc *extvtoc;
2066
2067 if (! (mode & FREAD))
2068 return (EACCES);
2069
2070 extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP);
2071 if ((err = trans_get_extvtoc(un, extvtoc)) != 0) {
/* free the temporary buffer on the error path */
kmem_free(extvtoc, sizeof (*extvtoc));
2072 return (err);
2073 }
2074
2075 if (ddi_copyout(extvtoc, data, sizeof (*extvtoc), mode))
2076 err = EFAULT;
2077
2078 kmem_free(extvtoc, sizeof (*extvtoc));
2079 return (err);
2080 }
2081
2082 case DKIOCSEXTVTOC:
2083 {
2084 struct extvtoc *extvtoc;
2085
2086 if (! (mode & FWRITE))
2087 return (EACCES);
2088
2089 extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP);
2090 if (ddi_copyin(data, extvtoc, sizeof (*extvtoc), mode)) {
2091 err = EFAULT;
2092 }
2093
2094 if (err == 0)
2095 err = trans_set_extvtoc(un, extvtoc);
2096
2097 kmem_free(extvtoc, sizeof (*extvtoc));
2098 return (err);
2099 }
2100
2101 case DKIOCGAPART:
2102 {
2103 struct dk_map dmp;
2104
2105 if ((err = trans_get_cgapart(un, &dmp)) != 0) {
2106 return (err);
2107 }
2108
2109 if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
2110 if (ddi_copyout((caddr_t)&dmp, data, sizeof (dmp),
2111 mode) != 0)
2112 err = EFAULT;
2113 }
2114 #ifdef _SYSCALL32
2115 else {
2116 struct dk_map32 dmp32;
2117
2118 dmp32.dkl_cylno = dmp.dkl_cylno;
2119 dmp32.dkl_nblk = dmp.dkl_nblk;
2120
2121 if (ddi_copyout((caddr_t)&dmp32, data, sizeof (dmp32),
2122 mode) != 0)
2123 err = EFAULT;
2124 }
2125 #endif /* _SYSCALL32 */
2126
2127 return (err);
2128 }
2129
2130 /*
2131 * _FIOISLOG, _FIOISLOGOK, _FIOLOGRESET are used by fsck/mkfs
2132 * after opening the device. fsck/mkfs use these ioctls for
2133 * error recovery.
2134 */
2135 case _FIOISLOG:
2136 return (trans_islog(un));
2137
2138 default:
2139 return (ENOTTY);
2140 }
2141 }
2142
2143 /*
2144 * rename named service entry points and support functions
2145 */
2146
2147 /* rename/exchange role swap functions */
2148
2149 /*
2150 * MDRNM_UPDATE_SELF
2151 * This role swap function is identical for all unit types,
2152 * so keep it here. It's also the best example because it
2153 * touches all the modified portions of the relevant
2154 * in-common structures.
2155 */
2156 void
2157 trans_rename_update_self(
2158 md_rendelta_t *delta,
2159 md_rentxn_t *rtxnp)
2160 {
2161 minor_t from_min, to_min;
2162 sv_dev_t sv;
2163 mt_unit_t *un;
2164
2165 ASSERT(rtxnp);
2166 ASSERT(rtxnp->op == MDRNOP_RENAME);
2167 ASSERT(delta);
2168 ASSERT(delta->unp);
2169 ASSERT(delta->uip);
2170 ASSERT(rtxnp->rec_idx >= 0);
2171 ASSERT(rtxnp->recids);
2172 ASSERT(delta->old_role == MDRR_SELF);
2173 ASSERT(delta->new_role == MDRR_SELF);
2174
2175 from_min = rtxnp->from.mnum;
2176 to_min = rtxnp->to.mnum;
2177 un = (mt_unit_t *)delta->unp;
2178
2179 /*
2180 * self id changes in our own unit struct;
2181 * both mechanisms for identifying the trans must be reset.
2182 */
2183
2184 MD_SID(delta->unp) = to_min;
2185 un->un_dev = makedevice(md_major, to_min);
2186
2187 /*
2188 * clear old array pointers to unit in-core and unit
2189 */
2190
2191 MDI_VOIDUNIT(from_min) = NULL;
2192 MD_VOIDUNIT(from_min) = NULL;
2193
2194 /*
2195 * and point the new slots at the unit in-core and unit structs
2196 */
2197
2198 MDI_VOIDUNIT(to_min) = delta->uip;
2199 MD_VOIDUNIT(to_min) = delta->unp;
2200
2201 /*
2202 * recreate kstats
2203 */
2204 md_kstat_destroy_ui(delta->uip);
2205 md_kstat_init_ui(to_min, delta->uip);
2206
2207 /*
2208 * the unit in-core reference to the get next link's id changes
2209 */
2210
2211 delta->uip->ui_link.ln_id = to_min;
2212
2213 /*
2214 * name space addition of the new key was done from user level;
2215 * remove the old name's key here
2216 */
2217
2218 sv.setno = MD_MIN2SET(from_min);
2219 sv.key = rtxnp->from.key;
2220
2221 md_rem_names(&sv, 1);
2222
2223
2224 /*
2225 * and store the record id (from the unit struct) into recids
2226 * for later commitment by md_rename()
2227 */
2228
2229 md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
2230 }
2231
2232 /*
2233 * MDRNM_UPDATE_KIDS
2234 * rename/exchange of our child or grandchild
2235 */
2236 void
2237 trans_renexch_update_kids(
2238 md_rendelta_t *delta,
2239 md_rentxn_t *rtxnp)
2240 {
2241 mt_unit_t *un;
2242 minor_t from_min, to_min, log_min, master_min;
2243
2244 ASSERT(delta);
2245 ASSERT(rtxnp);
2246 ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE));
2247 ASSERT(delta->unp);
2248 ASSERT(rtxnp->recids);
2249 ASSERT(rtxnp->rec_idx >= 0);
2250 ASSERT(delta->old_role == MDRR_PARENT);
2251 ASSERT(delta->new_role == MDRR_PARENT);
2252
2253 un = (mt_unit_t *)delta->unp;
2254 from_min = rtxnp->from.mnum;
2255 to_min = rtxnp->to.mnum;
2256 log_min = md_getminor(un->un_l_dev);
2257 master_min = md_getminor(un->un_m_dev);
2258
2259 /*
2260 * since our role isn't changing (parent->parent)
2261 * one of our children must be changing; which one is it?
2262 * find the child being modified, and update
2263 * our notion of it
2264 */
2265
2266 /* both devices must be metadevices in order to be updated */
2267 ASSERT(md_getmajor(un->un_m_dev) == md_major);
2268 ASSERT(!(un->un_l_unit && (md_getmajor(un->un_l_dev) != md_major)));
2269
2270 if ((md_getmajor(un->un_m_dev) == md_major) &&
2271 (master_min == from_min)) {
2272
2273 ASSERT(!(un->un_l_unit && (log_min == from_min)));
2274
2275 un->un_m_dev = makedevice(md_major, to_min);
2276 un->un_m_key = rtxnp->to.key;
2277
2278 } else if ((md_getmajor(un->un_m_dev) == md_major) &&
2279 un->un_l_unit && (log_min == from_min)) {
2280
2281 ASSERT(master_min != from_min);
2282
2283 un->un_l_dev = makedevice(md_major, to_min);
2284 un->un_l_key = rtxnp->to.key;
2285
2286 } else {
2287 ASSERT(FALSE);
2288 panic("trans_renexch_update_kids: not a metadevice");
2289 /*NOTREACHED*/
2290 }
2291
2292 md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
2293 }
2294
2295 /*
2296 * MDRNM_SELF_UPDATE_FROM (exchange down) [self->child]
2297 */
2298 void
2299 trans_exchange_self_update_from_down(
2300 md_rendelta_t *delta,
2301 md_rentxn_t *rtxnp)
2302 {
2303 mt_unit_t *un;
2304 minor_t from_min, to_min, master_min, log_min;
2305 sv_dev_t sv;
2306
2307 ASSERT(delta);
2308 ASSERT(delta->unp);
2309 ASSERT(delta->uip);
2310 ASSERT(rtxnp);
2311 ASSERT(MDRNOP_EXCHANGE == rtxnp->op);
2312 ASSERT(rtxnp->from.uip);
2313 ASSERT(rtxnp->rec_idx >= 0);
2314 ASSERT(rtxnp->recids);
2315 ASSERT(delta->old_role == MDRR_SELF);
2316 ASSERT(delta->new_role == MDRR_CHILD);
2317 ASSERT(md_getminor(delta->dev) == rtxnp->from.mnum);
2318
2319 un = (mt_unit_t *)delta->unp;
2320
2321 /*
2322 * if we're exchanging a trans, it had better be a metadevice
2323 */
2324 ASSERT(md_getmajor(un->un_m_dev) == md_major);
2325
2326 to_min = rtxnp->to.mnum;
2327 from_min = rtxnp->from.mnum;
2328 master_min = md_getminor(un->un_m_dev);
2329 log_min = md_getminor(un->un_l_dev);

	/*
	 * both mechanisms for identifying a trans must be updated
	 */

	MD_SID(delta->unp) = to_min;
	un->un_dev = makedevice(md_major, to_min);

	/*
	 * parent identifier need not change
	 */

	/*
	 * point the set array pointers at the "new" unit and unit in-cores
	 * Note: the other half of this transfer is done in the "update to"
	 * rename/exchange named service.
	 */

	MDI_VOIDUNIT(to_min) = delta->uip;
	MD_VOIDUNIT(to_min) = delta->unp;

	/*
	 * transfer kstats
	 */

	delta->uip->ui_kstat = rtxnp->to.kstatp;

	/*
	 * the unit in-core reference to the get next link's id changes
	 */

	delta->uip->ui_link.ln_id = to_min;

	/*
	 * which one of our children is changing?
	 *
	 * Note that the check routines forbid changing the log (for now)
	 * because there's no lockfs-like trans-ufs "freeze and remount"
	 * or "freeze and bobbit the log."
	 */

	/* both devices must be metadevices in order to be updated */
	ASSERT(md_getmajor(un->un_m_dev) == md_major);
	ASSERT(!(un->un_l_unit && (md_getmajor(un->un_l_dev) != md_major)));

	if ((md_getmajor(un->un_m_dev) == md_major) &&
	    (master_min == to_min)) {

		/* master and log can't both be changed */
		ASSERT(!(un->un_l_unit && (log_min == to_min)));

		un->un_m_dev = makedevice(md_major, from_min);
		sv.key = un->un_m_key;
		un->un_m_key = rtxnp->from.key;

	} else if ((md_getmajor(un->un_m_dev) == md_major) &&
	    un->un_l_unit && (log_min == to_min)) {

		/* master and log can't both be changed */
		ASSERT(!(master_min == to_min));

		un->un_l_dev = makedevice(md_major, from_min);
		sv.key = un->un_l_key;
		un->un_l_key = rtxnp->from.key;

	} else {
		ASSERT(FALSE);
		panic("trans_exchange_self_update_from_down: not a metadevice");
		/*NOTREACHED*/
	}

	/*
	 * the new master must exist in the name space
	 */
	ASSERT(rtxnp->from.key != MD_KEYWILD);
	ASSERT(rtxnp->from.key != MD_KEYBAD);

	/*
	 * delete the key for the changed child from the namespace
	 */

	sv.setno = MD_MIN2SET(from_min);
	md_rem_names(&sv, 1);

	/*
	 * and store the record id (from the unit struct) into recids
	 */

	md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
}

/*
 * MDRNM_PARENT_UPDATE_TO (exchange down) [parent->self]
 */
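/*
 * The trans described by this delta takes over the SELF role: its
 * unit id, un_dev, set array slots, kstat pointer and link id all
 * move to the "from" minor, and the affected child (the master, or
 * an attached log in the analogous case) is re-pointed at the "to"
 * minor.  That child's old namespace key is removed at the end.
 */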
void
trans_exchange_parent_update_to(
	md_rendelta_t	*delta,
	md_rentxn_t	*rtxnp)
{
	mt_unit_t	*un;
	minor_t		from_min, to_min, master_min, log_min;
	sv_dev_t	sv;

	ASSERT(delta);
	ASSERT(delta->unp);
	ASSERT(delta->uip);
	ASSERT(rtxnp);
	ASSERT(MDRNOP_EXCHANGE == rtxnp->op);
	ASSERT(rtxnp->from.uip);
	ASSERT(rtxnp->rec_idx >= 0);
	ASSERT(rtxnp->recids);
	ASSERT(delta->old_role == MDRR_PARENT);
	ASSERT(delta->new_role == MDRR_SELF);
	ASSERT(md_getminor(delta->dev) == rtxnp->to.mnum);

	un = (mt_unit_t *)delta->unp;

	ASSERT(md_getmajor(un->un_m_dev) == md_major);

	to_min = rtxnp->to.mnum;
	from_min = rtxnp->from.mnum;
	master_min = md_getminor(un->un_m_dev);
	log_min = md_getminor(un->un_l_dev);
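	/* log_min is meaningful only when a log (un_l_unit) is attached */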

	/*
	 * both mechanisms for identifying a trans must be updated
	 */

	MD_SID(delta->unp) = from_min;
	un->un_dev = makedevice(md_major, from_min);

	/*
	 * parent identifier need not change
	 */

	/*
	 * point the set array pointers at the "new" unit and unit in-cores
	 * Note: the other half of this transfer is done in the "update to"
	 * rename/exchange named service.
	 */

	MDI_VOIDUNIT(from_min) = delta->uip;
	MD_VOIDUNIT(from_min) = delta->unp;

	/*
	 * transfer kstats
	 */

	delta->uip->ui_kstat = rtxnp->from.kstatp;

	/*
	 * the unit in-core reference to the get next link's id changes
	 */

	delta->uip->ui_link.ln_id = from_min;

	/*
	 * which one of our children is changing?
	 */

	/* both devices must be metadevices in order to be updated */
	ASSERT(md_getmajor(un->un_m_dev) == md_major);
	ASSERT(!(un->un_l_unit && (md_getmajor(un->un_l_dev) != md_major)));

	if ((md_getmajor(un->un_m_dev) == md_major) &&
	    (master_min == from_min)) {

		/* can't be changing log and master */
		ASSERT(!(un->un_l_unit && (log_min == to_min)));

		un->un_m_dev = makedevice(md_major, to_min);
		sv.key = un->un_m_key;
		un->un_m_key = rtxnp->to.key;

	} else if (un->un_l_unit &&
	    ((md_getmajor(un->un_l_dev) == md_major) && log_min == to_min)) {

		/* can't be changing log and master */
		ASSERT(master_min != from_min);

		un->un_l_dev = makedevice(md_major, to_min);
		sv.key = un->un_l_key;
		un->un_l_key = rtxnp->to.key;

	} else {
		ASSERT(FALSE);
		panic("trans_exchange_parent_update_to: not a metadevice");
		/*NOTREACHED*/
	}

	/*
	 * delete the key for the changed child from the namespace
	 */

	sv.setno = MD_MIN2SET(from_min);
	md_rem_names(&sv, 1);

	/*
	 * and store the record id (from the unit struct) into recids
	 */

	md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
}

/*
 * MDRNM_LIST_URKIDS: named svc entry point
 * add all delta entries appropriate for our children onto the
 * deltalist pointed to by dlpp
 */
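/*
 * A delta is built for the master and, when one is attached, for the
 * log, but only when that child is itself a metadevice; physical
 * components are unaffected by a rename.  Returns the number of
 * deltas added, or -1 on error.
 */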
int
trans_rename_listkids(
	md_rendelta_t	**dlpp,
	md_rentxn_t	*rtxnp)
{
	minor_t		from_min, to_min, master_min, log_min;
	mt_unit_t	*from_un;
	md_rendelta_t	*new, *p;
	int		n_children;

	ASSERT(rtxnp);
	ASSERT(dlpp);
	ASSERT((rtxnp->op == MDRNOP_EXCHANGE) || (rtxnp->op == MDRNOP_RENAME));

	from_min = rtxnp->from.mnum;
	to_min = rtxnp->to.mnum;
	n_children = 0;

	if (!MDI_UNIT(from_min) || !(from_un = MD_UNIT(from_min))) {
		(void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, from_min);
		return (-1);
	}
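	/*
	 * walk to the tail of the caller's delta list so the deltas
	 * built below can be chained onto the end
	 */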
	for (p = *dlpp; p && p->next != NULL; p = p->next) {
		/* NULL */
	}

	if (md_getmajor(from_un->un_m_dev) == md_major) {

		master_min = md_getminor(from_un->un_m_dev);

		p = new = md_build_rendelta(MDRR_CHILD,
		    to_min == master_min? MDRR_SELF: MDRR_CHILD,
		    from_un->un_m_dev, p, MD_UNIT(master_min),
		    MDI_UNIT(master_min), &rtxnp->mde);

		if (!new) {
			if (mdisok(&rtxnp->mde)) {
				(void) mdsyserror(&rtxnp->mde, ENOMEM);
			}
			return (-1);
		}
		++n_children;
	}

	if (from_un->un_l_unit &&
	    (md_getmajor(from_un->un_l_dev) == md_major)) {

		log_min = md_getminor(from_un->un_l_dev);

		new = md_build_rendelta(MDRR_CHILD,
		    to_min == log_min? MDRR_SELF: MDRR_CHILD,
		    from_un->un_l_dev, p, MD_UNIT(log_min),
		    MDI_UNIT(log_min), &rtxnp->mde);
		if (!new) {
			if (mdisok(&rtxnp->mde)) {
				(void) mdsyserror(&rtxnp->mde, ENOMEM);
			}
			return (-1);
		}
		++n_children;
	}

	return (n_children);
}

/*
 * support routine for MDRNM_CHECK
 */
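/*
 * Verify that this trans may be renamed or exchanged: it must exist,
 * be capable of having metadevice children, and have at most one
 * parent.  For an exchange, the master (and any attached log) must
 * itself be a metadevice and the exchange target must be the master.
 */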
static int
trans_may_renexch_self(
	mt_unit_t	*un,
	mdi_unit_t	*ui,
	md_rentxn_t	*rtxnp)
{
	minor_t	from_min;
	minor_t	to_min;

	ASSERT(rtxnp);
	ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE));

	from_min = rtxnp->from.mnum;
	to_min = rtxnp->to.mnum;

	if (!un || !ui) {
		(void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
		    from_min);
		return (EINVAL);
	}
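	/*
	 * asserted in DEBUG kernels, but also checked at run time so a
	 * non-DEBUG kernel rejects the request instead of proceeding
	 */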
	ASSERT(MD_CAPAB(un) & MD_CAN_META_CHILD);

	if (!(MD_CAPAB(un) & MD_CAN_META_CHILD)) {
		(void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min);
		return (EINVAL);
	}

	if (MD_PARENT(un) == MD_MULTI_PARENT) {
		(void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min);
		return (EINVAL);
	}

	switch (rtxnp->op) {
	case MDRNOP_EXCHANGE:
		/*
		 * may only swap with our child (master) if it is a metadevice
		 */
		if (md_getmajor(un->un_m_dev) != md_major) {
			(void) mdmderror(&rtxnp->mde, MDE_RENAME_TARGET_BAD,
			    to_min);
			return (EINVAL);
		}

		if (un->un_l_unit &&
		    (md_getmajor(un->un_l_dev) != md_major)) {

			(void) mdmderror(&rtxnp->mde, MDE_RENAME_TARGET_BAD,
			    to_min);
			return (EINVAL);
		}

		if (md_getminor(un->un_m_dev) != to_min) {
			(void) mdmderror(&rtxnp->mde, MDE_RENAME_TARGET_BAD,
			    to_min);
			return (EINVAL);
		}

		break;

	case MDRNOP_RENAME:
		break;

	default:
		(void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
		    from_min);
		return (EINVAL);
	}

	return (0);		/* ok */
}

/*
 * Named service entry point: MDRNM_CHECK
 */
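/*
 * Reject the rename/exchange if this trans has a log attached or,
 * for the offline flavour of the operation, if the trans is open and
 * is one of the two devices being renumbered.  When the trans is the
 * device being renamed, the additional self checks above are applied;
 * when it is the self or parent device, trans_in_stack is flagged for
 * the later online/offline checks.
 */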
intptr_t
trans_rename_check(
	md_rendelta_t	*delta,
	md_rentxn_t	*rtxnp)
{
	int		err = 0;
	mt_unit_t	*un;

	ASSERT(delta);
	ASSERT(rtxnp);
	ASSERT(delta->unp);
	ASSERT(delta->uip);
	ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE));

	if (!delta || !rtxnp || !delta->unp || !delta->uip) {
		(void) mdsyserror(&rtxnp->mde, EINVAL);
		return (EINVAL);
	}

	un = (mt_unit_t *)delta->unp;
	if (rtxnp->revision == MD_RENAME_VERSION_OFFLINE) {
		/*
		 * A trans may not be open if it is being modified by the
		 * exchange or rename; trans-UFS hasn't been verified to
		 * handle the change out from underneath it.
		 */
		if ((md_unit_isopen(delta->uip)) &&
		    ((md_getminor(delta->dev) == rtxnp->from.mnum) ||
		    (md_getminor(delta->dev) == rtxnp->to.mnum))) {
			(void) mdmderror(&rtxnp->mde,
			    MDE_RENAME_BUSY, rtxnp->from.mnum);
			return (EBUSY);
		}
	}

	/*
	 * can't rename or exchange with a log attached
	 */

	if (un->un_l_unit) {
		(void) mdmderror(&rtxnp->mde,
		    MDE_RENAME_BUSY, rtxnp->from.mnum);
		return (EBUSY);
	}

	switch (delta->old_role) {
	case MDRR_SELF:
		/*
		 * self does additional checks
		 */
		err = trans_may_renexch_self((mt_unit_t *)delta->unp,
		    delta->uip, rtxnp);
		if (err != 0) {
			goto out;
		}
		/* FALLTHROUGH */

	case MDRR_PARENT:
		/*
		 * top_is_trans is only used to check for online
		 * rename/exchange when MD_RENAME_VERSION == OFFLINE
		 * since trans holds the sub-devices open
		 */
		rtxnp->stat.trans_in_stack = TRUE;
		break;
	default:
		break;
	}
out:
	return (err);
}

/* end of rename/exchange */