xref: /titanic_41/usr/src/uts/common/io/lvm/trans/trans_ioctl.c (revision 1210f54884e43adf01df70c22de6715184c23aa7)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  * Copyright 2012 Milan Jurik. All rights reserved.
26  */
27 
28 #include <sys/param.h>
29 #include <sys/systm.h>
30 #include <sys/conf.h>
31 #include <sys/file.h>
32 #include <sys/user.h>
33 #include <sys/uio.h>
34 #include <sys/t_lock.h>
35 #include <sys/dkio.h>
36 #include <sys/vtoc.h>
37 #include <sys/kmem.h>
38 #include <vm/page.h>
39 #include <sys/cmn_err.h>
40 #include <sys/sysmacros.h>
41 #include <sys/types.h>
42 #include <sys/mkdev.h>
43 #include <sys/stat.h>
44 #include <sys/open.h>
45 #include <sys/lvm/md_trans.h>
46 #include <sys/modctl.h>
47 #include <sys/ddi.h>
48 #include <sys/sunddi.h>
49 #include <sys/debug.h>
50 #include <sys/filio.h>
51 #include <sys/lvm/md_notify.h>
52 #include <sys/callb.h>
53 #include <sys/disp.h>
54 
55 #include <sys/sysevent/eventdefs.h>
56 #include <sys/sysevent/svm.h>
57 
/*
 * Variables defined outside this file.  Of these, md_nsets and md_nunits
 * are used for range checks in trans_getun() and md_major is compared
 * against the master device's major number in trans_grow().
 */
58 extern int		md_status;
59 extern unit_t		md_nunits;
60 extern set_t		md_nsets;
61 extern md_set_t		md_set[];
62 extern md_ops_t		trans_md_ops;
63 extern md_krwlock_t	md_unit_array_rw;
64 extern uint_t		mt_debug;
65 
66 extern major_t	md_major;
67 
68 static mt_unit_t *
trans_getun(minor_t mnum,md_error_t * mde,int flags,IOLOCK * lock)69 trans_getun(minor_t mnum, md_error_t *mde, int flags, IOLOCK *lock)
70 {
71 	mt_unit_t	*un;
72 	mdi_unit_t	*ui;
73 	set_t		setno = MD_MIN2SET(mnum);
74 
75 	if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) {
76 		(void) mdmderror(mde, MDE_INVAL_UNIT, mnum);
77 		return (NULL);
78 	}
79 
80 	if (! (flags & STALE_OK)) {
81 		if (md_get_setstatus(setno) & MD_SET_STALE) {
82 			(void) mdmddberror(mde, MDE_DB_STALE, mnum, setno);
83 			return (NULL);
84 		}
85 	}
86 
87 	ui = MDI_UNIT(mnum);
88 	if (flags & NO_OLD) {
89 		if (ui != NULL) {
90 			(void) mdmderror(mde, MDE_UNIT_ALREADY_SETUP, mnum);
91 			return (NULL);
92 		}
93 		return ((mt_unit_t *)1);
94 	}
95 
96 	if (ui == NULL) {
97 		(void) mdmderror(mde, MDE_UNIT_NOT_SETUP, mnum);
98 		return (NULL);
99 	}
100 
101 	if (flags & ARRAY_WRITER)
102 		md_array_writer(lock);
103 	else if (flags & ARRAY_READER)
104 		md_array_reader(lock);
105 
106 	if (!(flags & NO_LOCK)) {
107 		if (flags & WR_LOCK)
108 			(void) md_ioctl_writerlock(lock, ui);
109 		else /* RD_LOCK */
110 			(void) md_ioctl_readerlock(lock, ui);
111 	}
112 	un = (mt_unit_t *)MD_UNIT(mnum);
113 
114 	if (un->c.un_type != MD_METATRANS) {
115 		(void) mdmderror(mde, MDE_NOT_MT, mnum);
116 		return (NULL);
117 	}
118 
119 	return (un);
120 }
121 
122 #ifdef	DEBUG
123 /*
124  * DEBUG ROUTINES
125  * 	THESE ROUTINES ARE ONLY USED WHEN ASSERTS ARE ENABLED
126  */
127 
/*
 * Strategy test hook, defined outside this file.  trans_inject_errors()
 * points it at trans_error_injector() and trans_stop_errors() puts the
 * saved value back once the error list is empty.
 */
128 extern int		(*mdv_strategy_tstpnt)(buf_t *, int, void*);
129 
130 /*
131  * return the global stats struct
132  */
133 static int
trans_get_transstats(void * d,int mode)134 trans_get_transstats(void *d, int mode)
135 {
136 	md_i_get_t *migp = d;
137 
138 	mdclrerror(&migp->mde);
139 
140 	if (migp->size == 0) {
141 		migp->size = sizeof (struct transstats);
142 		return (0);
143 	}
144 
145 	if (migp->size < sizeof (struct transstats))
146 		return (EFAULT);
147 
148 	if (ddi_copyout(&transstats, (caddr_t)(uintptr_t)migp->mdp,
149 	    sizeof (struct transstats), mode))
150 		return (EFAULT);
151 	return (0);
152 }
153 
154 /*
155  * test ioctls
156  */
157 /*
158  * TEST TRYGETBLK
159  */
160 /*ARGSUSED1*/
161 static int
trans_test_trygetblk(void * d,int mode,IOLOCK * lock)162 trans_test_trygetblk(void *d, int mode, IOLOCK *lock)
163 {
164 	mt_unit_t	*un;
165 	int		test;
166 	dev_t		dev;
167 	struct buf	*bp;
168 	struct buf	*trygetblk();
169 
170 	md_i_get_t *migp = d;
171 
172 	mdclrerror(&migp->mde);
173 	migp->size = 0;
174 
175 	un = trans_getun(migp->id, &migp->mde,
176 	    RD_LOCK, lock);
177 	if (un == NULL)
178 		return (EINVAL);
179 
180 	dev = un->un_m_dev;
181 
182 	/*
183 	 * test 1 -- don't find nonexistant buf
184 	 */
185 	test = 1;
186 	if (bp = trygetblk(dev, 0))
187 		goto errout;
188 
189 	/*
190 	 * test 2 - don't find stale buf
191 	 */
192 	test = 2;
193 	if ((bp = getblk(dev, 0, DEV_BSIZE)) == NULL)
194 		goto errout;
195 	bp->b_flags |= (B_STALE|B_DONE);
196 	brelse(bp);
197 	if (bp = trygetblk(dev, 0))
198 		goto errout;
199 
200 	/*
201 	 * test 3 -- don't find busy buf
202 	 */
203 	test = 3;
204 	if ((bp = getblk(dev, 0, DEV_BSIZE)) == NULL)
205 		goto errout;
206 	if (trygetblk(dev, 0))
207 		goto errout;
208 	bp->b_flags |= B_STALE;
209 	brelse(bp);
210 
211 	/*
212 	 * test 4 -- don't find not-done buf
213 	 */
214 	test = 4;
215 	if ((bp = getblk(dev, 0, DEV_BSIZE)) == NULL)
216 		goto errout;
217 	brelse(bp);
218 	if (bp = trygetblk(dev, 0))
219 		goto errout;
220 
221 	/*
222 	 * test 5 -- find an idle buf
223 	 */
224 	test = 5;
225 	if ((bp = bread(dev, 0, DEV_BSIZE)) == NULL)
226 		goto errout;
227 	brelse(bp);
228 	if ((bp = trygetblk(dev, 0)) == NULL)
229 		goto errout;
230 	bp->b_flags |= B_STALE;
231 	brelse(bp);
232 	bp = 0;
233 
234 	test = 0;	/* no test failed */
235 errout:
236 	if (bp) {
237 		bp->b_flags |= B_STALE;
238 		brelse(bp);
239 	}
240 	migp->size = test;
241 	if (test)
242 		return (EINVAL);
243 	return (0);
244 }
245 /*
246  * TEST TRYGETPAGE
247  */
248 static page_t *
trans_trypage(struct vnode * vp,uint_t off)249 trans_trypage(struct vnode *vp, uint_t off)
250 {
251 	page_t		*pp;
252 
253 	/*
254 	 * get a locked page
255 	 */
256 	if ((pp = page_lookup_nowait(vp, off, SE_EXCL)) == NULL)
257 		return (NULL);
258 	/*
259 	 * get the iolock
260 	 */
261 	if (!page_io_trylock(pp)) {
262 		page_unlock(pp);
263 		return (NULL);
264 	}
265 	return (pp);
266 }
267 
268 /*ARGSUSED1*/
269 static int
trans_test_trypage(void * d,int mode,IOLOCK * lock)270 trans_test_trypage(void *d, int mode, IOLOCK *lock)
271 {
272 	mt_unit_t		*un;
273 	int			test;
274 	dev_t			dev;
275 	struct page		*pp;
276 	struct vnode		*devvp;
277 	struct vnode		*cvp;
278 	extern struct vnode	*common_specvp(struct vnode *);
279 	extern void		pvn_io_done(struct page *);
280 
281 	md_i_get_t *migp = d;
282 
283 	mdclrerror(&migp->mde);
284 	migp->size = 0;
285 
286 	un = trans_getun(migp->id, &migp->mde,
287 	    RD_LOCK, lock);
288 	if (un == NULL)
289 		return (EINVAL);
290 
291 	dev = un->un_m_dev;
292 	devvp = makespecvp(dev, VBLK);
293 	cvp = common_specvp(devvp);
294 
295 	/*
296 	 * get rid of the devices pages
297 	 */
298 	(void) VOP_PUTPAGE(cvp, (offset_t)0, (uint_t)0, B_INVAL, CRED(), NULL);
299 
300 	/*
301 	 * test 1 -- don't find nonexistant page
302 	 */
303 	test = 1;
304 	if (pp = trans_trypage(cvp, 0))
305 		goto errout;
306 
307 	/*
308 	 * test 2 -- don't find busy page
309 	 */
310 	test = 2;
311 	if ((pp = page_create(cvp, 0, 1, PG_WAIT)) == NULL)
312 		goto errout;
313 	if (trans_trypage(cvp, 0))
314 		goto errout;
315 	pvn_io_done(pp);
316 	pp = 0;
317 
318 	/*
319 	 * test 3 - find an idle page
320 	 */
321 	test = 3;
322 	if ((pp = page_create(cvp, 0, 1, PG_WAIT)) == NULL)
323 		goto errout;
324 	pvn_io_done(pp);
325 	if ((pp = trans_trypage(cvp, 0)) == NULL)
326 		goto errout;
327 	pvn_io_done(pp);
328 	pp = 0;
329 
330 	test = 0;	/* no test failed */
331 errout:
332 	if (pp)
333 		pvn_io_done(pp);
334 	/*
335 	 * get rid of the file's pages
336 	 */
337 	(void) VOP_PUTPAGE(cvp, (offset_t)0, (uint_t)0, B_INVAL, CRED(), NULL);
338 	VN_RELE(devvp);
339 
340 	migp->size = test;
341 	if (test)
342 		return (EINVAL);
343 	return (0);
344 }
345 /*
346  * TEST TSD
347  */
/* number of TSD keys used by the TSD tests (size of keys[]) */
348 #define	NKEYS		(7)
/* number of test threads started by trans_test_tsd() (size of tta[]) */
349 #define	NTSDTHREADS	(3)
/*
 * State shared between trans_test_tsd() and one trans_test_thread().
 */
350 struct tothread {
	/* NOTE(review): not touched by the TSD routines in this file - confirm */
351 	int		test;
	/* 500, 510 or 520 when trans_test_thread() detects a failure */
352 	int		error;
	/* number of trans_test_destructor() calls made with this struct */
353 	int		exits;
	/* handshake step; set by trans_test_step(), awaited by stepwait */
354 	int		step;
	/* held while step and exits are read or changed */
355 	kmutex_t	lock;
	/* broadcast by trans_test_step() whenever step is set */
356 	kcondvar_t	cv;
357 };
/* the TSD keys under test */
358 static uint_t		keys[NKEYS];
/* one entry per test thread */
359 static struct tothread	tta[NTSDTHREADS];
/* value trans_test_allocator() stores in each new allocation */
360 static int		allocatorvalue;
/* set to 1 by trans_test_destructor_alloc() when it sees allocatorvalue */
361 static int		okdestructoralloc;
362 
363 static void
trans_test_stepwait(struct tothread * tp,int step)364 trans_test_stepwait(struct tothread *tp, int step)
365 {
366 	/*
367 	 * wait for other thread
368 	 */
369 	mutex_enter(&tp->lock);
370 	while (tp->step < step)
371 		cv_wait(&tp->cv, &tp->lock);
372 	mutex_exit(&tp->lock);
373 }
374 
375 static void
trans_test_step(struct tothread * tp,int step)376 trans_test_step(struct tothread *tp, int step)
377 {
378 	/*
379 	 * wakeup other threads
380 	 */
381 	mutex_enter(&tp->lock);
382 	tp->step = step;
383 	cv_broadcast(&tp->cv);
384 	mutex_exit(&tp->lock);
385 }
386 
387 static void
trans_test_destructor(void * voidp)388 trans_test_destructor(void *voidp)
389 {
390 	int		exits;
391 	struct tothread	*tp	= voidp;
392 
393 	/*
394 	 * check that threads clean up *all* TSD at exit
395 	 */
396 	mutex_enter(&tp->lock);
397 	exits = ++tp->exits;
398 	mutex_exit(&tp->lock);
399 	if (exits >= NKEYS)
400 		trans_test_step(tp, 3);
401 }
402 
403 static void
trans_test_destructor_alloc(void * voidp)404 trans_test_destructor_alloc(void *voidp)
405 {
406 	int	*value	= voidp;
407 
408 	okdestructoralloc = 0;
409 	if (value) {
410 		if (*value == allocatorvalue)
411 			okdestructoralloc = 1;
412 		md_trans_free((caddr_t)value, sizeof (value));
413 	}
414 }
415 
416 static void *
trans_test_allocator(void)417 trans_test_allocator(void)
418 {
419 	int	*value;
420 
421 	value = (int *)md_trans_zalloc(sizeof (value));
422 	*value = allocatorvalue;
423 	return ((void *)value);
424 }
425 
426 /*
427  * thread used to test TSD destroy functionality
428  */
429 static void
trans_test_thread(struct tothread * tp)430 trans_test_thread(struct tothread *tp)
431 {
432 	int	i;
433 	callb_cpr_t	cprinfo;
434 
435 	/*
436 	 * Register cpr callback
437 	 */
438 	CALLB_CPR_INIT(&cprinfo, &tp->lock, callb_generic_cpr,
439 	    "trans_test_thread");
440 
441 	/*
442 	 * get some TSD
443 	 */
444 	for (i = NKEYS - 1; i >= 0; --i)
445 		if (tsd_set(keys[i], tp)) {
446 			tp->error = 500;
447 			goto errout;
448 		}
449 	/*
450 	 * tell parent that we have TSD
451 	 */
452 	trans_test_step(tp, 1);
453 
454 	/*
455 	 * wait for parent to destroy some of our TSD
456 	 */
457 	trans_test_stepwait(tp, 2);
458 
459 	/*
460 	 * make sure that the appropriate TSD was destroyed
461 	 */
462 	if ((tsd_get(keys[0]) != NULL) ||
463 	    (tsd_get(keys[NKEYS-1]) != NULL) ||
464 	    (tsd_get(keys[NKEYS>>1]) != NULL)) {
465 		tp->error = 510;
466 		goto errout;
467 	}
468 	for (i = 0; i < NKEYS; ++i)
469 		if (tsd_get(keys[i]) != tp)
470 			if (i != 0 && i != NKEYS - 1 && i != NKEYS >> 1) {
471 				tp->error = 520;
472 				goto errout;
473 			}
474 
475 	/*
476 	 * set up cpr exit
477 	 */
478 	mutex_enter(&tp->lock);
479 	CALLB_CPR_EXIT(&cprinfo);
480 	thread_exit();
481 errout:
482 	/*
483 	 * error -- make sure the parent will wake up (error code in tp)
484 	 */
485 	trans_test_step(tp, 3);
486 
487 	/*
488 	 * set up cpr exit
489 	 */
490 	mutex_enter(&tp->lock);
491 	CALLB_CPR_EXIT(&cprinfo);
492 	thread_exit();
493 }
494 
495 static void
trans_test_threadcreate(struct tothread * tp)496 trans_test_threadcreate(struct tothread *tp)
497 {
498 	/*
499 	 * initialize the per thread struct and make a thread
500 	 */
501 	bzero((caddr_t)tp, sizeof (struct tothread));
502 
503 	mutex_init(&tp->lock, NULL, MUTEX_DEFAULT, NULL);
504 	cv_init(&tp->cv, NULL, CV_DEFAULT, NULL);
505 
506 	(void) thread_create(NULL, 0, trans_test_thread, tp, 0, &p0,
507 	    TS_RUN, minclsyspri);
508 }
509 /*
510  * driver for TSD tests -- *NOT REENTRANT*
511  */
512 /*ARGSUSED1*/
513 static int
trans_test_tsd(void * d,int mode)514 trans_test_tsd(void *d, int mode)
515 {
516 	int		test;
517 	uint_t		rekeys[NKEYS];
518 	int		i;
519 	uint_t		key;
520 	int		error;
521 
522 	md_i_get_t *migp = d;
523 
524 	mdclrerror(&migp->mde);
525 	migp->size = 0;
526 
527 	/*
528 	 * destroy old keys, if any
529 	 */
530 	for (i = 0; i < NKEYS; ++i)
531 		tsd_destroy(&keys[i]);
532 	/*
533 	 * test 1 -- simple create and destroy keys tests
534 	 */
535 	test = 1;
536 	error = 0;
537 	for (i = 0; i < NKEYS; ++i) {
538 		tsd_create(&keys[i], NULL);
539 
540 		/* get with no set should return NULL */
541 		if (tsd_get(keys[i]) != NULL) {
542 			error = 100;
543 			goto errout;
544 		}
545 
546 		/* destroyed key should be 0 */
547 		key = keys[i];
548 		tsd_destroy(&keys[i]);
549 		if (keys[i]) {
550 			error = 110;
551 			goto errout;
552 		}
553 
554 		/* destroy the key twice */
555 		keys[i] = key;
556 		tsd_destroy(&keys[i]);
557 
558 		/* destroyed key should be 0 */
559 		if (keys[i]) {
560 			error = 120;
561 			goto errout;
562 		}
563 
564 		/* getting a destroyed key should return NULL */
565 		if (tsd_get(keys[i]) != NULL) {
566 			error = 130;
567 			goto errout;
568 		}
569 		/* recreate the key */
570 		tsd_create(&keys[i], NULL);
571 
572 		/* should be the same key as before */
573 		if (key != keys[i]) {
574 			error = 140;
575 			goto errout;
576 		}
577 
578 		/* initial value should be NULL */
579 		if (tsd_get(keys[i]) != NULL) {
580 			error = 150;
581 			goto errout;
582 		}
583 
584 		/* cleanup */
585 		tsd_destroy(&keys[i]);
586 	}
587 
588 	/*
589 	 * test 2 -- recreate keys
590 	 */
591 	test = 2;
592 	error = 0;
593 	for (i = 0; i < NKEYS; ++i)
594 		tsd_create(&keys[i], NULL);
595 	for (i = 0; i < NKEYS; ++i) {
596 		/* make sure the keys were created */
597 		if (keys[i] == 0) {
598 			error = 200;
599 			goto errout;
600 		}
601 
602 		/* make sure that recreating key doesn't change it */
603 		rekeys[i] = keys[i];
604 		tsd_create(&rekeys[i], NULL);
605 		if (rekeys[i] != keys[i]) {
606 			error = 210;
607 			goto errout;
608 		}
609 	}
610 	for (i = 0; i < NKEYS; ++i)
611 		tsd_destroy(&keys[i]);
612 
613 	/*
614 	 * test 3 -- check processing for unset and destroyed keys
615 	 */
616 	test = 3;
617 	error = 0;
618 
619 	/* getting a 0 key returns NULL */
620 	if (tsd_get(0) != NULL) {
621 		error = 300;
622 		goto errout;
623 	}
624 
625 	/* setting a 0 key returns error */
626 	if (tsd_set(0, NULL) != EINVAL) {
627 		error = 310;
628 		goto errout;
629 	}
630 	tsd_create(&key, NULL);
631 
632 	/* setting a created key returns no error */
633 	if (tsd_set(key, NULL) == EINVAL) {
634 		error = 320;
635 		goto errout;
636 	}
637 	tsd_destroy(&key);
638 
639 	/* setting a destroyed key returns error */
640 	if (tsd_set(key, NULL) != EINVAL) {
641 		error = 330;
642 		goto errout;
643 	}
644 
645 	/*
646 	 * test 4 -- make sure that set and get work
647 	 */
648 	test = 4;
649 	error = 0;
650 
651 	for (i = 0; i < NKEYS; ++i) {
652 		tsd_create(&keys[i], NULL);
653 
654 		/* set a value */
655 		(void) tsd_set(keys[i], &key);
656 
657 		/* get the value */
658 		if (tsd_get(keys[i]) != &key) {
659 			error = 400;
660 			goto errout;
661 		}
662 
663 		/* set the value to NULL */
664 		(void) tsd_set(keys[i], NULL);
665 
666 		/* get the NULL */
667 		if (tsd_get(keys[i]) != NULL) {
668 			error = 410;
669 			goto errout;
670 		}
671 	}
672 	/* cleanup */
673 	for (i = 0; i < NKEYS; ++i)
674 		tsd_destroy(&keys[i]);
675 
676 	/*
677 	 * test 5 -- destroying keys w/multiple threads
678 	 */
679 	test = 5;
680 	error = 0;
681 
682 	/* create the keys */
683 	for (i = 0; i < NKEYS; ++i)
684 		tsd_create(&keys[i], trans_test_destructor);
685 
686 	/* create some threads */
687 	for (i = 0; i < NTSDTHREADS; ++i)
688 		trans_test_threadcreate(&tta[i]);
689 
690 	/* wait for the threads to assign TSD */
691 	for (i = 0; i < NTSDTHREADS; ++i)
692 		trans_test_stepwait(&tta[i], 1);
693 
694 	/* destroy some of the keys */
695 	tsd_destroy(&keys[0]);
696 	tsd_destroy(&keys[NKEYS - 1]);
697 	tsd_destroy(&keys[NKEYS >> 1]);
698 	tsd_destroy(&keys[NKEYS >> 1]);
699 
700 	/* wakeup the threads -- they check that the destroy took */
701 	for (i = 0; i < NTSDTHREADS; ++i)
702 		trans_test_step(&tta[i], 2);
703 
704 	/* wait for the threads to exit (also checks for TSD cleanup) */
705 	for (i = 0; i < NTSDTHREADS; ++i)
706 		trans_test_stepwait(&tta[i], 3);
707 
708 	/* destroy the rest of the keys */
709 	for (i = 0; i < NKEYS; ++i)
710 		tsd_destroy(&keys[i]);
711 
712 	/* check for error */
713 	for (i = 0; i < NTSDTHREADS; ++i) {
714 		if (!error)
715 			error = tta[i].error;
716 		mutex_destroy(&tta[i].lock);
717 		cv_destroy(&tta[i].cv);
718 	}
719 
720 	/*
721 	 * test 6 -- test getcreate
722 	 */
723 	test = 6;
724 	error = 0;
725 
726 	/* make sure the keys are destroyed */
727 	for (i = 0; i < NKEYS; ++i)
728 		tsd_destroy(&keys[i]);
729 
730 	/* get w/create */
731 	for (i = 0; i < NKEYS; ++i) {
732 		allocatorvalue = i;
733 		if (*(int *)tsd_getcreate(&keys[i], trans_test_destructor_alloc,
734 		    trans_test_allocator) != allocatorvalue) {
735 			error = 600;
736 			goto errout;
737 		}
738 	}
739 	for (i = 0; i < NKEYS; ++i) {
740 		allocatorvalue = i;
741 		if (*(int *)tsd_get(keys[i]) != allocatorvalue) {
742 			error = 610;
743 			goto errout;
744 		}
745 	}
746 	/* make sure destructor gets called when we destroy the keys */
747 	for (i = 0; i < NKEYS; ++i) {
748 		allocatorvalue = i;
749 		okdestructoralloc = 0;
750 		tsd_destroy(&keys[i]);
751 		if (okdestructoralloc == 0) {
752 			error = 620;
753 			goto errout;
754 		}
755 	}
756 
757 errout:
758 	/* make sure the keys are destroyed */
759 	for (i = 0; i < NKEYS; ++i)
760 		tsd_destroy(&keys[i]);
761 
762 	/* return test # and error code (if any) */
763 	migp->size = test;
764 	return (error);
765 }
766 
767 /*
768  * Error Injection Structures, Data, and Functions:
769  *
770  * Error injection is used to test the Harpy error recovery system.  The
771  * MD_IOC_INJECTERRORS ioctl is used to start or continue error injection on a
772  * unit, and MD_IOC_STOPERRORS turns it off.  An mt_error structure is
773  * associated with every trans device for which we are injecting errors.  When
774  * MD_IOC_INJECTERRORS is issued, mdv_strategy_tstpnt is set to point to
775  * trans_error_injector(), so that it gets called for every MDD I/O operation.
776  *
777  * The trans unit can be in one of three states:
778  *
779  *	count down -	Each I/O causes er_count_down to be decremented.
780  *			When er_count_down reaches 0, an error is injected,
781  *			the block number is remembered.  Without making
782  *			special provisions, the log area would receive a
783  *			small percentage of the injected errors.  Thus,
784  *			trans_check_error() will be written, so that every
785  *			other error is injected on the log.
786  *
787  *	suspend -	No errors are generated and the counters are not
788  *			modified.  This is so that fsck/mkfs can do their thing
789  *			(we're not testing them) and so that the test script can
790  *			set up another test.  The transition back to the count
791  *			down state occurs when MD_IOC_INJECTERRORS is invoked
792  *			again.
793  */
794 
/*
 * Error injection state of a unit; acted on by trans_check_error().
 */
795 typedef enum {
	/* count I/Os down and inject an error when the count reaches 0 */
796 	mte_count_down,
	/* inject nothing and leave the counters alone */
797 	mte_suspend,
	/* fail I/O whose device and block match er_bad_unit/er_bad_block */
798 	mte_watch_block
799 } mte_state;
800 
/*
 * Error injection record for one trans unit.  Records are kept on the
 * singly linked list that starts at error_list_head.er_next.
 */
801 typedef struct mt_error {
802 	struct mt_error	*er_next;	/* next error unit in list. */
	/* selects what trans_check_error() does for this unit */
803 	mte_state	er_state;
804 	mt_unit_t	*er_unitp;	/* unit to force errors on. */
805 	size_t		er_count_down;	/* i/o transactions until error. */
806 	size_t		er_increment;	/* increment for reset_count. */
807 	size_t		er_reset_count;	/* used to reset er_count_down */
808 	size_t		er_total_errors; /* count generated errors. */
809 	/* Following fields describe error we are injecting. */
810 	dev_t		er_bad_unit;	/* Unit associated with block in */
811 					/* error. */
812 	off_t		er_bad_block;	/* Block in error. */
813 } mt_error_t;
814 
/* default for default_increment (copied into er_increment) */
815 #define	ERROR_INCREMENT	(1)
/* default for initial_count (copied into er_count_down/er_reset_count) */
816 #define	INITIAL_COUNT	(1)
817 
/* er_increment given to each new error record by trans_inject_errors() */
818 static int		default_increment	= ERROR_INCREMENT;
819 static kmutex_t		error_mutex;	/* protects error_list */
/* dummy list head; the list code here only uses its er_next field */
820 static mt_error_t	error_list_head;
/* initial er_count_down and er_reset_count of each new error record */
821 static int		initial_count		= INITIAL_COUNT;
/* value mdv_strategy_tstpnt had before trans_error_injector was hooked in */
822 static int		(*tstpnt_save)(buf_t *, int, void*) = NULL;
823 
824 static mt_error_t *
find_by_mtunit(mt_unit_t * un,mt_error_t ** pred_errp)825 find_by_mtunit(mt_unit_t *un, mt_error_t **pred_errp)
826 {
827 	mt_error_t	*errp	= (mt_error_t *)NULL;
828 
829 	ASSERT(mutex_owned(&error_mutex) != 0);
830 	*pred_errp = &error_list_head;
831 	while ((errp = (*pred_errp)->er_next) != (mt_error_t *)NULL) {
832 		if (errp->er_unitp == un)
833 			break;
834 		*pred_errp = errp;
835 	}
836 	return (errp);
837 }
838 
839 static mt_error_t *
find_by_dev(md_dev64_t dev)840 find_by_dev(md_dev64_t dev)
841 {
842 	mt_error_t	*errp	= &error_list_head;
843 
844 	ASSERT(mutex_owned(&error_mutex) != 0);
845 	while ((errp = errp->er_next) != (mt_error_t *)NULL) {
846 		if ((errp->er_unitp->un_m_dev == dev) ||
847 		    (errp->er_unitp->un_l_dev == dev))
848 			break;
849 	}
850 	return (errp);
851 }
852 
853 static int
trans_check_error(buf_t * bp,mt_error_t * errp)854 trans_check_error(buf_t *bp, mt_error_t *errp)
855 {
856 	int		rv	= 0;
857 	md_dev64_t	target	= md_expldev(bp->b_edev);
858 
859 	ASSERT(mutex_owned(&error_mutex) != 0);
860 	switch (errp->er_state) {
861 	case mte_count_down:
862 		errp->er_count_down--;
863 		if (errp->er_count_down == 0) {
864 			/*
865 			 * Every other error that we inject should be on
866 			 * the log device.  Errors will be injected on the
867 			 * log device when errp->er_total_errors is even
868 			 * and on the master device when it is odd.  If
869 			 * this I/O is not for the appropriate device, we
870 			 * will set errp->er_count_down to 1, so that we
871 			 * can try again later.
872 			 */
873 			if ((((errp->er_total_errors % 2) == 0) &&
874 			    (errp->er_unitp->un_l_dev == target)) ||
875 			    (((errp->er_total_errors % 2) != 0) &&
876 			    (errp->er_unitp->un_m_dev == target))) {
877 				/* simulate an error */
878 				bp->b_flags |= B_ERROR;
879 				bp->b_error = EIO;
880 				/* remember the error. */
881 				errp->er_total_errors++;
882 				errp->er_bad_unit = bp->b_edev;
883 				errp->er_bad_block = bp->b_blkno;
884 				/* reset counters. */
885 				errp->er_count_down = errp->er_reset_count;
886 				errp->er_reset_count += errp->er_increment;
887 				rv = 1;
888 			} else {
889 				/* Try again next time. */
890 				errp->er_count_down = 1;
891 			}
892 		}
893 		break;
894 
895 	case mte_suspend:
896 		/* No errors while suspended. */
897 		break;
898 
899 	case mte_watch_block:
900 		if ((bp->b_edev == errp->er_bad_unit) &&
901 		    (bp->b_blkno == errp->er_bad_block)) {
902 			bp->b_flags |= B_ERROR;
903 			bp->b_error = EIO;
904 			rv = 1;
905 		}
906 		break;
907 	}
908 	return (rv);
909 }
910 
911 static int
trans_error_injector(buf_t * bp,int flag,void * private)912 trans_error_injector(buf_t *bp, int flag, void* private)
913 {
914 	mt_error_t	*errp	= (mt_error_t *)NULL;
915 	int		(*tstpnt)(buf_t *, int, void*) = NULL;
916 	int		rv	= 0;
917 	md_dev64_t	target	= md_expldev(bp->b_edev);
918 	int		trv	= 0;
919 	mt_unit_t	*un;
920 
921 	mutex_enter(&error_mutex);
922 	errp = find_by_dev(target);
923 	if (errp != (mt_error_t *)NULL) {
924 		un = errp->er_unitp;
925 		if (target == un->un_m_dev) {
926 			/* Target is our master device. */
927 			rv = trans_check_error(bp, errp);
928 		}
929 		if (target == un->un_l_dev) {
930 			/*
931 			 * Target is our log device.  Unfortunately, the same
932 			 * device may also be used for the MDD database.
933 			 * Therefore, we need to make sure that the I/O is for
934 			 * the range of blocks designated as our log.
935 			 */
936 			if ((bp->b_blkno >= un->un_l_pwsblk) &&
937 			    ((bp->b_blkno + btodb(bp->b_bcount)) <=
938 			    (un->un_l_sblk + un->un_l_tblks))) {
939 				rv = trans_check_error(bp, errp);
940 			}
941 		}
942 	}
943 	tstpnt = tstpnt_save;
944 	mutex_exit(&error_mutex);
945 
946 	if (tstpnt != NULL)
947 		trv = (*tstpnt)(bp, flag, private);
948 
949 	/*
950 	 * If we are producing an error (rv != 0) we need to make sure that
951 	 * biodone gets called.  If the tstpnt returned non-zero,
952 	 * we'll assume that it called biodone.
953 	 */
954 	if ((rv != 0) && (trv == 0)) {
955 		md_biodone(bp);
956 	}
957 	rv = ((rv == 0) && (trv == 0)) ? 0 : 1;
958 	return (rv);
959 }
960 
961 /*
962  * Prepare to inject errors on the master and log devices associated with the
963  * unit specified in migp.  The first time that trans_inject_errors() is called
964  * for a unit, an mt_error_t structure is allocated and initialized for the
965  * unit.  Subsequent calls for the unit will just insure that the unit is in the
966  * count down state.
967  *
968  * If an mt_error structure is allocated and it is the first one to be put in
969  * the list, mdv_strategy_tstpnt (which is referenced in md_call_strategy()) is
970  * set to trans_error_injector so that it will be called to see if an I/O
971  * request should be treated as an error.
972  */
973 
974 /*ARGSUSED1*/
975 static int
trans_inject_errors(void * d,int mode,IOLOCK * lock)976 trans_inject_errors(void *d, int mode, IOLOCK *lock)
977 {
978 	mt_error_t	*errp;
979 	mt_error_t	*do_not_care;
980 	mt_unit_t	*un;
981 	int		rv = 0;
982 
983 	md_i_get_t *migp = d;
984 
985 	mdclrerror(&migp->mde);
986 
987 	un = trans_getun(migp->id, &migp->mde,
988 	    RD_LOCK, lock);
989 	if (un == NULL)
990 		return (EINVAL);
991 
992 	/*
993 	 * If there is already a an error structure for the unit make sure that
994 	 * it is in count down mode.
995 	 */
996 
997 	mutex_enter(&error_mutex);
998 	errp = find_by_mtunit(un, &do_not_care);
999 	if (errp != (mt_error_t *)NULL) {
1000 		errp->er_state = mte_count_down;
1001 	} else {
1002 
1003 		/*
1004 		 * Initialize error structure.
1005 		 */
1006 
1007 		errp = (mt_error_t *)md_trans_zalloc(sizeof (mt_error_t));
1008 		errp->er_state = mte_count_down;
1009 		errp->er_unitp = un;
1010 		errp->er_count_down = initial_count;
1011 		errp->er_increment = default_increment;
1012 		errp->er_reset_count = initial_count;
1013 		errp->er_total_errors = 0;
1014 		errp->er_bad_unit = 0;
1015 		errp->er_bad_block = 0;
1016 
1017 		/* Insert it into the list. */
1018 
1019 		errp->er_next = error_list_head.er_next;
1020 		error_list_head.er_next = errp;
1021 
1022 		/*
1023 		 * Set up md_call_strategy to call our error injector.
1024 		 */
1025 
1026 		if (mdv_strategy_tstpnt != trans_error_injector) {
1027 			tstpnt_save = mdv_strategy_tstpnt;
1028 			mdv_strategy_tstpnt = trans_error_injector;
1029 		}
1030 	}
1031 	mutex_exit(&error_mutex);
1032 	return (rv);
1033 }
1034 
1035 /*ARGSUSED1*/
1036 static int
trans_stop_errors(void * d,int mode,IOLOCK * lock)1037 trans_stop_errors(void *d, int mode, IOLOCK *lock)
1038 {
1039 	mt_error_t	*errp	= (mt_error_t *)NULL;
1040 	mt_error_t	*pred_errp;
1041 	mt_unit_t	*un;
1042 	int		rv	= 0;
1043 
1044 	md_i_get_t *migp = d;
1045 
1046 	mdclrerror(&migp->mde);
1047 
1048 	un = trans_getun(migp->id, &migp->mde,
1049 	    RD_LOCK, lock);
1050 	if (un == NULL)
1051 		return (EINVAL);
1052 
1053 	mutex_enter(&error_mutex);
1054 	errp = find_by_mtunit(un, &pred_errp);
1055 	if (errp != (mt_error_t *)NULL) {
1056 		/* Remove from list. */
1057 		pred_errp->er_next = errp->er_next;
1058 		if ((error_list_head.er_next == (mt_error_t *)NULL) &&
1059 		    (mdv_strategy_tstpnt == trans_error_injector)) {
1060 			mdv_strategy_tstpnt = tstpnt_save;
1061 		}
1062 	} else {
1063 		/* unit not set up for errors. */
1064 		rv = ENXIO;
1065 	}
1066 	mutex_exit(&error_mutex);
1067 
1068 	/* Free memory. */
1069 
1070 	if (errp != (mt_error_t *)NULL) {
1071 		md_trans_free((void *)errp, sizeof (*errp));
1072 	}
1073 	return (rv);
1074 }
1075 
1076 int
_init_ioctl()1077 _init_ioctl()
1078 {
1079 	mutex_init(&error_mutex, NULL, MUTEX_DRIVER, (void *)NULL);
1080 	return (1);
1081 }
1082 
1083 int
_fini_ioctl()1084 _fini_ioctl()
1085 {
1086 	mutex_destroy(&error_mutex);
1087 	return (1);
1088 }
1089 
1090 /*
1091  * END OF DEBUG ROUTINES
1092  */
1093 #endif	/* DEBUG */
1094 /*
1095  * BEGIN RELEASE DEBUG
1096  *	The following routines remain in the released product for testability
1097  */
1098 
1099 /*
1100  * ufs error injection remains in the released product
1101  */
1102 /*ARGSUSED1*/
1103 static int
trans_ufserror(void * d,int mode,IOLOCK * lock)1104 trans_ufserror(void *d, int mode, IOLOCK *lock)
1105 {
1106 	mt_unit_t	*un;
1107 
1108 	md_i_get_t *migp = d;
1109 
1110 	mdclrerror(&migp->mde);
1111 
1112 	un = trans_getun(migp->id, &migp->mde,
1113 	    RD_LOCK, lock);
1114 	if (un == NULL || un->un_ut == NULL)
1115 		return (EINVAL);
1116 
1117 	return (0);
1118 }
1119 /*
1120  * shadow test remains in the released product
1121  */
1122 static int
trans_set_shadow(void * d,int mode,IOLOCK * lock)1123 trans_set_shadow(void *d, int mode, IOLOCK *lock)
1124 {
1125 	dev32_t		device;			/* shadow device */
1126 	mt_unit_t 	*un;
1127 
1128 	md_i_get_t *migp = d;
1129 
1130 	mdclrerror(&migp->mde);
1131 
1132 	un = trans_getun(migp->id, &migp->mde,
1133 	    WR_LOCK, lock);
1134 	if (un == NULL)
1135 		return (EINVAL);
1136 
1137 	if ((un->un_debug & MT_SHADOW) == 0)
1138 		return (EINVAL);
1139 
1140 	/* Get shadow device.  User always passes down 32 bit devt */
1141 
1142 	if (ddi_copyin((caddr_t)(uintptr_t)migp->mdp,
1143 	    &device, sizeof (device), mode)) {
1144 		return (EFAULT);
1145 	}
1146 
1147 	/* Save shadow device designator. */
1148 	un->un_s_dev = md_expldev((md_dev64_t)device);
1149 	return (0);
1150 }
1151 
1152 /*
1153  * END RELEASE DEBUG
1154  */
1155 
1156 static int
trans_get(void * d,int mode,IOLOCK * lock)1157 trans_get(void *d, int mode, IOLOCK *lock)
1158 {
1159 	mt_unit_t	*un;
1160 	ml_unit_t	*ul;
1161 
1162 	md_i_get_t *migp = d;
1163 
1164 	mdclrerror(&migp->mde);
1165 
1166 	un = trans_getun(migp->id, &migp->mde,
1167 	    RD_LOCK, lock);
1168 	if (un == NULL)
1169 		return (0);
1170 
1171 	if (migp->size == 0) {
1172 		migp->size = un->c.un_size;
1173 		return (0);
1174 	}
1175 
1176 	if (migp->size < un->c.un_size)
1177 		return (EFAULT);
1178 
1179 log:
1180 	ul = un->un_l_unit;
1181 	if (ul == NULL)
1182 		goto master;
1183 
1184 	/*
1185 	 * refresh log fields in case log was metattach'ed
1186 	 */
1187 	un->un_l_head = (daddr32_t)btodb(ul->un_head_lof);
1188 	un->un_l_sblk = un->un_l_head;
1189 	un->un_l_pwsblk = ul->un_pwsblk;
1190 	un->un_l_maxtransfer = (uint_t)btodb(ul->un_maxtransfer);
1191 	un->un_l_nblks = ul->un_nblks;
1192 	un->un_l_tblks = ul->un_tblks;
1193 	un->un_l_tail = (daddr32_t)btodb(ul->un_tail_lof);
1194 	un->un_l_resv = ul->un_resv;
1195 	un->un_l_maxresv = ul->un_maxresv;
1196 	un->un_l_error = ul->un_error;
1197 	un->un_l_timestamp = ul->un_timestamp;
1198 
1199 	/*
1200 	 * check for log dev dynconcat; can only pick up extra space when the
1201 	 * tail physically follows the head in the circular log
1202 	 */
1203 	if (un->un_l_head <= un->un_l_tail)
1204 		if (ul->un_status & LDL_METADEVICE) {
1205 			struct mdc_unit	*c = MD_UNIT(md_getminor(ul->un_dev));
1206 
1207 			if (c->un_total_blocks > un->un_l_tblks) {
1208 				un->un_l_tblks = c->un_total_blocks;
1209 				un->un_l_nblks = un->un_l_tblks - un->un_l_sblk;
1210 				if (un->un_l_nblks > btodb(LDL_MAXLOGSIZE))
1211 					un->un_l_nblks = btodb(LDL_MAXLOGSIZE);
1212 				un->un_l_maxresv = (uint_t)(un->un_l_nblks *
1213 				    LDL_USABLE_BSIZE);
1214 			}
1215 	}
1216 
1217 master:
1218 
1219 	if (ddi_copyout(un, (void *)(uintptr_t)migp->mdp, un->c.un_size, mode))
1220 		return (EFAULT);
1221 	return (0);
1222 }
1223 
1224 static int
trans_replace(replace_params_t * params)1225 trans_replace(replace_params_t *params)
1226 {
1227 	minor_t		mnum = params->mnum;
1228 	mt_unit_t	*un;
1229 	mdi_unit_t	*ui;
1230 	md_dev64_t	cmp_dev;
1231 	md_dev64_t	ldev;
1232 	md_dev64_t	mdev;
1233 
1234 	mdclrerror(&params->mde);
1235 
1236 	ui = MDI_UNIT(mnum);
1237 	un = md_unit_writerlock(ui);
1238 
1239 	if (MD_STATUS(un) & MD_UN_RESYNC_ACTIVE) {
1240 		return (mdmderror(&params->mde, MDE_RESYNC_ACTIVE, mnum));
1241 	}
1242 
1243 	cmp_dev = params->old_dev;
1244 	mdev = un->un_m_dev;
1245 	ldev = un->un_l_dev;
1246 	if (cmp_dev == mdev) {
1247 		un->un_m_key = params->new_key;
1248 		un->un_m_dev = params->new_dev;
1249 	} else if (cmp_dev == ldev) {
1250 		un->un_l_key = params->new_key;
1251 		un->un_l_dev = params->new_dev;
1252 	}
1253 
1254 	trans_commit(un, 1);
1255 	md_unit_writerexit(ui);
1256 	return (0);
1257 }
1258 
1259 /*ARGSUSED1*/
1260 static int
trans_grow(void * d,int mode,IOLOCK * lock)1261 trans_grow(void *d, int mode, IOLOCK  *lock)
1262 {
1263 	mt_unit_t		*un;
1264 
1265 	md_grow_params_t *mgp = d;
1266 
1267 	mdclrerror(&mgp->mde);
1268 
1269 	un = trans_getun(mgp->mnum, &mgp->mde,
1270 	    RD_LOCK, lock);
1271 	if (un == NULL)
1272 		return (0);
1273 
1274 	/*
1275 	 * check for master dev dynconcat
1276 	 */
1277 	if (md_getmajor(un->un_m_dev) == md_major) {
1278 		struct mdc_unit	*c;
1279 
1280 		c = MD_UNIT(md_getminor(un->un_m_dev));
1281 		if (c->un_total_blocks > MD_MAX_BLKS_FOR_SMALL_DEVS) {
1282 			un->c.un_total_blocks = MD_MAX_BLKS_FOR_SMALL_DEVS;
1283 		} else {
1284 			un->c.un_total_blocks = c->un_total_blocks;
1285 		}
1286 		md_nblocks_set(MD_SID(un), un->c.un_total_blocks);
1287 	}
1288 
1289 	return (0);
1290 }
1291 
1292 /*ARGSUSED1*/
1293 static int
trans_detach_ioctl(void * d,int mode,IOLOCK * lock)1294 trans_detach_ioctl(void *d, int mode, IOLOCK *lock)
1295 {
1296 	mt_unit_t	*un;
1297 	int		error;
1298 
1299 	md_i_get_t *migp = d;
1300 
1301 	mdclrerror(&migp->mde);
1302 
1303 	/* acquire both md_unit_array_rw, and unit_reader lock */
1304 	un = trans_getun(migp->id, &migp->mde,
1305 	    READERS, lock);
1306 	if (un == NULL)
1307 		return (0);
1308 
1309 	/*
1310 	 * simply too much work to make debug modes w/out a log
1311 	 */
1312 	if (un->un_debug)
1313 		return (EACCES);
1314 
1315 	/*
1316 	 * detach the log
1317 	 */
1318 	error = trans_detach(un, migp->size);
1319 
1320 	return (error);
1321 }
1322 
1323 static int
trans_get_log(void * d,int mode,IOLOCK * lock)1324 trans_get_log(void *d, int mode, IOLOCK	*lock)
1325 {
1326 	mt_unit_t	*un;
1327 	ml_unit_t	*ul;
1328 
1329 	md_i_get_t *migp = d;
1330 
1331 	mdclrerror(&migp->mde);
1332 
1333 	un = trans_getun(migp->id, &migp->mde, RD_LOCK, lock);
1334 
1335 	if (un == NULL)
1336 		return (0);
1337 
1338 	ul = un->un_l_unit;
1339 
1340 	if (migp->size == 0) {
1341 		migp->size = ML_UNIT_ONDSZ;
1342 		return (0);
1343 	}
1344 
1345 	if (migp->size < ML_UNIT_ONDSZ)
1346 		return (EFAULT);
1347 
1348 	if (ddi_copyout(ul, (void *)(uintptr_t)migp->mdp, ML_UNIT_ONDSZ,
1349 	    mode))
1350 		return (EFAULT);
1351 	return (0);
1352 }
1353 
1354 static int
trans_getdevs(void * d,int mode,IOLOCK * lock)1355 trans_getdevs(void *d, int mode, IOLOCK	*lock)
1356 {
1357 	int			ndev;
1358 	mt_unit_t		*un;
1359 	md_dev64_t		*udevs;
1360 	md_dev64_t		unit_dev;
1361 
1362 	md_getdevs_params_t *mgdp = d;
1363 
1364 	mdclrerror(&mgdp->mde);
1365 
1366 	un = trans_getun(mgdp->mnum, &mgdp->mde, RD_LOCK, lock);
1367 	if (un == NULL)
1368 		return (0);
1369 
1370 	ndev = (un->un_flags & (TRANS_DETACHED | TRANS_ATTACHING)) ? 1 : 2;
1371 
1372 	if (mgdp->cnt == 0) {
1373 		mgdp->cnt = ndev;
1374 		return (0);
1375 	}
1376 
1377 	if (mgdp->cnt > 2)
1378 		mgdp->cnt = ndev;
1379 
1380 	udevs = (md_dev64_t *)(uintptr_t)mgdp->devs;
1381 	unit_dev = un->un_m_dev;
1382 
1383 	if (md_getmajor(unit_dev) != md_major) {
1384 		if ((unit_dev = md_xlate_mini_2_targ(unit_dev)) == NODEV64)
1385 			return (ENODEV);
1386 	}
1387 
1388 	if (mgdp->cnt >= 1)
1389 		if (ddi_copyout(&unit_dev, (caddr_t)&udevs[0],
1390 		    sizeof (*udevs), mode) != 0)
1391 			return (EFAULT);
1392 
1393 	unit_dev = un->un_l_dev;
1394 	if (md_getmajor(unit_dev) != md_major) {
1395 		if ((unit_dev = md_xlate_mini_2_targ(unit_dev)) == NODEV64)
1396 			return (ENODEV);
1397 	}
1398 
1399 	if (mgdp->cnt >= 2)
1400 		if (ddi_copyout(&unit_dev, (caddr_t)&udevs[1],
1401 		    sizeof (*udevs), mode) != 0)
1402 			return (EFAULT);
1403 
1404 	return (0);
1405 }
1406 
1407 static int
trans_reset_ioctl(md_i_reset_t * mirp,IOLOCK * lock)1408 trans_reset_ioctl(md_i_reset_t *mirp, IOLOCK *lock)
1409 {
1410 	minor_t		mnum = mirp->mnum;
1411 	mt_unit_t	*un;
1412 	int		error;
1413 
1414 	mdclrerror(&mirp->mde);
1415 
1416 	un = trans_getun(mnum, &mirp->mde, NO_LOCK, lock);
1417 	if (un == NULL)
1418 		return (0);
1419 
1420 
1421 	/* This prevents new opens */
1422 	rw_enter(&md_unit_array_rw.lock, RW_WRITER);
1423 
1424 	if (MD_HAS_PARENT(MD_PARENT(un))) {
1425 		rw_exit(&md_unit_array_rw.lock);
1426 		return (mdmderror(&mirp->mde, MDE_IN_USE, mnum));
1427 	}
1428 
1429 	if (md_unit_isopen(MDI_UNIT(mnum))) {
1430 		rw_exit(&md_unit_array_rw.lock);
1431 		return (mdmderror(&mirp->mde, MDE_IS_OPEN, mnum));
1432 	}
1433 	/*
1434 	 * detach the log
1435 	 */
1436 	error = trans_detach(un, mirp->force);
1437 
1438 	/*
1439 	 * reset (aka remove; aka delete) the trans device
1440 	 */
1441 	if (error == 0)
1442 		error = trans_reset(un, mnum, 1, mirp->force);
1443 
1444 	rw_exit(&md_unit_array_rw.lock);
1445 	return (error);
1446 }
1447 
1448 static int
trans_get_geom(mt_unit_t * un,struct dk_geom * geomp)1449 trans_get_geom(mt_unit_t *un, struct dk_geom *geomp)
1450 {
1451 	md_get_geom((md_unit_t *)un, geomp);
1452 
1453 	return (0);
1454 }
1455 
1456 static int
trans_get_vtoc(mt_unit_t * un,struct vtoc * vtocp)1457 trans_get_vtoc(mt_unit_t *un, struct vtoc *vtocp)
1458 {
1459 	md_get_vtoc((md_unit_t *)un, vtocp);
1460 
1461 	return (0);
1462 }
1463 
1464 static int
trans_get_extvtoc(mt_unit_t * un,struct extvtoc * vtocp)1465 trans_get_extvtoc(mt_unit_t *un, struct extvtoc *vtocp)
1466 {
1467 	md_get_extvtoc((md_unit_t *)un, vtocp);
1468 
1469 	return (0);
1470 }
1471 
1472 static int
trans_islog(mt_unit_t * un)1473 trans_islog(mt_unit_t *un)
1474 {
1475 	if (un->un_l_unit == NULL)
1476 		return (ENXIO);
1477 	return (0);
1478 }
1479 
1480 static int
trans_set_vtoc(mt_unit_t * un,struct vtoc * vtocp)1481 trans_set_vtoc(
1482 	mt_unit_t	*un,
1483 	struct vtoc	*vtocp
1484 )
1485 {
1486 	return (md_set_vtoc((md_unit_t *)un, vtocp));
1487 }
1488 
1489 static int
trans_set_extvtoc(mt_unit_t * un,struct extvtoc * vtocp)1490 trans_set_extvtoc(mt_unit_t *un, struct extvtoc *vtocp)
1491 {
1492 	return (md_set_extvtoc((md_unit_t *)un, vtocp));
1493 }
1494 
1495 static int
trans_get_cgapart(mt_unit_t * un,struct dk_map * dkmapp)1496 trans_get_cgapart(
1497 	mt_unit_t	*un,
1498 	struct dk_map	*dkmapp
1499 )
1500 {
1501 	md_get_cgapart((md_unit_t *)un, dkmapp);
1502 	return (0);
1503 }
1504 
1505 static int
trans_admin_ioctl(int cmd,void * data,int mode,IOLOCK * lockp)1506 trans_admin_ioctl(int cmd, void *data, int mode, IOLOCK *lockp)
1507 {
1508 	size_t	sz = 0;
1509 	void	*d = NULL;
1510 	int	err = 0;
1511 
1512 	/* We can only handle 32-bit clients for internal commands */
1513 	if ((mode & DATAMODEL_MASK) != DATAMODEL_ILP32) {
1514 		return (EINVAL);
1515 	}
1516 
1517 	switch (cmd) {
1518 
1519 	case MD_IOCGET:
1520 	{
1521 		if (! (mode & FREAD))
1522 			return (EACCES);
1523 
1524 		sz = sizeof (md_i_get_t);
1525 
1526 		if ((d = md_trans_zalloc(sz)) == NULL)
1527 			return (ENOMEM);
1528 
1529 		if (ddi_copyin(data, d, sz, mode)) {
1530 			err = EFAULT;
1531 			break;
1532 		}
1533 
1534 		err = trans_get(d, mode, lockp);
1535 		break;
1536 	}
1537 
1538 	case MD_IOCGET_LOG:
1539 	{
1540 		if (! (mode & FREAD))
1541 			return (EACCES);
1542 
1543 		sz = sizeof (md_i_get_t);
1544 
1545 		if ((d = md_trans_zalloc(sz)) == NULL)
1546 			return (ENOMEM);
1547 
1548 		if (ddi_copyin(data, d, sz, mode)) {
1549 			err = EFAULT;
1550 			break;
1551 		}
1552 
1553 		err = trans_get_log(d, mode, lockp);
1554 		break;
1555 	}
1556 
1557 	case MD_IOCRESET:
1558 	{
1559 		md_i_reset_t	*p;
1560 
1561 		if (! (mode & FWRITE))
1562 			return (EACCES);
1563 
1564 		if ((d = p = md_trans_zalloc((sz = sizeof (*p)))) == NULL)
1565 			return (ENOMEM);
1566 
1567 		if (ddi_copyin(data, d, sz, mode)) {
1568 			err = EFAULT;
1569 			break;
1570 		}
1571 
1572 		err = trans_reset_ioctl(p, lockp);
1573 		break;
1574 	}
1575 
1576 	case MD_IOCGROW:
1577 	{
1578 		if (! (mode & FWRITE))
1579 			return (EACCES);
1580 
1581 		sz = sizeof (md_grow_params_t);
1582 
1583 		if ((d = md_trans_zalloc(sz)) == NULL)
1584 			return (ENOMEM);
1585 
1586 		if (ddi_copyin(data, d, sz, mode)) {
1587 			err = EFAULT;
1588 			break;
1589 		}
1590 
1591 		err = trans_grow(d, mode, lockp);
1592 		break;
1593 	}
1594 
1595 	case MD_IOC_TRANS_DETACH:
1596 	{
1597 		if (! (mode & FWRITE))
1598 			return (EACCES);
1599 
1600 		sz = sizeof (md_i_get_t);
1601 
1602 		if ((d = md_trans_zalloc(sz)) == NULL)
1603 			return (ENOMEM);
1604 
1605 		if (ddi_copyin(data, d, sz, mode)) {
1606 			err = EFAULT;
1607 			break;
1608 		}
1609 
1610 		err = trans_detach_ioctl(d, mode, lockp);
1611 		break;
1612 	}
1613 
1614 	case MD_IOCREPLACE:
1615 	{
1616 		replace_params_t	*p;
1617 
1618 		if (! (mode & FWRITE))
1619 			return (EACCES);
1620 
1621 		if ((d = p = kmem_alloc((sz = sizeof (*p)), KM_SLEEP)) == NULL)
1622 			return (ENOMEM);
1623 
1624 		if (ddi_copyin(data, d, sz, mode)) {
1625 			err = EFAULT;
1626 			break;
1627 		}
1628 
1629 		err = trans_replace(p);
1630 		break;
1631 	}
1632 
1633 
1634 	case MD_IOCGET_DEVS:
1635 	{
1636 		if (! (mode & FREAD))
1637 			return (EACCES);
1638 
1639 		sz = sizeof (md_getdevs_params_t);
1640 
1641 		if ((d = md_trans_zalloc(sz)) == NULL)
1642 			return (ENOMEM);
1643 
1644 		if (ddi_copyin(data, d, sz, mode)) {
1645 			err = EFAULT;
1646 			break;
1647 		}
1648 
1649 		err = trans_getdevs(d, mode, lockp);
1650 		break;
1651 	}
1652 
1653 /*
1654  * debug ioctls
1655  */
1656 #ifdef	DEBUG
1657 
1658 
1659 	case MD_IOCGET_TRANSSTATS:
1660 	{
1661 		if (! (mode & FREAD))
1662 			return (EACCES);
1663 
1664 		sz = sizeof (md_i_get_t);
1665 
1666 		if ((d = md_trans_zalloc(sz)) == NULL)
1667 			return (ENOMEM);
1668 
1669 		if (ddi_copyin(data, d, sz, mode)) {
1670 			err = EFAULT;
1671 			break;
1672 		}
1673 
1674 		err = trans_get_transstats(d, mode);
1675 		break;
1676 	}
1677 
1678 	case MD_IOC_DEBUG:
1679 	{
1680 		md_i_get_t *mdigp;
1681 
1682 		if (! (mode & FWRITE))
1683 			return (EACCES);
1684 
1685 		sz = sizeof (md_i_get_t);
1686 
1687 		if ((d = md_trans_zalloc(sz)) == NULL)
1688 			return (ENOMEM);
1689 
1690 		if (ddi_copyin(data, d, sz, mode)) {
1691 			err = EFAULT;
1692 			break;
1693 		}
1694 
1695 		mdigp = d;
1696 
1697 		mdclrerror(&mdigp->mde);
1698 		mt_debug = mdigp->size;
1699 		break;
1700 	}
1701 
1702 	case MD_IOC_TSD:
1703 	{
1704 		if (! (mode & FWRITE))
1705 			return (EACCES);
1706 
1707 
1708 		sz = sizeof (md_i_get_t);
1709 
1710 		if ((d = md_trans_zalloc(sz)) == NULL)
1711 			return (ENOMEM);
1712 
1713 		if (ddi_copyin(data, d, sz, mode)) {
1714 			err = EFAULT;
1715 			break;
1716 		}
1717 
1718 		err = trans_test_tsd(d, mode);
1719 		break;
1720 	}
1721 
1722 	case MD_IOC_TRYGETBLK:
1723 	{
1724 		if (! (mode & FWRITE))
1725 			return (EACCES);
1726 
1727 
1728 		sz = sizeof (md_i_get_t);
1729 
1730 		if ((d = md_trans_zalloc(sz)) == NULL)
1731 			return (ENOMEM);
1732 
1733 		if (ddi_copyin(data, d, sz, mode)) {
1734 			err = EFAULT;
1735 			break;
1736 		}
1737 
1738 		err = trans_test_trygetblk(d, mode, lockp);
1739 		break;
1740 	}
1741 
1742 	case MD_IOC_TRYPAGE:
1743 	{
1744 		if (! (mode & FWRITE))
1745 			return (EACCES);
1746 
1747 
1748 		sz = sizeof (md_i_get_t);
1749 
1750 		if ((d = md_trans_zalloc(sz)) == NULL)
1751 			return (ENOMEM);
1752 
1753 		if (ddi_copyin(data, d, sz, mode)) {
1754 			err = EFAULT;
1755 			break;
1756 		}
1757 
1758 		err = trans_test_trypage(d, mode, lockp);
1759 		break;
1760 	}
1761 
1762 
1763 	case MD_IOC_INJECTERRORS:
1764 	{
1765 		if (! (mode & FWRITE))
1766 			return (EACCES);
1767 
1768 
1769 		sz = sizeof (md_i_get_t);
1770 
1771 		if ((d = md_trans_zalloc(sz)) == NULL)
1772 			return (ENOMEM);
1773 
1774 		if (ddi_copyin(data, d, sz, mode)) {
1775 			err = EFAULT;
1776 			break;
1777 		}
1778 
1779 		err = trans_inject_errors(d, mode, lockp);
1780 		break;
1781 	}
1782 
1783 	case MD_IOC_STOPERRORS:
1784 	{
1785 		if (! (mode & FWRITE))
1786 			return (EACCES);
1787 
1788 
1789 		sz = sizeof (md_i_get_t);
1790 
1791 		if ((d = md_trans_zalloc(sz)) == NULL)
1792 			return (ENOMEM);
1793 
1794 		if (ddi_copyin(data, d, sz, mode)) {
1795 			err = EFAULT;
1796 			break;
1797 		}
1798 
1799 		err = trans_stop_errors(d, mode, lockp);
1800 		break;
1801 	}
1802 
1803 	case MD_IOC_ISDEBUG:
1804 		break;
1805 
1806 #else	/* ! DEBUG */
1807 
1808 	case MD_IOC_ISDEBUG:
1809 	case MD_IOCGET_TRANSSTATS:
1810 	case MD_IOC_STOPERRORS:
1811 	case MD_IOC_TSD:
1812 	case MD_IOC_TRYGETBLK:
1813 	case MD_IOC_TRYPAGE:
1814 		break;
1815 
1816 	/*
1817 	 * error injection behaves like MD_IOC_UFSERROR in released product
1818 	 */
1819 	case MD_IOC_INJECTERRORS:
1820 	{
1821 		if (! (mode & FWRITE))
1822 			return (EACCES);
1823 
1824 
1825 		sz = sizeof (md_i_get_t);
1826 
1827 		if ((d = md_trans_zalloc(sz)) == NULL)
1828 			return (ENOMEM);
1829 
1830 		if (ddi_copyin(data, d, sz, mode)) {
1831 			err = EFAULT;
1832 			break;
1833 		}
1834 
1835 		err = trans_ufserror(d, mode, lockp);
1836 		break;
1837 	}
1838 
1839 	/*
1840 	 * only the shadow test is allowed in the released product
1841 	 */
1842 	case MD_IOC_DEBUG:
1843 	{
1844 		md_i_get_t *mdigp;
1845 
1846 		if (! (mode & FWRITE))
1847 			return (EACCES);
1848 
1849 		sz = sizeof (md_i_get_t);
1850 
1851 		if ((d = md_trans_zalloc(sz)) == NULL)
1852 			return (ENOMEM);
1853 
1854 		if (ddi_copyin(data, d, sz, mode)) {
1855 			err = EFAULT;
1856 			break;
1857 		}
1858 
1859 		mdigp = d;
1860 
1861 		mdclrerror(&mdigp->mde);
1862 		mt_debug = mdigp->size & MT_SHADOW;
1863 		break;
1864 	}
1865 
1866 #endif	/* ! DEBUG */
1867 
1868 /*
1869  * BEGIN RELEASE DEBUG
1870  *	The following routines remain in the released product for testability
1871  */
1872 
1873 	case MD_IOC_UFSERROR:
1874 	{
1875 		if (! (mode & FWRITE))
1876 			return (EACCES);
1877 
1878 		sz = sizeof (md_i_get_t);
1879 
1880 		if ((d = md_trans_zalloc(sz)) == NULL)
1881 			return (ENOMEM);
1882 
1883 		if (ddi_copyin(data, d, sz, mode)) {
1884 			err = EFAULT;
1885 			break;
1886 		}
1887 
1888 		err = trans_ufserror(d, mode, lockp);
1889 		break;
1890 	}
1891 
1892 	case MD_IOC_SETSHADOW:
1893 	{
1894 		if (! (mode & FWRITE))
1895 			return (EACCES);
1896 
1897 		sz = sizeof (md_i_get_t);
1898 
1899 		if ((d = md_trans_zalloc(sz)) == NULL)
1900 			return (ENOMEM);
1901 
1902 		if (ddi_copyin(data, d, sz, mode)) {
1903 			err = EFAULT;
1904 			break;
1905 		}
1906 
1907 		err = trans_set_shadow(d, mode, lockp);
1908 		break;
1909 	}
1910 
1911 /*
1912  * END RELEASE DEBUG
1913  */
1914 
1915 
1916 	default:
1917 		return (ENOTTY);
1918 	}
1919 
1920 	/*
1921 	 * copyout and free any args
1922 	 */
1923 	if (sz != 0) {
1924 		if (err == 0) {
1925 			if (ddi_copyout(d, data, sz, mode) != 0) {
1926 				err = EFAULT;
1927 			}
1928 		}
1929 		md_trans_free(d, sz);
1930 	}
1931 	return (err);
1932 }
1933 
1934 int
md_trans_ioctl(dev_t dev,int cmd,void * data,int mode,IOLOCK * lockp)1935 md_trans_ioctl(dev_t dev, int cmd, void *data, int mode, IOLOCK *lockp)
1936 {
1937 	minor_t		mnum = getminor(dev);
1938 	mt_unit_t	*un;
1939 	md_error_t	mde = mdnullerror;
1940 	int		err = 0;
1941 
1942 	/* handle admin ioctls */
1943 	if (mnum == MD_ADM_MINOR)
1944 		return (trans_admin_ioctl(cmd, data, mode, lockp));
1945 
1946 	/* check unit */
1947 	if ((MD_MIN2SET(mnum) >= md_nsets) ||
1948 	    (MD_MIN2UNIT(mnum) >= md_nunits) ||
1949 	    ((un = trans_getun(mnum, &mde, RD_LOCK, lockp)) == NULL))
1950 		return (ENXIO);
1951 
1952 	/* dispatch ioctl */
1953 	switch (cmd) {
1954 
1955 	case DKIOCINFO:
1956 	{
1957 		struct dk_cinfo	*p;
1958 
1959 		if (! (mode & FREAD))
1960 			return (EACCES);
1961 
1962 		if ((p = md_trans_zalloc(sizeof (*p))) == NULL)
1963 			return (ENOMEM);
1964 
1965 		get_info(p, mnum);
1966 		if (ddi_copyout((caddr_t)p, data, sizeof (*p), mode) != 0)
1967 			err = EFAULT;
1968 
1969 		md_trans_free(p, sizeof (*p));
1970 		return (err);
1971 	}
1972 
1973 	case DKIOCGGEOM:
1974 	{
1975 		struct dk_geom	*p;
1976 
1977 		if (! (mode & FREAD))
1978 			return (EACCES);
1979 
1980 		if ((p = md_trans_zalloc(sizeof (*p))) == NULL)
1981 			return (ENOMEM);
1982 
1983 		if ((err = trans_get_geom(un, p)) == 0) {
1984 			if (ddi_copyout((caddr_t)p, data, sizeof (*p),
1985 			    mode) != 0)
1986 				err = EFAULT;
1987 		}
1988 
1989 		md_trans_free(p, sizeof (*p));
1990 		return (err);
1991 	}
1992 
1993 	case DKIOCGVTOC:
1994 	{
1995 		struct vtoc	*vtoc;
1996 
1997 		if (! (mode & FREAD))
1998 			return (EACCES);
1999 
2000 		vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP);
2001 		if ((err = trans_get_vtoc(un, vtoc)) != 0) {
2002 			kmem_free(vtoc, sizeof (*vtoc));
2003 			return (err);
2004 		}
2005 
2006 		if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
2007 			if (ddi_copyout(vtoc, data, sizeof (*vtoc), mode))
2008 				err = EFAULT;
2009 		}
2010 #ifdef _SYSCALL32
2011 		else {
2012 			struct vtoc32	*vtoc32;
2013 
2014 			vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP);
2015 
2016 			vtoctovtoc32((*vtoc), (*vtoc32));
2017 			if (ddi_copyout(vtoc32, data, sizeof (*vtoc32), mode))
2018 				err = EFAULT;
2019 			kmem_free(vtoc32, sizeof (*vtoc32));
2020 		}
2021 #endif /* _SYSCALL32 */
2022 
2023 		kmem_free(vtoc, sizeof (*vtoc));
2024 		return (err);
2025 	}
2026 
2027 	case DKIOCSVTOC:
2028 	{
2029 		struct vtoc	*vtoc;
2030 
2031 		if (! (mode & FWRITE))
2032 			return (EACCES);
2033 
2034 		vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP);
2035 		if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
2036 			if (ddi_copyin(data, vtoc, sizeof (*vtoc), mode)) {
2037 				err = EFAULT;
2038 			}
2039 		}
2040 #ifdef _SYSCALL32
2041 		else {
2042 			struct vtoc32	*vtoc32;
2043 
2044 			vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP);
2045 
2046 			if (ddi_copyin(data, vtoc32, sizeof (*vtoc32), mode)) {
2047 				err = EFAULT;
2048 			} else {
2049 				vtoc32tovtoc((*vtoc32), (*vtoc));
2050 			}
2051 			kmem_free(vtoc32, sizeof (*vtoc32));
2052 		}
2053 #endif /* _SYSCALL32 */
2054 
2055 		if (err == 0)
2056 			err = trans_set_vtoc(un, vtoc);
2057 
2058 		kmem_free(vtoc, sizeof (*vtoc));
2059 		return (err);
2060 	}
2061 
2062 
2063 	case DKIOCGEXTVTOC:
2064 	{
2065 		struct extvtoc	*extvtoc;
2066 
2067 		if (! (mode & FREAD))
2068 			return (EACCES);
2069 
2070 		extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP);
2071 		if ((err = trans_get_extvtoc(un, extvtoc)) != 0) {
2072 			return (err);
2073 		}
2074 
2075 		if (ddi_copyout(extvtoc, data, sizeof (*extvtoc), mode))
2076 			err = EFAULT;
2077 
2078 		kmem_free(extvtoc, sizeof (*extvtoc));
2079 		return (err);
2080 	}
2081 
2082 	case DKIOCSEXTVTOC:
2083 	{
2084 		struct extvtoc	*extvtoc;
2085 
2086 		if (! (mode & FWRITE))
2087 			return (EACCES);
2088 
2089 		extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP);
2090 		if (ddi_copyin(data, extvtoc, sizeof (*extvtoc), mode)) {
2091 			err = EFAULT;
2092 		}
2093 
2094 		if (err == 0)
2095 			err = trans_set_extvtoc(un, extvtoc);
2096 
2097 		kmem_free(extvtoc, sizeof (*extvtoc));
2098 		return (err);
2099 	}
2100 
2101 	case DKIOCGAPART:
2102 	{
2103 		struct dk_map	dmp;
2104 
2105 		if ((err = trans_get_cgapart(un, &dmp)) != 0) {
2106 			return (err);
2107 		}
2108 
2109 		if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
2110 			if (ddi_copyout((caddr_t)&dmp, data, sizeof (dmp),
2111 			    mode) != 0)
2112 				err = EFAULT;
2113 		}
2114 #ifdef _SYSCALL32
2115 		else {
2116 			struct dk_map32 dmp32;
2117 
2118 			dmp32.dkl_cylno = dmp.dkl_cylno;
2119 			dmp32.dkl_nblk = dmp.dkl_nblk;
2120 
2121 			if (ddi_copyout((caddr_t)&dmp32, data, sizeof (dmp32),
2122 			    mode) != 0)
2123 				err = EFAULT;
2124 		}
2125 #endif /* _SYSCALL32 */
2126 
2127 		return (err);
2128 	}
2129 
2130 	/*
2131 	 * _FIOISLOG, _FIOISLOGOK, _FIOLOGRESET are used by fsck/mkfs
2132 	 * after opening the device.  fsck/mkfs use these ioctls for
2133 	 * error recovery.
2134 	 */
2135 	case _FIOISLOG:
2136 		return (trans_islog(un));
2137 
2138 	default:
2139 		return (ENOTTY);
2140 	}
2141 }
2142 
2143 /*
2144  * rename named service entry points and support functions
2145  */
2146 
2147 /* rename/exchange role swap functions */
2148 
2149 /*
2150  * MDRNM_UPDATE_SELF
2151  * This role swap function is identical for all unit types,
2152  * so keep it here. It's also the best example because it
2153  * touches all the modified portions of the relevant
2154  * in-common structures.
2155  */
2156 void
trans_rename_update_self(md_rendelta_t * delta,md_rentxn_t * rtxnp)2157 trans_rename_update_self(
2158 	md_rendelta_t	*delta,
2159 	md_rentxn_t	*rtxnp)
2160 {
2161 	minor_t		 from_min, to_min;
2162 	sv_dev_t	 sv;
2163 	mt_unit_t	*un;
2164 
2165 	ASSERT(rtxnp);
2166 	ASSERT(rtxnp->op == MDRNOP_RENAME);
2167 	ASSERT(delta);
2168 	ASSERT(delta->unp);
2169 	ASSERT(delta->uip);
2170 	ASSERT(rtxnp->rec_idx >= 0);
2171 	ASSERT(rtxnp->recids);
2172 	ASSERT(delta->old_role == MDRR_SELF);
2173 	ASSERT(delta->new_role == MDRR_SELF);
2174 
2175 	from_min = rtxnp->from.mnum;
2176 	to_min   = rtxnp->to.mnum;
2177 	un	 = (mt_unit_t *)delta->unp;
2178 
2179 	/*
2180 	 * self id changes in our own unit struct
2181 	 * both mechanisms for identifying the trans must be reset.
2182 	 */
2183 
2184 	MD_SID(delta->unp) = to_min;
2185 	un->un_dev = makedevice(md_major, to_min);
2186 
2187 	/*
2188 	 * clear old array pointers to unit in-core and unit
2189 	 */
2190 
2191 	MDI_VOIDUNIT(from_min) = NULL;
2192 	MD_VOIDUNIT(from_min) = NULL;
2193 
2194 	/*
2195 	 * and point the new slots at the unit in-core and unit structs
2196 	 */
2197 
2198 	MDI_VOIDUNIT(to_min) = delta->uip;
2199 	MD_VOIDUNIT(to_min) = delta->unp;
2200 
2201 	/*
2202 	 * recreate kstats
2203 	 */
2204 	md_kstat_destroy_ui(delta->uip);
2205 	md_kstat_init_ui(to_min, delta->uip);
2206 
2207 	/*
2208 	 * the unit in-core reference to the get next link's id changes
2209 	 */
2210 
2211 	delta->uip->ui_link.ln_id = to_min;
2212 
2213 	/*
2214 	 * name space addition of new key was done from user-level
2215 	 * remove the old name's key here
2216 	 */
2217 
2218 	sv.setno = MD_MIN2SET(from_min);
2219 	sv.key	 = rtxnp->from.key;
2220 
2221 	md_rem_names(&sv, 1);
2222 
2223 
2224 	/*
2225 	 * and store the record id (from the unit struct) into recids
2226 	 * for later commitment by md_rename()
2227 	 */
2228 
2229 	md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
2230 }
2231 
2232 /*
2233  * MDRNM_UPDATE_KIDS
2234  * rename/exchange of our child or grandchild
2235  */
2236 void
trans_renexch_update_kids(md_rendelta_t * delta,md_rentxn_t * rtxnp)2237 trans_renexch_update_kids(
2238 	md_rendelta_t	*delta,
2239 	md_rentxn_t	*rtxnp)
2240 {
2241 	mt_unit_t	*un;
2242 	minor_t		 from_min, to_min, log_min, master_min;
2243 
2244 	ASSERT(delta);
2245 	ASSERT(rtxnp);
2246 	ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE));
2247 	ASSERT(delta->unp);
2248 	ASSERT(rtxnp->recids);
2249 	ASSERT(rtxnp->rec_idx >= 0);
2250 	ASSERT(delta->old_role == MDRR_PARENT);
2251 	ASSERT(delta->new_role == MDRR_PARENT);
2252 
2253 	un		= (mt_unit_t *)delta->unp;
2254 	from_min	= rtxnp->from.mnum;
2255 	to_min		= rtxnp->to.mnum;
2256 	log_min		= md_getminor(un->un_l_dev);
2257 	master_min	= md_getminor(un->un_m_dev);
2258 
2259 	/*
2260 	 * since our role isn't changing (parent->parent)
2261 	 * one of our children must be changing; which one is it?
2262 	 * find the child being modified, and update
2263 	 * our notion of it
2264 	 */
2265 
2266 	/* both devices must be metadevices in order to be updated */
2267 	ASSERT(md_getmajor(un->un_m_dev) == md_major);
2268 	ASSERT(!(un->un_l_unit && (md_getmajor(un->un_l_dev) != md_major)));
2269 
2270 	if ((md_getmajor(un->un_m_dev) == md_major) &&
2271 	    (master_min == from_min)) {
2272 
2273 		ASSERT(!(un->un_l_unit && (log_min == from_min)));
2274 
2275 		un->un_m_dev = makedevice(md_major, to_min);
2276 		un->un_m_key = rtxnp->to.key;
2277 
2278 	} else if ((md_getmajor(un->un_m_dev) == md_major) &&
2279 	    un->un_l_unit && (log_min == from_min)) {
2280 
2281 		ASSERT(master_min != from_min);
2282 
2283 		un->un_l_dev = makedevice(md_major, to_min);
2284 		un->un_l_key = rtxnp->to.key;
2285 
2286 	} else {
2287 		ASSERT(FALSE);
2288 		panic("trans_renexch_update_kids: not a metadevice");
2289 		/*NOTREACHED*/
2290 	}
2291 
2292 	md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
2293 }
2294 
2295 /*
2296  * MDRNM_SELF_UPDATE_FROM (exchange down) [self->child]
2297  */
2298 void
trans_exchange_self_update_from_down(md_rendelta_t * delta,md_rentxn_t * rtxnp)2299 trans_exchange_self_update_from_down(
2300 	md_rendelta_t	*delta,
2301 	md_rentxn_t	*rtxnp)
2302 {
2303 	mt_unit_t	*un;
2304 	minor_t		from_min, to_min, master_min, log_min;
2305 	sv_dev_t	sv;
2306 
2307 	ASSERT(delta);
2308 	ASSERT(delta->unp);
2309 	ASSERT(delta->uip);
2310 	ASSERT(rtxnp);
2311 	ASSERT(MDRNOP_EXCHANGE == rtxnp->op);
2312 	ASSERT(rtxnp->from.uip);
2313 	ASSERT(rtxnp->rec_idx >= 0);
2314 	ASSERT(rtxnp->recids);
2315 	ASSERT(delta->old_role == MDRR_SELF);
2316 	ASSERT(delta->new_role == MDRR_CHILD);
2317 	ASSERT(md_getminor(delta->dev) == rtxnp->from.mnum);
2318 
2319 	un = (mt_unit_t *)delta->unp;
2320 
2321 	/*
2322 	 * if we're exchanging a trans, it had better be a metadevice
2323 	 */
2324 	ASSERT(md_getmajor(un->un_m_dev) == md_major);
2325 
2326 	to_min		= rtxnp->to.mnum;
2327 	from_min	= rtxnp->from.mnum;
2328 	master_min	= md_getminor(un->un_m_dev);
2329 	log_min		= md_getminor(un->un_l_dev);
2330 
2331 	/*
2332 	 * both mechanisms for identifying a trans must be updated
2333 	 */
2334 
2335 	MD_SID(delta->unp) = to_min;
2336 	un->un_dev = makedevice(md_major, to_min);
2337 
2338 	/*
2339 	 * parent identifier need not change
2340 	 */
2341 
2342 	/*
2343 	 * point the set array pointers at the "new" unit and unit in-cores
2344 	 * Note: the other half of this transfer is done in the "update to"
2345 	 * rename/exchange named service.
2346 	 */
2347 
2348 	MDI_VOIDUNIT(to_min) = delta->uip;
2349 	MD_VOIDUNIT(to_min) = delta->unp;
2350 
2351 	/*
2352 	 * transfer kstats
2353 	 */
2354 
2355 	delta->uip->ui_kstat = rtxnp->to.kstatp;
2356 
2357 	/*
2358 	 * the unit in-core reference to the get next link's id changes
2359 	 */
2360 
2361 	delta->uip->ui_link.ln_id = to_min;
2362 
2363 	/*
2364 	 * which one of our children is changing?
2365 	 *
2366 	 * Note that the check routines forbid changing the log (for now)
2367 	 * because there's no lockfs-like trans-ufs "freeze and remount"
2368 	 * or "freeze and bobbit the log."
2369 	 */
2370 
2371 	/* both devices must be metadevices in order to be updated */
2372 	ASSERT(md_getmajor(un->un_m_dev) == md_major);
2373 	ASSERT(!(un->un_l_unit && (md_getmajor(un->un_l_dev) != md_major)));
2374 
2375 	if ((md_getmajor(un->un_m_dev) == md_major) &&
2376 	    (master_min == to_min)) {
2377 
2378 		/* master and log can't both be changed */
2379 		ASSERT(!(un->un_l_unit && (log_min == to_min)));
2380 
2381 		un->un_m_dev = makedevice(md_major, from_min);
2382 		sv.key = un->un_m_key;
2383 		un->un_m_key = rtxnp->from.key;
2384 
2385 	} else if ((md_getmajor(un->un_m_dev) == md_major) &&
2386 	    un->un_l_unit && (log_min == to_min)) {
2387 
2388 		/* master and log can't both be changed */
2389 		ASSERT(!(master_min == to_min));
2390 
2391 		un->un_l_dev = makedevice(md_major, from_min);
2392 		sv.key = un->un_l_key;
2393 		un->un_l_key = rtxnp->from.key;
2394 
2395 	} else {
2396 		ASSERT(FALSE);
2397 		panic("trans_exchange_self_update_from_down: not a metadevice");
2398 		/*NOTREACHED*/
2399 	}
2400 
2401 	/*
2402 	 * the new master must exist in the name space
2403 	 */
2404 	ASSERT(rtxnp->from.key != MD_KEYWILD);
2405 	ASSERT(rtxnp->from.key != MD_KEYBAD);
2406 
2407 	/*
2408 	 * delete the key for the changed child from the namespace
2409 	 */
2410 
2411 	sv.setno = MD_MIN2SET(from_min);
2412 	md_rem_names(&sv, 1);
2413 
2414 	/*
2415 	 * and store the record id (from the unit struct) into recids
2416 	 */
2417 
2418 	md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
2419 }
2420 
2421 /*
2422  * MDRNM_PARENT_UPDATE_TO (exchange down) [parent->self]
2423  */
2424 void
trans_exchange_parent_update_to(md_rendelta_t * delta,md_rentxn_t * rtxnp)2425 trans_exchange_parent_update_to(
2426 	md_rendelta_t	*delta,
2427 	md_rentxn_t	*rtxnp)
2428 {
2429 	mt_unit_t	*un;
2430 	minor_t		from_min, to_min, master_min, log_min;
2431 	sv_dev_t	sv;
2432 
2433 	ASSERT(delta);
2434 	ASSERT(delta->unp);
2435 	ASSERT(delta->uip);
2436 	ASSERT(rtxnp);
2437 	ASSERT(MDRNOP_EXCHANGE == rtxnp->op);
2438 	ASSERT(rtxnp->from.uip);
2439 	ASSERT(rtxnp->rec_idx >= 0);
2440 	ASSERT(rtxnp->recids);
2441 	ASSERT(delta->old_role == MDRR_PARENT);
2442 	ASSERT(delta->new_role == MDRR_SELF);
2443 	ASSERT(md_getminor(delta->dev) == rtxnp->to.mnum);
2444 
2445 	un = (mt_unit_t *)delta->unp;
2446 
2447 	ASSERT(md_getmajor(un->un_m_dev) == md_major);
2448 
2449 	to_min		= rtxnp->to.mnum;
2450 	from_min	= rtxnp->from.mnum;
2451 	master_min	= md_getminor(un->un_m_dev);
2452 	log_min		= md_getminor(un->un_l_dev);
2453 
2454 	/*
2455 	 * both mechanisms for identifying a trans must be updated
2456 	 */
2457 
2458 	MD_SID(delta->unp) = from_min;
2459 	un->un_dev = makedevice(md_major, from_min);
2460 
2461 	/*
2462 	 * parent identifier need not change
2463 	 */
2464 
2465 	/*
2466 	 * point the set array pointers at the "new" unit and unit in-cores
2467 	 * Note: the other half of this transfer is done in the "update to"
2468 	 * rename/exchange named service.
2469 	 */
2470 
2471 	MDI_VOIDUNIT(from_min) = delta->uip;
2472 	MD_VOIDUNIT(from_min) = delta->unp;
2473 
2474 	/*
2475 	 * transfer kstats
2476 	 */
2477 
2478 	delta->uip->ui_kstat = rtxnp->from.kstatp;
2479 
2480 	/*
2481 	 * the unit in-core reference to the get next link's id changes
2482 	 */
2483 
2484 	delta->uip->ui_link.ln_id = from_min;
2485 
2486 	/*
2487 	 * which one of our children is changing?
2488 	 */
2489 
2490 	/* both devices must be metadevices in order to be updated */
2491 	ASSERT(md_getmajor(un->un_m_dev) == md_major);
2492 	ASSERT(!(un->un_l_unit && (md_getmajor(un->un_l_dev) != md_major)));
2493 
2494 	if ((md_getmajor(un->un_m_dev) == md_major) &&
2495 	    (master_min == from_min)) {
2496 
2497 		/* can't be changing log and master */
2498 		ASSERT(!(un->un_l_unit && (log_min == to_min)));
2499 
2500 		un->un_m_dev = makedevice(md_major, to_min);
2501 		sv.key = un->un_m_key;
2502 		un->un_m_key = rtxnp->to.key;
2503 
2504 	} else if (un->un_l_unit &&
2505 	    ((md_getmajor(un->un_l_dev) == md_major) && log_min == to_min)) {
2506 
2507 		/* can't be changing log and master */
2508 		ASSERT(master_min != from_min);
2509 
2510 		un->un_l_dev = makedevice(md_major, to_min);
2511 		sv.key = un->un_l_key;
2512 		un->un_l_key = rtxnp->to.key;
2513 
2514 	} else {
2515 		ASSERT(FALSE);
2516 		panic("trans_exchange_parent_update_to: not a metadevice");
2517 		/*NOTREACHED*/
2518 	}
2519 
2520 	/*
2521 	 * delete the key for the changed child from the namespace
2522 	 */
2523 
2524 	sv.setno = MD_MIN2SET(from_min);
2525 	md_rem_names(&sv, 1);
2526 
2527 	/*
2528 	 * and store the record id (from the unit struct) into recids
2529 	 */
2530 
2531 	md_store_recid(&rtxnp->rec_idx, rtxnp->recids, delta->unp);
2532 }
2533 
2534 /*
2535  * MDRNM_LIST_URKIDS: named svc entry point
2536  * all all delta entries appropriate for our children onto the
2537  * deltalist pointd to by dlpp
2538  */
2539 int
trans_rename_listkids(md_rendelta_t ** dlpp,md_rentxn_t * rtxnp)2540 trans_rename_listkids(
2541 	md_rendelta_t	**dlpp,
2542 	md_rentxn_t	 *rtxnp)
2543 {
2544 	minor_t		 from_min, to_min, master_min, log_min;
2545 	mt_unit_t	*from_un;
2546 	md_rendelta_t	*new, *p;
2547 	int		 n_children;
2548 
2549 	ASSERT(rtxnp);
2550 	ASSERT(dlpp);
2551 	ASSERT((rtxnp->op == MDRNOP_EXCHANGE) || (rtxnp->op == MDRNOP_RENAME));
2552 
2553 	from_min = rtxnp->from.mnum;
2554 	to_min = rtxnp->to.mnum;
2555 	n_children = 0;
2556 
2557 	if (!MDI_UNIT(from_min) || !(from_un = MD_UNIT(from_min))) {
2558 		(void) mdmderror(&rtxnp->mde, MDE_UNIT_NOT_SETUP, from_min);
2559 		return (-1);
2560 	}
2561 
2562 	for (p = *dlpp; p && p->next != NULL; p = p->next) {
2563 		/* NULL */
2564 	}
2565 
2566 	if (md_getmajor(from_un->un_m_dev) == md_major) {
2567 
2568 		master_min = md_getminor(from_un->un_m_dev);
2569 
2570 		p = new = md_build_rendelta(MDRR_CHILD,
2571 		    to_min == master_min? MDRR_SELF: MDRR_CHILD,
2572 		    from_un->un_m_dev, p, MD_UNIT(master_min),
2573 		    MDI_UNIT(master_min), &rtxnp->mde);
2574 
2575 		if (!new) {
2576 			if (mdisok(&rtxnp->mde)) {
2577 				(void) mdsyserror(&rtxnp->mde, ENOMEM);
2578 			}
2579 			return (-1);
2580 		}
2581 		++n_children;
2582 	}
2583 
2584 	if (from_un->un_l_unit &&
2585 	    (md_getmajor(from_un->un_l_dev) == md_major)) {
2586 
2587 		log_min = md_getminor(from_un->un_l_dev);
2588 
2589 		new = md_build_rendelta(MDRR_CHILD,
2590 		    to_min == log_min? MDRR_SELF: MDRR_CHILD,
2591 		    from_un->un_l_dev, p, MD_UNIT(log_min),
2592 		    MDI_UNIT(log_min), &rtxnp->mde);
2593 		if (!new) {
2594 			if (mdisok(&rtxnp->mde)) {
2595 				(void) mdsyserror(&rtxnp->mde, ENOMEM);
2596 			}
2597 			return (-1);
2598 		}
2599 		++n_children;
2600 	}
2601 
2602 	return (n_children);
2603 }
2604 
2605 /*
2606  * support routine for MDRNM_CHECK
2607  */
2608 static int
trans_may_renexch_self(mt_unit_t * un,mdi_unit_t * ui,md_rentxn_t * rtxnp)2609 trans_may_renexch_self(
2610 	mt_unit_t	*un,
2611 	mdi_unit_t	*ui,
2612 	md_rentxn_t	*rtxnp)
2613 {
2614 	minor_t			from_min;
2615 	minor_t			to_min;
2616 
2617 	ASSERT(rtxnp);
2618 	ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE));
2619 
2620 	from_min = rtxnp->from.mnum;
2621 	to_min	 = rtxnp->to.mnum;
2622 
2623 	if (!un || !ui) {
2624 		(void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
2625 		    from_min);
2626 		return (EINVAL);
2627 	}
2628 
2629 	ASSERT(MD_CAPAB(un) & MD_CAN_META_CHILD);
2630 
2631 	if (!(MD_CAPAB(un) & MD_CAN_META_CHILD)) {
2632 		(void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min);
2633 		return (EINVAL);
2634 	}
2635 
2636 	if (MD_PARENT(un) == MD_MULTI_PARENT) {
2637 		(void) mdmderror(&rtxnp->mde, MDE_RENAME_SOURCE_BAD, from_min);
2638 		return (EINVAL);
2639 	}
2640 
2641 	switch (rtxnp->op) {
2642 	case MDRNOP_EXCHANGE:
2643 		/*
2644 		 * may only swap with our child (master) if it is a metadevice
2645 		 */
2646 		if (md_getmajor(un->un_m_dev) != md_major) {
2647 			(void) mdmderror(&rtxnp->mde, MDE_RENAME_TARGET_BAD,
2648 			    to_min);
2649 			return (EINVAL);
2650 		}
2651 
2652 		if (un->un_l_unit &&
2653 		    (md_getmajor(un->un_l_dev) != md_major)) {
2654 
2655 			(void) mdmderror(&rtxnp->mde, MDE_RENAME_TARGET_BAD,
2656 			    to_min);
2657 			return (EINVAL);
2658 		}
2659 
2660 		if (md_getminor(un->un_m_dev) != to_min) {
2661 			(void) mdmderror(&rtxnp->mde, MDE_RENAME_TARGET_BAD,
2662 			    to_min);
2663 			return (EINVAL);
2664 		}
2665 
2666 		break;
2667 
2668 	case MDRNOP_RENAME:
2669 		break;
2670 
2671 	default:
2672 		(void) mdmderror(&rtxnp->mde, MDE_RENAME_CONFIG_ERROR,
2673 		    from_min);
2674 		return (EINVAL);
2675 	}
2676 
2677 	return (0);	/* ok */
2678 }
2679 
2680 /*
2681  * Named service entry point: MDRNM_CHECK
2682  */
2683 intptr_t
trans_rename_check(md_rendelta_t * delta,md_rentxn_t * rtxnp)2684 trans_rename_check(
2685 	md_rendelta_t	*delta,
2686 	md_rentxn_t	*rtxnp)
2687 {
2688 	int		 err = 0;
2689 	mt_unit_t	*un;
2690 
2691 	ASSERT(delta);
2692 	ASSERT(rtxnp);
2693 	ASSERT(delta->unp);
2694 	ASSERT(delta->uip);
2695 	ASSERT((rtxnp->op == MDRNOP_RENAME) || (rtxnp->op == MDRNOP_EXCHANGE));
2696 
2697 	if (!delta || !rtxnp || !delta->unp || !delta->uip) {
2698 		(void) mdsyserror(&rtxnp->mde, EINVAL);
2699 		return (EINVAL);
2700 	}
2701 
2702 	un = (mt_unit_t *)delta->unp;
2703 
2704 	if (rtxnp->revision == MD_RENAME_VERSION_OFFLINE) {
2705 	/*
2706 	 * trans' may not be open, if it is being modified in the exchange
2707 	 * or rename; trans-UFS hasn't been verified to handle the change
2708 	 * out from underneath it.
2709 	 */
2710 		if ((md_unit_isopen(delta->uip)) &&
2711 		    ((md_getminor(delta->dev) == rtxnp->from.mnum) ||
2712 		    (md_getminor(delta->dev) == rtxnp->to.mnum))) {
2713 			(void) mdmderror(&rtxnp->mde,
2714 			    MDE_RENAME_BUSY, rtxnp->from.mnum);
2715 			return (EBUSY);
2716 		}
2717 	}
2718 
2719 	/*
2720 	 * can't rename or exchange with a log attached
2721 	 */
2722 
2723 	if (un->un_l_unit) {
2724 		(void) mdmderror(&rtxnp->mde,
2725 		    MDE_RENAME_BUSY, rtxnp->from.mnum);
2726 		return (EBUSY);
2727 	}
2728 
2729 	switch (delta->old_role) {
2730 	case MDRR_SELF:
2731 		/*
2732 		 * self does additional checks
2733 		 */
2734 		err = trans_may_renexch_self((mt_unit_t *)delta->unp,
2735 		    delta->uip, rtxnp);
2736 		if (err != 0) {
2737 			goto out;
2738 		}
2739 		/* FALLTHROUGH */
2740 
2741 	case MDRR_PARENT:
2742 		/*
2743 		 * top_is_trans is only used to check for online
2744 		 * rename/exchange when MD_RENAME_VERSION == OFFLINE
2745 		 * since trans holds the sub-devices open
2746 		 */
2747 		rtxnp->stat.trans_in_stack = TRUE;
2748 		break;
2749 	default:
2750 		break;
2751 	}
2752 out:
2753 	return (err);
2754 }
2755 
2756 /* end of rename/exchange */
2757