xref: /titanic_41/usr/src/uts/common/io/lvm/softpart/sp_ioctl.c (revision ff5ca3bd17dee7e2bf2e4f2e3a2b354e0ecbd00d)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License (the "License").
6  * You may not use this file except in compliance with the License.
7  *
8  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
9  * or http://www.opensolaris.org/os/licensing.
10  * See the License for the specific language governing permissions
11  * and limitations under the License.
12  *
13  * When distributing Covered Code, include this CDDL HEADER in each
14  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
15  * If applicable, add the following below this CDDL HEADER, with the
16  * fields enclosed by brackets "[]" replaced with your own identifying
17  * information: Portions Copyright [yyyy] [name of copyright owner]
18  *
19  * CDDL HEADER END
20  */
21 
22 /*
23  * Copyright 2009 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 /*
28  * Soft partitioning metadevice driver (md_sp), administrative routines.
29  *
30  * This file contains the administrative routines for the soft partitioning
31  * metadevice driver.  All administration is done through the use of ioctl's.
32  *
33  * The primary ioctl's supported by soft partitions are as follows:
34  *
35  *	MD_IOCSET	- set up a new soft partition.
36  *	MD_IOCGET	- get the unit structure of a soft partition.
37  *	MD_IOCRESET	- delete a soft partition.
38  *	MD_IOCGROW	- add space to a soft partition.
39  *	MD_IOCGET_DEVS	- get the device the soft partition is built on.
40  *	MD_IOC_SPSTATUS	- set the status (un_status field in the soft
41  *			  partition unit structure) for one or more soft
42  *			  partitions.
43  *
44  * Note that, as with other metadevices, the majority of the work for
45  * building/growing/deleting soft partitions is performed in userland
46  * (specifically in libmeta, see meta_sp.c).  The driver's main administrative
47  * function is to maintain the in-core & metadb entries associated with a soft
48  * partition.
49  *
50  * In addition, a few other ioctl's are supported via helper routines in
51  * the md driver.  These are:
52  *
53  *	DKIOCINFO	- get "disk" information.
54  *	DKIOCGEOM	- get geometry information.
55  *	DKIOCGVTOC	- get vtoc information.
56  */
57 #include <sys/param.h>
58 #include <sys/systm.h>
59 #include <sys/conf.h>
60 #include <sys/file.h>
61 #include <sys/user.h>
62 #include <sys/uio.h>
63 #include <sys/t_lock.h>
64 #include <sys/buf.h>
65 #include <sys/dkio.h>
66 #include <sys/vtoc.h>
67 #include <sys/kmem.h>
68 #include <vm/page.h>
69 #include <sys/sysmacros.h>
70 #include <sys/types.h>
71 #include <sys/mkdev.h>
72 #include <sys/stat.h>
73 #include <sys/open.h>
74 #include <sys/lvm/mdvar.h>
75 #include <sys/lvm/md_sp.h>
76 #include <sys/lvm/md_notify.h>
77 #include <sys/modctl.h>
78 #include <sys/ddi.h>
79 #include <sys/sunddi.h>
80 #include <sys/debug.h>
81 #include <sys/model.h>
82 
83 #include <sys/sysevent/eventdefs.h>
84 #include <sys/sysevent/svm.h>
85 
86 extern int		md_status;
87 
88 extern unit_t		md_nunits;
89 extern set_t		md_nsets;
90 extern md_set_t		md_set[];
91 
92 extern md_ops_t		sp_md_ops;
93 extern md_krwlock_t	md_unit_array_rw;
94 extern major_t		md_major;
95 
96 /*
97  * FUNCTION:	sp_getun()
98  * INPUT:	mnum	- minor number of soft partition to get.
99  * OUTPUT:	mde	- return error pointer.
100  * RETURNS:	mp_unit_t *	- ptr to unit structure requested
101  *		NULL		- error
102  * PURPOSE:	Returns a reference to the soft partition unit structure
103  *		indicated by the passed-in minor number.
104  */
105 static mp_unit_t *
sp_getun(minor_t mnum,md_error_t * mde)106 sp_getun(minor_t mnum, md_error_t *mde)
107 {
108 	mp_unit_t	*un;
109 	mdi_unit_t	*ui;
110 	set_t		setno = MD_MIN2SET(mnum);
111 
112 	/* check set */
113 	if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits)) {
114 		(void) mdmderror(mde, MDE_INVAL_UNIT, mnum);
115 		return (NULL);
116 	}
117 
118 	if (md_get_setstatus(setno) & MD_SET_STALE) {
119 		(void) mdmddberror(mde, MDE_DB_STALE, mnum, setno);
120 		return (NULL);
121 	}
122 
123 	ui = MDI_UNIT(mnum);
124 
125 	if (ui == NULL) {
126 		(void) mdmderror(mde, MDE_UNIT_NOT_SETUP, mnum);
127 		return (NULL);
128 	}
129 
130 	un = (mp_unit_t *)MD_UNIT(mnum);
131 
132 	if (un->c.un_type != MD_METASP) {
133 		(void) mdmderror(mde, MDE_NOT_SP, mnum);
134 		return (NULL);
135 	}
136 
137 	return (un);
138 }
139 
140 
141 /*
142  * FUNCTION:	sp_setstatus()
143  * INPUT:	d	- data ptr passed in from ioctl.
144  *		mode	- pass-through to ddi_copyin.
145  *		lockp	- lock ptr.
146  * OUTPUT:	none.
147  * RETURNS:	0		- success.
148  *		non-zero	- error.
149  * PURPOSE:	Set the status of one or more soft partitions atomically.
150  *		this implements the MD_IOC_SPSTATUS ioctl.  Soft partitions
151  *		are passed in as an array of minor numbers.  The un_status
152  *		field in the unit structure of each soft partition is set to
153  *		the status passed in and all unit structures are recommitted
154  *		to the metadb at once.
155  */
156 static int
sp_setstatus(void * d,int mode,IOLOCK * lockp)157 sp_setstatus(void *d, int mode, IOLOCK *lockp)
158 {
159 	minor_t		*minors;
160 	mp_unit_t	*un;
161 	mddb_recid_t	*recids;
162 	int		i, nunits, sz;
163 	int		err = 0;
164 	sp_status_t	status;
165 	md_error_t	*mdep;
166 
167 	md_sp_statusset_t	*msp = (md_sp_statusset_t *)d;
168 
169 	nunits = msp->num_units;
170 	sz = msp->size;
171 	status = msp->new_status;
172 	mdep = &msp->mde;
173 
174 	mdclrerror(mdep);
175 	/* allocate minor number and recids arrays */
176 	minors = kmem_alloc(sz, KM_SLEEP);
177 	recids = kmem_alloc((nunits + 1) * sizeof (mddb_recid_t), KM_SLEEP);
178 
179 	/* copyin minor number array */
180 	if (err = ddi_copyin((void *)(uintptr_t)msp->minors, minors, sz, mode))
181 		goto out;
182 
183 	/* check to make sure all units are valid first */
184 	for (i = 0; i < nunits; i++) {
185 		if ((un = sp_getun(minors[i], mdep)) == NULL) {
186 			err = mdmderror(mdep, MDE_INVAL_UNIT, minors[i]);
187 			goto out;
188 		}
189 	}
190 
191 	/* update state for all units */
192 	for (i = 0; i < nunits; i++) {
193 		un = sp_getun(minors[i], mdep);
194 		(void) md_ioctl_writerlock(lockp, MDI_UNIT(minors[i]));
195 		un->un_status = status;
196 		recids[i] = un->c.un_record_id;
197 		md_ioctl_writerexit(lockp);
198 	}
199 
200 	recids[i] = 0;
201 	mddb_commitrecs_wrapper(recids);
202 
203 out:
204 	kmem_free(minors, sz);
205 	kmem_free(recids, ((nunits + 1) * sizeof (mddb_recid_t)));
206 	return (err);
207 }
208 
209 
210 /*
211  * FUNCTION:	sp_update_watermarks()
212  * INPUT:	d	- data ptr passed in from ioctl.
213  *		mode	- pass-through to ddi_copyin.
214  * OUTPUT:	none.
215  * RETURNS:	0		- success.
216  *		non-zero	- error.
217  * PURPOSE:	This implements the MD_IOC_SPUPDATEWM ioctl.
218  *              Watermarks are passed in an array.
219  */
220 static int
sp_update_watermarks(void * d,int mode)221 sp_update_watermarks(void *d, int mode)
222 {
223 	minor_t			mnum;
224 	set_t			setno;
225 	md_error_t		*mdep;
226 	mp_unit_t		*un;
227 	int			err = 0;
228 	size_t			wsz;
229 	size_t			osz;
230 	mp_watermark_t		*watermarks;
231 	sp_ext_offset_t		*offsets;
232 	md_dev64_t		device;
233 	buf_t			*bp;
234 	int			i;
235 	md_sp_update_wm_t	*mup = (md_sp_update_wm_t *)d;
236 	side_t			side;
237 
238 	mnum = mup->mnum;
239 	setno = MD_MIN2SET(mnum);
240 	side = mddb_getsidenum(setno);
241 	un = MD_UNIT(mnum);
242 
243 	if (un == NULL)
244 		return (EFAULT);
245 
246 	mdep = &mup->mde;
247 
248 	mdclrerror(mdep);
249 
250 	/* Validate the set */
251 	if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits))
252 		return (mdmderror(mdep, MDE_INVAL_UNIT, mnum));
253 	if (md_get_setstatus(setno) & MD_SET_STALE)
254 		return (mdmddberror(mdep, MDE_DB_STALE, mnum, setno));
255 
256 	wsz = mup->count * sizeof (mp_watermark_t);
257 	watermarks = kmem_alloc(wsz, KM_SLEEP);
258 
259 	osz = mup->count * sizeof (sp_ext_offset_t);
260 	offsets = kmem_alloc(osz, KM_SLEEP);
261 
262 	/*
263 	 * Once we're here, we are no longer stateless: we cannot
264 	 * return without first freeing the watermarks and offset
265 	 * arrays we just allocated.  So use the "out" label instead
266 	 * of "return."
267 	 */
268 
269 	/* Retrieve the watermark and offset arrays from user land */
270 
271 	if (ddi_copyin((void *)(uintptr_t)mup->wmp, watermarks, wsz, mode)) {
272 		err = EFAULT;
273 		goto out;
274 	}
275 
276 	if (ddi_copyin((void *)(uintptr_t)mup->osp, offsets, osz, mode)) {
277 		err = EFAULT;
278 		goto out;
279 	}
280 
281 	/*
282 	 * NOTE: For multi-node sets we only commit the watermarks if we are
283 	 * the master node. This avoids an ioctl-within-ioctl deadlock if the
284 	 * underlying device is a mirror.
285 	 */
286 	if (MD_MNSET_SETNO(setno) && !md_set[setno].s_am_i_master) {
287 		goto out;
288 	}
289 
290 	device = un->un_dev;
291 	if ((md_getmajor(device) != md_major) &&
292 	    (md_devid_found(setno, side, un->un_key) == 1)) {
293 		device = md_resolve_bydevid(mnum, device, un->un_key);
294 	}
295 	/*
296 	 * Flag the fact that we're coming from an ioctl handler to the
297 	 * underlying device so that it can take appropriate action if needed.
298 	 * This is necessary for multi-owner mirrors as they may need to
299 	 * update the metadevice state as a result of the layered open.
300 	 */
301 	if (md_layered_open(mnum, &device, MD_OFLG_FROMIOCTL)) {
302 		err = mdcomperror(mdep, MDE_SP_COMP_OPEN_ERR,
303 		    mnum, device);
304 		goto out;
305 	}
306 
307 	bp = kmem_alloc(biosize(), KM_SLEEP);
308 	bioinit(bp);
309 
310 	for (i = 0; i < mup->count; i++) {
311 
312 		/*
313 		 * Even the "constant" fields should be initialized
314 		 * here, since bioreset() below will clear them.
315 		 */
316 		bp->b_flags = B_WRITE;
317 		bp->b_bcount = sizeof (mp_watermark_t);
318 		bp->b_bufsize = sizeof (mp_watermark_t);
319 		bp->b_un.b_addr = (caddr_t)&watermarks[i];
320 		bp->b_lblkno = offsets[i];
321 		bp->b_edev = md_dev64_to_dev(device);
322 
323 		/*
324 		 * For MN sets only:
325 		 * Use a special flag MD_STR_WMUPDATE, for the following case:
326 		 * If the watermarks reside on a mirror disk and a switch
327 		 * of ownership is triggered by this IO,
328 		 * the message that is generated by that request must be
329 		 * processed even if the commd subsystem is currently suspended.
330 		 *
331 		 * For non-MN sets or non-mirror metadevices,
332 		 * this flag has no meaning and is not checked.
333 		 */
334 
335 		md_call_strategy(bp, MD_NOBLOCK | MD_STR_WMUPDATE, NULL);
336 
337 		if (biowait(bp)) {
338 			err = mdmderror(mdep,
339 			    MDE_SP_BADWMWRITE, mnum);
340 			break;
341 		}
342 
343 		/* Get the buf_t ready for the next iteration */
344 		bioreset(bp);
345 	}
346 
347 	biofini(bp);
348 	kmem_free(bp, biosize());
349 
350 	md_layered_close(device, MD_OFLG_NULL);
351 
352 out:
353 	kmem_free(watermarks, wsz);
354 	kmem_free(offsets, osz);
355 
356 	return (err);
357 }
358 
359 
360 /*
361  * FUNCTION:	sp_read_watermark()
362  * INPUT:	d	- data ptr passed in from ioctl.
363  *		mode	- pass-through to ddi_copyin.
364  * OUTPUT:	none.
365  * RETURNS:	0		- success.
366  *		non-zero	- error.
367  * PURPOSE:	This implements the MD_IOC_SPREADWM ioctl.
368  */
369 static int
sp_read_watermark(void * d,int mode)370 sp_read_watermark(void *d, int mode)
371 {
372 	md_error_t		*mdep;
373 	mp_watermark_t		watermark;
374 	md_dev64_t		device;
375 	buf_t			*bp;
376 	md_sp_read_wm_t		*mrp = (md_sp_read_wm_t *)d;
377 
378 	mdep = &mrp->mde;
379 
380 	mdclrerror(mdep);
381 
382 	device = mrp->rdev;
383 
384 	/*
385 	 * Flag the fact that we are being called from ioctl context so that
386 	 * the underlying device can take any necessary extra steps to handle
387 	 * this scenario.
388 	 */
389 	if (md_layered_open((minor_t)-1, &device, MD_OFLG_FROMIOCTL)) {
390 		return (mdcomperror(mdep, MDE_SP_COMP_OPEN_ERR,
391 		    (minor_t)NODEV, device));
392 	}
393 
394 	bp = kmem_alloc(biosize(), KM_SLEEP);
395 	bioinit(bp);
396 
397 	bp->b_flags = B_READ;
398 	bp->b_bcount = sizeof (mp_watermark_t);
399 	bp->b_bufsize = sizeof (mp_watermark_t);
400 	bp->b_un.b_addr = (caddr_t)&watermark;
401 	bp->b_lblkno = mrp->offset;
402 	bp->b_edev = md_dev64_to_dev(device);
403 
404 	md_call_strategy(bp, MD_NOBLOCK, NULL);
405 
406 	if (biowait(bp)) {
407 		/*
408 		 * Taking advantage of the knowledge that mdmderror()
409 		 * returns 0, so we don't really need to keep track of
410 		 * an error code other than in the error struct.
411 		 */
412 		(void) mdmderror(mdep, MDE_SP_BADWMREAD,
413 		    getminor(device));
414 	}
415 
416 	biofini(bp);
417 	kmem_free(bp, biosize());
418 
419 	md_layered_close(device, MD_OFLG_NULL);
420 
421 	if (ddi_copyout(&watermark, (void *)(uintptr_t)mrp->wmp,
422 	    sizeof (mp_watermark_t), mode)) {
423 		return (EFAULT);
424 	}
425 
426 	return (0);
427 }
428 
429 
430 /*
431  * FUNCTION:	sp_set()
432  * INPUT:	d	- data ptr passed in from ioctl.
433  *		mode	- pass-through to ddi_copyin.
434  * OUTPUT:	none.
435  * RETURNS:	0		- success.
436  *		non-zero	- error.
437  * PURPOSE:	Create a soft partition.  The unit structure representing
438  *		the soft partiton is passed down from userland.  We allocate
439  *		a metadb entry, copyin the unit the structure, handle any
440  *		metadevice parenting issues, then commit the record to the
441  *		metadb.  Once the record is in the metadb, we must also
442  *		build the associated in-core structures.  This is done via
443  *		sp_build_incore() (see sp.c).
444  */
445 static int
sp_set(void * d,int mode)446 sp_set(void *d, int mode)
447 {
448 	minor_t		mnum;
449 	mp_unit_t	*un;
450 	void		*rec_addr;
451 	mddb_recid_t	recids[3];
452 	mddb_type_t	rec_type;
453 	int		err;
454 	set_t		setno;
455 	md_error_t	*mdep;
456 	md_unit_t	*child_un;
457 	md_set_params_t *msp = (md_set_params_t *)d;
458 
459 	mnum = msp->mnum;
460 	setno = MD_MIN2SET(mnum);
461 	mdep = &msp->mde;
462 
463 	mdclrerror(mdep);
464 
465 	/* validate set */
466 
467 	if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits))
468 		return (mdmderror(mdep, MDE_INVAL_UNIT, mnum));
469 	if (md_get_setstatus(setno) & MD_SET_STALE)
470 		return (mdmddberror(mdep, MDE_DB_STALE, mnum, setno));
471 
472 	/* get the record type */
473 	rec_type = (mddb_type_t)md_getshared_key(setno,
474 	    sp_md_ops.md_driver.md_drivername);
475 
476 	/* check if there is already a device with this minor number */
477 	un = MD_UNIT(mnum);
478 	if (un != NULL)
479 		return (mdmderror(mdep, MDE_UNIT_ALREADY_SETUP, mnum));
480 
481 	/* create the db record for this soft partition */
482 
483 	if (msp->options & MD_CRO_64BIT) {
484 #if defined(_ILP32)
485 		return (mdmderror(mdep, MDE_UNIT_TOO_LARGE, mnum));
486 #else
487 		recids[0] = mddb_createrec((size_t)msp->size, rec_type, 0,
488 		    MD_CRO_64BIT | MD_CRO_SOFTPART | MD_CRO_FN, setno);
489 #endif
490 	} else {
491 		recids[0] = mddb_createrec((size_t)msp->size, rec_type, 0,
492 		    MD_CRO_32BIT | MD_CRO_SOFTPART | MD_CRO_FN, setno);
493 	}
494 	/* set initial value for possible child record */
495 	recids[1] = 0;
496 	if (recids[0] < 0)
497 		return (mddbstatus2error(mdep, recids[0], mnum, setno));
498 
499 	/* get the address of the soft partition db record */
500 	rec_addr = (void *) mddb_getrecaddr(recids[0]);
501 
502 	/*
503 	 * at this point we can happily mess with the soft partition
504 	 * db record since we haven't committed it to the metadb yet.
505 	 * if we crash before we commit, the uncommitted record will be
506 	 * automatically purged.
507 	 */
508 
509 	/* copy in the user's soft partition unit struct */
510 	if (err = ddi_copyin((void *)(uintptr_t)msp->mdp,
511 	    rec_addr, (size_t)msp->size, mode)) {
512 		mddb_deleterec_wrapper(recids[0]);
513 		return (EFAULT);
514 	}
515 
516 	/* fill in common unit structure fields which aren't set in userland */
517 	un = (mp_unit_t *)rec_addr;
518 
519 	/* All 64 bit metadevices only support EFI labels. */
520 	if (msp->options & MD_CRO_64BIT) {
521 		un->c.un_flag |= MD_EFILABEL;
522 	}
523 
524 	MD_SID(un) = mnum;
525 	MD_RECID(un) = recids[0];
526 	MD_PARENT(un) = MD_NO_PARENT;
527 	un->c.un_revision |= MD_FN_META_DEV;
528 
529 	/* if we are parenting a metadevice, set our child's parent field */
530 	if (md_getmajor(un->un_dev) == md_major) {
531 		/* it's a metadevice, need to parent it */
532 		child_un = MD_UNIT(md_getminor(un->un_dev));
533 		if (child_un == NULL) {
534 			mddb_deleterec_wrapper(recids[0]);
535 			return (mdmderror(mdep, MDE_INVAL_UNIT,
536 			    md_getminor(un->un_dev)));
537 		}
538 		md_set_parent(un->un_dev, MD_SID(un));
539 
540 		/* set child recid and recids end marker */
541 		recids[1] = MD_RECID(child_un);
542 		recids[2] = 0;
543 	}
544 
545 	/*
546 	 * build the incore structures.
547 	 */
548 	if (err = sp_build_incore(rec_addr, 0)) {
549 		md_nblocks_set(mnum, -1ULL);
550 		MD_UNIT(mnum) = NULL;
551 
552 		mddb_deleterec_wrapper(recids[0]);
553 		return (err);
554 	}
555 
556 	/*
557 	 * Update unit availability
558 	 */
559 	md_set[setno].s_un_avail--;
560 
561 	/*
562 	 * commit the record.
563 	 * if we had to update a child record, it will get commited
564 	 * as well.
565 	 */
566 	mddb_commitrecs_wrapper(recids);
567 
568 	/* create the mdi_unit struct for this soft partition */
569 	md_create_unit_incore(mnum, &sp_md_ops, 0);
570 
571 	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_CREATE, TAG_METADEVICE, MD_UN2SET(un),
572 	    MD_SID(un));
573 	return (0);
574 }
575 
576 
577 /*
578  * FUNCTION:	sp_get()
579  * INPUT:	d	- data ptr.
580  *		mode	- pass-through to ddi_copyout.
581  *		lock	- lock ptr.
582  * OUTPUT:	none.
583  * RETURNS:	0		- success.
584  *		non-zero	- error.
585  * PURPOSE:	Get the soft partition unit structure specified by the
586  *		minor number.  the in-core unit structure is obtained
587  *		and copied into the md_i_get structure passed down from
588  *		userland.
589  */
590 static int
sp_get(void * d,int mode,IOLOCK * lock)591 sp_get(void *d, int mode, IOLOCK *lock)
592 {
593 	minor_t		mnum;
594 	mdi_unit_t	*ui;
595 	mp_unit_t	*un;
596 	md_error_t	*mdep;
597 	md_i_get_t	*migp = d;
598 
599 
600 	mnum = migp->id;
601 	mdep = &migp->mde;
602 
603 	mdclrerror(mdep);
604 
605 	/* make sure this is a valid unit structure */
606 	if ((MD_MIN2SET(mnum) >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits))
607 		return (mdmderror(mdep, MDE_INVAL_UNIT, mnum));
608 
609 	/* get the mdi_unit */
610 	if ((ui = MDI_UNIT(mnum)) == NULL) {
611 		return (mdmderror(mdep, MDE_UNIT_NOT_SETUP, mnum));
612 	}
613 
614 	/*
615 	 * md_ioctl_readerlock returns a reference to the in-core
616 	 * unit structure.  this lock will be dropped by
617 	 * md_ioctl_lock_exit() before the ioctl returns.
618 	 */
619 	un = (mp_unit_t *)md_ioctl_readerlock(lock, ui);
620 
621 	/* verify the md_i_get structure */
622 	if (migp->size == 0) {
623 		migp->size = un->c.un_size;
624 		return (0);
625 	}
626 	if (migp->size < un->c.un_size) {
627 		return (EFAULT);
628 	}
629 
630 	/* copyout unit */
631 	if (ddi_copyout(un, (void *)(uintptr_t)migp->mdp,
632 	    un->c.un_size, mode))
633 		return (EFAULT);
634 	return (0);
635 }
636 
637 
638 /*
639  * FUNCTION:	sp_reset()
640  * INPUT:	reset_params	- soft partitioning reset parameters.
641  * OUTPUT:	none.
642  * RETURNS:	0		- success.
643  *		non-zero	- error.
644  * PURPOSE:	Do the setup work needed to delete a soft partition.
645  *		note that the actual removal of both in-core and metadb
646  *		structures is done in the reset_sp() routine (see sp.c).
647  *		In addition, since multiple soft partitions may exist
648  *		on top of a single metadevice, the soft partition reset
649  *		parameters (md_sp_reset_t) contains information about
650  *		how the soft partition should deparent/reparent the
651  *		underlying metadevice.  If the underlying metadevice is
652  *		to be deparented, the new_parent field will be MD_NO_PARENT,
653  *		otherwise it will be contain the minor number of another
654  *		soft partition built on top of the underlying metadevice.
655  */
656 static int
sp_reset(md_sp_reset_t * softp)657 sp_reset(md_sp_reset_t *softp)
658 {
659 	minor_t		mnum = softp->mnum;
660 	mdi_unit_t	*ui;
661 	mp_unit_t	*un;
662 	md_unit_t	*child_un;
663 	set_t		setno = MD_MIN2SET(mnum);
664 
665 	mdclrerror(&softp->mde);
666 
667 	/* get the unit structure */
668 	if ((un = sp_getun(mnum, &softp->mde)) == NULL) {
669 		return (mdmderror(&softp->mde, MDE_INVAL_UNIT, mnum));
670 	}
671 
672 	/* don't delete if we have a parent */
673 	if (MD_HAS_PARENT(un->c.un_parent)) {
674 		return (mdmderror(&softp->mde, MDE_IN_USE, mnum));
675 	}
676 
677 	rw_enter(&md_unit_array_rw.lock, RW_WRITER);
678 
679 	ui = MDI_UNIT(mnum);
680 	(void) md_unit_openclose_enter(ui);
681 
682 	/* don't delete if we are currently open */
683 	if (md_unit_isopen(ui)) {
684 		md_unit_openclose_exit(ui);
685 		rw_exit(&md_unit_array_rw.lock);
686 		return (mdmderror(&softp->mde, MDE_IS_OPEN, mnum));
687 	}
688 
689 	md_unit_openclose_exit(ui);
690 
691 	/*
692 	 * if we are built on metadevice, we need to deparent
693 	 * or reparent that metadevice.
694 	 */
695 	if (md_getmajor(un->un_dev) == md_major) {
696 		child_un = MD_UNIT(md_getminor(un->un_dev));
697 		md_set_parent(un->un_dev, softp->new_parent);
698 		mddb_commitrec_wrapper(MD_RECID(child_un));
699 	}
700 	/* remove the soft partition */
701 	reset_sp(un, mnum, 1);
702 
703 	/*
704 	 * Update unit availability
705 	 */
706 	md_set[setno].s_un_avail++;
707 
708 	/*
709 	 * If MN set, reset s_un_next so all nodes can have
710 	 * the same view of the next available slot when
711 	 * nodes are -w and -j
712 	 */
713 	if (MD_MNSET_SETNO(setno)) {
714 		md_upd_set_unnext(setno, MD_MIN2UNIT(mnum));
715 	}
716 
717 	/* release locks and return */
718 out:
719 	rw_exit(&md_unit_array_rw.lock);
720 	return (0);
721 }
722 
723 
724 /*
725  * FUNCTION:	sp_grow()
726  * INPUT:	d	- data ptr.
727  *		mode	- pass-through to ddi_copyin.
728  *		lockp	- lock ptr.
729  * OUTPUT:	none.
730  * RETURNS:	0		- success.
731  *		non-zero	- error.
732  * PURPOSE:	Attach more space to a soft partition.  We are passed in
733  *		a new unit structure with the new extents and other updated
734  *		information.  The new unit structure essentially replaces
735  *		the old unit for this soft partition.  We place the new
736  *		unit into the metadb, delete the old metadb record, and
737  *		then update the in-core unit structure array to point to
738  *		the new unit.
739  */
740 static int
sp_grow(void * d,int mode,IOLOCK * lockp)741 sp_grow(void *d, int mode, IOLOCK *lockp)
742 {
743 	minor_t		mnum;
744 	mp_unit_t	*un, *new_un;
745 	mdi_unit_t	*ui;
746 	minor_t		*par = NULL;
747 	IOLOCK		*plock = NULL;
748 	int		i;
749 	mddb_recid_t	recid;
750 	mddb_type_t	rec_type;
751 	mddb_recid_t	old_vtoc = 0;
752 	md_create_rec_option_t options;
753 	int		err;
754 	int		rval = 0;
755 	set_t		setno;
756 	md_error_t	*mdep;
757 	int		npar;
758 	md_grow_params_t *mgp = (md_grow_params_t *)d;
759 
760 	mnum = mgp->mnum;
761 	mdep = &mgp->mde;
762 	setno = MD_MIN2SET(mnum);
763 	npar = mgp->npar;
764 
765 	mdclrerror(mdep);
766 
767 	/* validate set */
768 	if ((setno >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits))
769 		return (mdmderror(mdep, MDE_INVAL_UNIT, mnum));
770 	if (md_get_setstatus(setno) & MD_SET_STALE)
771 		return (mdmddberror(mdep, MDE_DB_STALE, mnum, setno));
772 
773 	/* make sure this soft partition already exists */
774 	ui = MDI_UNIT(mnum);
775 	if (ui == NULL)
776 		return (mdmderror(mdep, MDE_UNIT_NOT_SETUP, mnum));
777 
778 	/* handle any parents */
779 	if (npar >= 1) {
780 		ASSERT((minor_t *)(uintptr_t)mgp->par != NULL);
781 		par = kmem_alloc(npar * sizeof (*par), KM_SLEEP);
782 		plock = kmem_alloc(npar * sizeof (*plock), KM_SLEEP);
783 		if (ddi_copyin((void *)(uintptr_t)mgp->par, par,
784 		    (npar * sizeof (*par)), mode) != 0) {
785 			kmem_free(par, npar * sizeof (*par));
786 			kmem_free(plock, npar * sizeof (*plock));
787 			return (EFAULT);
788 		}
789 	}
790 
791 	/*
792 	 * handle parent locking.  grab the unit writer lock,
793 	 * then all parent ioctl locks, and then finally our own.
794 	 * parents should be sorted to avoid deadlock.
795 	 */
796 	rw_enter(&md_unit_array_rw.lock, RW_WRITER);
797 	for (i = 0; i < npar; ++i) {
798 		(void) md_ioctl_writerlock(&plock[i],
799 		    MDI_UNIT(par[i]));
800 	}
801 	un = (mp_unit_t *)md_ioctl_writerlock(lockp, ui);
802 
803 	rec_type = (mddb_type_t)md_getshared_key(setno,
804 	    sp_md_ops.md_driver.md_drivername);
805 
806 	/*
807 	 * Preserve the friendly name nature of the unit that is growing.
808 	 */
809 	options = MD_CRO_SOFTPART;
810 	if (un->c.un_revision & MD_FN_META_DEV)
811 		options |= MD_CRO_FN;
812 	if (mgp->options & MD_CRO_64BIT) {
813 #if defined(_ILP32)
814 		rval = mdmderror(mdep, MDE_UNIT_TOO_LARGE, mnum);
815 		goto out;
816 #else
817 		recid = mddb_createrec((size_t)mgp->size, rec_type, 0,
818 		    MD_CRO_64BIT | options, setno);
819 #endif
820 	} else {
821 		recid = mddb_createrec((size_t)mgp->size, rec_type, 0,
822 		    MD_CRO_32BIT | options, setno);
823 	}
824 	if (recid < 0) {
825 		rval = mddbstatus2error(mdep, (int)recid, mnum, setno);
826 		goto out;
827 	}
828 
829 	/* get the address of the new unit */
830 	new_un = (mp_unit_t *)mddb_getrecaddr(recid);
831 
832 	/* copy in the user's unit struct */
833 	err = ddi_copyin((void *)(uintptr_t)mgp->mdp, new_un,
834 	    (size_t)mgp->size, mode);
835 	if (err) {
836 		mddb_deleterec_wrapper(recid);
837 		rval = EFAULT;
838 		goto out;
839 	}
840 	if (options & MD_CRO_FN)
841 		new_un->c.un_revision |= MD_FN_META_DEV;
842 
843 	/* All 64 bit metadevices only support EFI labels. */
844 	if (mgp->options & MD_CRO_64BIT) {
845 		new_un->c.un_flag |= MD_EFILABEL;
846 		/*
847 		 * If the device was previously smaller than a terabyte,
848 		 * and had a vtoc record attached to it, we remove the
849 		 * vtoc record, because the layout has changed completely.
850 		 */
851 		if (((un->c.un_revision & MD_64BIT_META_DEV) == 0) &&
852 		    (un->c.un_vtoc_id != 0)) {
853 			old_vtoc = un->c.un_vtoc_id;
854 			new_un->c.un_vtoc_id =
855 			    md_vtoc_to_efi_record(old_vtoc, setno);
856 		}
857 	}
858 
859 	/* commit new unit struct */
860 	MD_RECID(new_un) = recid;
861 	mddb_commitrec_wrapper(recid);
862 
863 	/*
864 	 * delete old unit struct.
865 	 */
866 	mddb_deleterec_wrapper(MD_RECID(un));
867 
868 	/* place new unit in in-core array */
869 	md_nblocks_set(mnum, new_un->c.un_total_blocks);
870 	MD_UNIT(mnum) = new_un;
871 
872 	SE_NOTIFY(EC_SVM_CONFIG, ESC_SVM_GROW, TAG_METADEVICE,
873 	    MD_UN2SET(new_un), MD_SID(new_un));
874 
875 	/*
876 	 * If old_vtoc has a non zero value, we know:
877 	 * - This unit crossed the border from smaller to larger one TB
878 	 * - There was a vtoc record for the unit,
879 	 * - This vtoc record is no longer needed, because
880 	 *   a new efi record has been created for this un.
881 	 */
882 	if (old_vtoc != 0) {
883 		mddb_deleterec_wrapper(old_vtoc);
884 	}
885 
886 	/* release locks, return success */
887 out:
888 	for (i =  npar - 1; (i >= 0); --i)
889 		md_ioctl_writerexit(&plock[i]);
890 	rw_exit(&md_unit_array_rw.lock);
891 	if (plock != NULL)
892 		kmem_free(plock, npar * sizeof (*plock));
893 	if (par != NULL)
894 		kmem_free(par, npar * sizeof (*par));
895 	return (rval);
896 }
897 
898 /*
899  * FUNCTION:	sp_getdevs()
900  * INPUT:	d	- data ptr.
901  *		mode	- pass-through to ddi_copyout.
902  *		lockp	- lock ptr.
903  * OUTPUT:	none.
904  * RETURNS:	0		- success.
905  *		non-zero	- error.
906  * PURPOSE:	Get the device on which the soft partition is built.
907  *		This is simply a matter of copying out the md_dev64_t stored
908  *		in the soft partition unit structure.
909  */
910 static int
sp_getdevs(void * d,int mode,IOLOCK * lockp)911 sp_getdevs(
912 	void			*d,
913 	int			mode,
914 	IOLOCK			*lockp
915 )
916 {
917 	minor_t			mnum;
918 	mdi_unit_t		*ui;
919 	mp_unit_t		*un;
920 	md_error_t		*mdep;
921 	md_dev64_t		*devsp;
922 	md_dev64_t		unit_dev;
923 	md_getdevs_params_t	*mgdp = (md_getdevs_params_t *)d;
924 
925 
926 	mnum = mgdp->mnum;
927 	mdep = &(mgdp->mde);
928 
929 	mdclrerror(mdep);
930 
931 	/* check set */
932 	if ((MD_MIN2SET(mnum) >= md_nsets) || (MD_MIN2UNIT(mnum) >= md_nunits))
933 		return (mdmderror(mdep, MDE_INVAL_UNIT, mnum));
934 	/* check unit */
935 	if ((ui = MDI_UNIT(mnum)) == NULL) {
936 		return (mdmderror(mdep, MDE_UNIT_NOT_SETUP, mnum));
937 	}
938 	/* get unit */
939 	un = (mp_unit_t *)md_ioctl_readerlock(lockp, ui);
940 	devsp = (md_dev64_t *)(uintptr_t)mgdp->devs;
941 
942 	/* only ever 1 device for a soft partition */
943 	if (mgdp->cnt != 0) {
944 		/* do miniroot->target device translation */
945 		unit_dev = un->un_dev;
946 		if (md_getmajor(unit_dev) != md_major) {
947 			if ((unit_dev = md_xlate_mini_2_targ(unit_dev))
948 			    == NODEV64)
949 				return (ENODEV);
950 		}
951 		/* copyout dev information */
952 		if (ddi_copyout(&unit_dev, devsp, sizeof (*devsp), mode) != 0)
953 			return (EFAULT);
954 	}
955 	mgdp->cnt = 1;
956 
957 	return (0);
958 }
959 
960 /*
961  * sp_set_capability:
962  * ------------------
963  * Called to set or clear a capability for a softpart
964  * called by the MD_MN_SET_CAP ioctl.
965  */
966 static int
sp_set_capability(md_mn_setcap_params_t * p,IOLOCK * lockp)967 sp_set_capability(md_mn_setcap_params_t *p, IOLOCK *lockp)
968 {
969 	set_t		setno;
970 	mdi_unit_t	*ui;
971 	mp_unit_t	*un;
972 	int		err = 0;
973 
974 	if ((un = sp_getun(p->mnum, &p->mde)) == NULL)
975 		return (EINVAL);
976 
977 	/* This function is only valid for a multi-node set */
978 	setno = MD_MIN2SET(p->mnum);
979 	if (!MD_MNSET_SETNO(setno)) {
980 		return (EINVAL);
981 	}
982 	ui = MDI_UNIT(p->mnum);
983 	(void) md_ioctl_readerlock(lockp, ui);
984 
985 	if (p->sc_set & DKV_ABR_CAP) {
986 		void (*inc_abr_count)();
987 
988 		ui->ui_tstate |= MD_ABR_CAP; /* Set ABR capability */
989 		/* Increment abr count in underlying metadevice */
990 		inc_abr_count = (void(*)())md_get_named_service(un->un_dev,
991 		    0, MD_INC_ABR_COUNT, 0);
992 		if (inc_abr_count != NULL)
993 			(void) (*inc_abr_count)(un->un_dev);
994 	} else {
995 		void (*dec_abr_count)();
996 
997 		ui->ui_tstate &= ~MD_ABR_CAP; /* Clear ABR capability */
998 		/* Decrement abr count in underlying metadevice */
999 		dec_abr_count = (void(*)())md_get_named_service(un->un_dev,
1000 		    0, MD_DEC_ABR_COUNT, 0);
1001 		if (dec_abr_count != NULL)
1002 			(void) (*dec_abr_count)(un->un_dev);
1003 	}
1004 	if (p->sc_set & DKV_DMR_CAP) {
1005 		ui->ui_tstate |= MD_DMR_CAP; /* Set DMR capability */
1006 	} else {
1007 		ui->ui_tstate &= ~MD_DMR_CAP; /* Clear DMR capability */
1008 	}
1009 	md_ioctl_readerexit(lockp);
1010 	return (err);
1011 }
1012 
1013 
1014 /*
1015  * FUNCTION:	sp_admin_ioctl().
1016  * INPUT:	cmd	- ioctl to be handled.
1017  *		data	- data ptr.
1018  *		mode	- pass-through to copyin/copyout routines.
1019  *		lockp	- lock ptr.
1020  * OUTPUT:	none.
1021  * RETURNS:	0		- success.
1022  *		non-zero	- error.
1023  * PURPOSE:	Handle administrative ioctl's.  Essentially a large
1024  *		switch statement to dispatch the ioctl's to their
1025  *		handlers.  See comment at beginning of file for specifics
1026  *		on which ioctl's are handled.
1027  */
1028 static int
sp_admin_ioctl(int cmd,void * data,int mode,IOLOCK * lockp)1029 sp_admin_ioctl(int cmd, void *data, int mode, IOLOCK *lockp)
1030 {
1031 	size_t	sz = 0;
1032 	void	*d = NULL;
1033 	int	err = 0;
1034 
1035 	/* We can only handle 32-bit clients for internal commands */
1036 	if ((mode & DATAMODEL_MASK) != DATAMODEL_ILP32) {
1037 		return (EINVAL);
1038 	}
1039 
1040 	/* handle ioctl */
1041 	switch (cmd) {
1042 
1043 	case MD_IOCSET:
1044 	{
1045 		/* create new soft partition */
1046 		if (! (mode & FWRITE))
1047 			return (EACCES);
1048 
1049 		sz = sizeof (md_set_params_t);
1050 
1051 		d = kmem_alloc(sz, KM_SLEEP);
1052 
1053 		if (ddi_copyin(data, d, sz, mode)) {
1054 			err = EFAULT;
1055 			break;
1056 		}
1057 
1058 		err = sp_set(d, mode);
1059 		break;
1060 	}
1061 
1062 	case MD_IOCGET:
1063 	{
1064 		/* get soft partition unit structure */
1065 		if (! (mode & FREAD))
1066 			return (EACCES);
1067 
1068 		sz = sizeof (md_i_get_t);
1069 
1070 		d = kmem_alloc(sz, KM_SLEEP);
1071 
1072 		if (ddi_copyin(data, d, sz, mode)) {
1073 			err = EFAULT;
1074 			break;
1075 		}
1076 
1077 		err = sp_get(d, mode, lockp);
1078 		break;
1079 	}
1080 	case MD_IOCRESET:
1081 	{
1082 		/* delete soft partition */
1083 		if (! (mode & FWRITE))
1084 			return (EACCES);
1085 
1086 		sz = sizeof (md_sp_reset_t);
1087 		d = kmem_alloc(sz, KM_SLEEP);
1088 
1089 		if (ddi_copyin(data, d, sz, mode)) {
1090 			err = EFAULT;
1091 			break;
1092 		}
1093 
1094 		err = sp_reset((md_sp_reset_t *)d);
1095 		break;
1096 	}
1097 
1098 	case MD_IOCGROW:
1099 	{
1100 		/* grow soft partition */
1101 		if (! (mode & FWRITE))
1102 			return (EACCES);
1103 
1104 		sz = sizeof (md_grow_params_t);
1105 		d  = kmem_alloc(sz, KM_SLEEP);
1106 
1107 		if (ddi_copyin(data, d, sz, mode)) {
1108 			err = EFAULT;
1109 			break;
1110 		}
1111 
1112 		err = sp_grow(d, mode, lockp);
1113 		break;
1114 	}
1115 
1116 	case MD_IOCGET_DEVS:
1117 	{
1118 		/* get underlying device */
1119 		if (! (mode & FREAD))
1120 			return (EACCES);
1121 
1122 		sz = sizeof (md_getdevs_params_t);
1123 		d  = kmem_alloc(sz, KM_SLEEP);
1124 
1125 		if (ddi_copyin(data, d, sz, mode)) {
1126 			err = EFAULT;
1127 			break;
1128 		}
1129 
1130 		err = sp_getdevs(d, mode, lockp);
1131 		break;
1132 	}
1133 
1134 	case MD_IOC_SPSTATUS:
1135 	{
1136 		/* set the status field of one or more soft partitions */
1137 		if (! (mode & FWRITE))
1138 			return (EACCES);
1139 
1140 		sz = sizeof (md_sp_statusset_t);
1141 		d  = kmem_alloc(sz, KM_SLEEP);
1142 
1143 		if (ddi_copyin(data, d, sz, mode)) {
1144 			err = EFAULT;
1145 			break;
1146 		}
1147 
1148 		err = sp_setstatus(d, mode, lockp);
1149 		break;
1150 	}
1151 
1152 	case MD_IOC_SPUPDATEWM:
1153 	case MD_MN_IOC_SPUPDATEWM:
1154 	{
1155 		if (! (mode & FWRITE))
1156 			return (EACCES);
1157 
1158 		sz = sizeof (md_sp_update_wm_t);
1159 		d  = kmem_alloc(sz, KM_SLEEP);
1160 
1161 		if (ddi_copyin(data, d, sz, mode)) {
1162 			err = EFAULT;
1163 			break;
1164 		}
1165 
1166 		err = sp_update_watermarks(d, mode);
1167 		break;
1168 	}
1169 
1170 	case MD_IOC_SPREADWM:
1171 	{
1172 		if (! (mode & FREAD))
1173 			return (EACCES);
1174 
1175 		sz = sizeof (md_sp_read_wm_t);
1176 		d  = kmem_alloc(sz, KM_SLEEP);
1177 
1178 		if (ddi_copyin(data, d, sz, mode)) {
1179 			err = EFAULT;
1180 			break;
1181 		}
1182 
1183 		err = sp_read_watermark(d, mode);
1184 		break;
1185 	}
1186 
1187 	case MD_MN_SET_CAP:
1188 	{
1189 		if (! (mode & FWRITE))
1190 			return (EACCES);
1191 
1192 		sz = sizeof (md_mn_setcap_params_t);
1193 		d  = kmem_alloc(sz, KM_SLEEP);
1194 
1195 		if (ddi_copyin(data, d, sz, mode)) {
1196 			err = EFAULT;
1197 			break;
1198 		}
1199 
1200 		err = sp_set_capability((md_mn_setcap_params_t *)d, lockp);
1201 		break;
1202 	}
1203 
1204 	default:
1205 		return (ENOTTY);
1206 	}
1207 
1208 	/*
1209 	 * copyout and free any args
1210 	 */
1211 	if (sz != 0) {
1212 		if (err == 0) {
1213 			if (ddi_copyout(d, data, sz, mode) != 0) {
1214 				err = EFAULT;
1215 			}
1216 		}
1217 		kmem_free(d, sz);
1218 	}
1219 	return (err);
1220 }
1221 
1222 
1223 /*
1224  * FUNCTION:	md_sp_ioctl()
1225  * INPUT:	dev	- device we are operating on.
1226  *		cmd	- ioctl to be handled.
1227  *		data	- data ptr.
1228  *		mode	- pass-through to copyin/copyout routines.
1229  *		lockp	- lock ptr.
1230  * OUTPUT:	none.
1231  * RETURNS:	0		- success.
1232  *		non-zero	- error.
1233  * PURPOSE:	Dispatch ioctl's.  Administrative ioctl's are handled
1234  *		by sp_admin_ioctl.  All others (see comment at beginning
1235  *		of this file) are handled in-line here.
1236  */
1237 int
md_sp_ioctl(dev_t dev,int cmd,void * data,int mode,IOLOCK * lockp)1238 md_sp_ioctl(dev_t dev, int cmd, void *data, int mode, IOLOCK *lockp)
1239 {
1240 	minor_t		mnum = getminor(dev);
1241 	mp_unit_t	*un;
1242 	mdi_unit_t	*ui;
1243 	int		err = 0;
1244 
1245 	/* handle admin ioctls */
1246 	if (mnum == MD_ADM_MINOR)
1247 		return (sp_admin_ioctl(cmd, data, mode, lockp));
1248 
1249 	/* check unit */
1250 	if ((MD_MIN2SET(mnum) >= md_nsets) ||
1251 	    (MD_MIN2UNIT(mnum) >= md_nunits) ||
1252 	    ((ui = MDI_UNIT(mnum)) == NULL) ||
1253 	    ((un = MD_UNIT(mnum)) == NULL))
1254 		return (ENXIO);
1255 
1256 	/* is this a supported ioctl? */
1257 	err = md_check_ioctl_against_unit(cmd, un->c);
1258 	if (err != 0) {
1259 		return (err);
1260 	}
1261 
1262 
1263 	/* handle ioctl */
1264 	switch (cmd) {
1265 
1266 	case DKIOCINFO:
1267 	{
1268 		/* "disk" info */
1269 		struct dk_cinfo		*p;
1270 
1271 		if (! (mode & FREAD))
1272 			return (EACCES);
1273 
1274 		p = kmem_alloc(sizeof (*p), KM_SLEEP);
1275 
1276 		get_info(p, mnum);
1277 		if (ddi_copyout((caddr_t)p, data, sizeof (*p), mode) != 0)
1278 			err = EFAULT;
1279 
1280 		kmem_free(p, sizeof (*p));
1281 		return (err);
1282 	}
1283 
1284 	case DKIOCGMEDIAINFO:
1285 	{
1286 		struct dk_minfo	p;
1287 
1288 		if (! (mode & FREAD))
1289 			return (EACCES);
1290 
1291 		get_minfo(&p, mnum);
1292 		if (ddi_copyout(&p, data, sizeof (struct dk_minfo), mode) != 0)
1293 			err = EFAULT;
1294 
1295 		return (err);
1296 	}
1297 
1298 	case DKIOCGGEOM:
1299 	{
1300 		/* geometry information */
1301 		struct dk_geom		*p;
1302 
1303 		if (! (mode & FREAD))
1304 			return (EACCES);
1305 
1306 		p = kmem_alloc(sizeof (*p), KM_SLEEP);
1307 
1308 		md_get_geom((md_unit_t *)un, p);
1309 		if (ddi_copyout((caddr_t)p, data, sizeof (*p),
1310 		    mode) != 0)
1311 			err = EFAULT;
1312 
1313 		kmem_free(p, sizeof (*p));
1314 		return (err);
1315 	}
1316 	case DKIOCGAPART:
1317 	{
1318 		struct dk_map	dmp;
1319 
1320 		err = 0;
1321 		md_get_cgapart((md_unit_t *)un, &dmp);
1322 
1323 		if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
1324 			if (ddi_copyout((caddr_t)&dmp, data, sizeof (dmp),
1325 			    mode) != 0)
1326 				err = EFAULT;
1327 		}
1328 #ifdef _SYSCALL32
1329 		else {
1330 			struct dk_map32 dmp32;
1331 
1332 			dmp32.dkl_cylno = dmp.dkl_cylno;
1333 			dmp32.dkl_nblk = dmp.dkl_nblk;
1334 
1335 			if (ddi_copyout((caddr_t)&dmp32, data, sizeof (dmp32),
1336 			    mode) != 0)
1337 				err = EFAULT;
1338 		}
1339 #endif /* _SYSCALL32 */
1340 
1341 		return (err);
1342 	}
1343 	case DKIOCGVTOC:
1344 	{
1345 		/* vtoc information */
1346 		struct vtoc	*vtoc;
1347 
1348 		if (! (mode & FREAD))
1349 			return (EACCES);
1350 
1351 		vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP);
1352 		md_get_vtoc((md_unit_t *)un, vtoc);
1353 
1354 		if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
1355 			if (ddi_copyout(vtoc, data, sizeof (*vtoc), mode))
1356 				err = EFAULT;
1357 		}
1358 #ifdef _SYSCALL32
1359 		else {
1360 			struct vtoc32	*vtoc32;
1361 
1362 			vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP);
1363 
1364 			vtoctovtoc32((*vtoc), (*vtoc32));
1365 			if (ddi_copyout(vtoc32, data, sizeof (*vtoc32), mode))
1366 				err = EFAULT;
1367 			kmem_free(vtoc32, sizeof (*vtoc32));
1368 		}
1369 #endif /* _SYSCALL32 */
1370 
1371 		kmem_free(vtoc, sizeof (*vtoc));
1372 		return (err);
1373 	}
1374 
1375 	case DKIOCSVTOC:
1376 	{
1377 		struct vtoc	*vtoc;
1378 
1379 		if (! (mode & FWRITE))
1380 			return (EACCES);
1381 
1382 		vtoc = kmem_zalloc(sizeof (*vtoc), KM_SLEEP);
1383 		if ((mode & DATAMODEL_MASK) == DATAMODEL_NATIVE) {
1384 			if (ddi_copyin(data, vtoc, sizeof (*vtoc), mode)) {
1385 				err = EFAULT;
1386 			}
1387 		}
1388 #ifdef _SYSCALL32
1389 		else {
1390 			struct vtoc32	*vtoc32;
1391 
1392 			vtoc32 = kmem_zalloc(sizeof (*vtoc32), KM_SLEEP);
1393 
1394 			if (ddi_copyin(data, vtoc32, sizeof (*vtoc32), mode)) {
1395 				err = EFAULT;
1396 			} else {
1397 				vtoc32tovtoc((*vtoc32), (*vtoc));
1398 			}
1399 			kmem_free(vtoc32, sizeof (*vtoc32));
1400 		}
1401 #endif /* _SYSCALL32 */
1402 
1403 		if (err == 0)
1404 			err = md_set_vtoc((md_unit_t *)un, vtoc);
1405 
1406 		kmem_free(vtoc, sizeof (*vtoc));
1407 		return (err);
1408 	}
1409 
1410 	case DKIOCGEXTVTOC:
1411 	{
1412 		/* extended vtoc information */
1413 		struct extvtoc	*extvtoc;
1414 
1415 		if (! (mode & FREAD))
1416 			return (EACCES);
1417 
1418 		extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP);
1419 		md_get_extvtoc((md_unit_t *)un, extvtoc);
1420 
1421 		if (ddi_copyout(extvtoc, data, sizeof (*extvtoc), mode))
1422 			err = EFAULT;
1423 
1424 		kmem_free(extvtoc, sizeof (*extvtoc));
1425 		return (err);
1426 	}
1427 
1428 	case DKIOCSEXTVTOC:
1429 	{
1430 		struct extvtoc	*extvtoc;
1431 
1432 		if (! (mode & FWRITE))
1433 			return (EACCES);
1434 
1435 		extvtoc = kmem_zalloc(sizeof (*extvtoc), KM_SLEEP);
1436 		if (ddi_copyin(data, extvtoc, sizeof (*extvtoc), mode)) {
1437 			err = EFAULT;
1438 		}
1439 
1440 		if (err == 0)
1441 			err = md_set_extvtoc((md_unit_t *)un, extvtoc);
1442 
1443 		kmem_free(extvtoc, sizeof (*extvtoc));
1444 		return (err);
1445 	}
1446 
1447 	case DKIOCGETEFI:
1448 	{
1449 		/*
1450 		 * This one can be done centralized,
1451 		 * no need to put in the same code for all types of metadevices
1452 		 */
1453 		return (md_dkiocgetefi(mnum, data, mode));
1454 	}
1455 	case DKIOCSETEFI:
1456 	{
1457 		/*
1458 		 * This one can be done centralized,
1459 		 * no need to put in the same code for all types of metadevices
1460 		 */
1461 		return (md_dkiocsetefi(mnum, data, mode));
1462 	}
1463 
1464 	case DKIOCPARTITION:
1465 	{
1466 		return (md_dkiocpartition(mnum, data, mode));
1467 	}
1468 
1469 	case DKIOCGETVOLCAP:
1470 	{
1471 		/*
1472 		 * Return the supported capabilities for the soft-partition.
1473 		 * We can only support those caps that are provided by the
1474 		 * underlying device.
1475 		 */
1476 
1477 		volcap_t	vc;
1478 
1479 		if (!MD_MNSET_SETNO(MD_MIN2SET(mnum)))
1480 			return (EINVAL);
1481 
1482 		if (! (mode & FREAD))
1483 			return (EACCES);
1484 
1485 		bzero(&vc, sizeof (vc));
1486 
1487 		/* Send ioctl to underlying driver */
1488 
1489 		err = md_call_ioctl(un->un_dev, cmd, &vc, (mode | FKIOCTL),
1490 		    lockp);
1491 
1492 		if (err == 0)
1493 			ui->ui_capab = vc.vc_info;
1494 
1495 		if (ddi_copyout(&vc, data, sizeof (vc), mode))
1496 			err = EFAULT;
1497 
1498 		return (err);
1499 	}
1500 
1501 	case DKIOCSETVOLCAP:
1502 	{
1503 		/*
1504 		 * Enable a supported capability (as returned by DKIOCGETVOLCAP)
1505 		 * Do not pass the request down as we're the top-level device
1506 		 * handler for the application.
1507 		 * If the requested capability is supported (set in ui_capab),
1508 		 * set the corresponding bit in ui_tstate so that we can pass
1509 		 * the appropriate flag when performing i/o.
1510 		 * This request is propagated to all nodes.
1511 		 */
1512 		volcap_t	vc, vc1;
1513 		volcapset_t	volcap = 0;
1514 		void 		(*check_offline)();
1515 		int		offline_status = 0;
1516 
1517 		if (!MD_MNSET_SETNO(MD_MIN2SET(mnum)))
1518 			return (EINVAL);
1519 
1520 		if (! (mode & FWRITE))
1521 			return (EACCES);
1522 
1523 		if (ddi_copyin(data, &vc, sizeof (vc), mode))
1524 			return (EFAULT);
1525 
1526 		/*
1527 		 * Send DKIOCGETVOLCAP to underlying driver to see if
1528 		 * capability supported
1529 		 */
1530 
1531 		vc1.vc_info = 0;
1532 		err = md_call_ioctl(un->un_dev, DKIOCGETVOLCAP, &vc1,
1533 		    (mode | FKIOCTL), lockp);
1534 		if (err != 0)
1535 			return (err);
1536 
1537 		/* Save capabilities */
1538 		ui->ui_capab = vc1.vc_info;
1539 		/*
1540 		 * Error if required capability not supported by underlying
1541 		 * driver
1542 		 */
1543 		if ((vc1.vc_info & vc.vc_set) == 0)
1544 			return (ENOTSUP);
1545 
1546 
1547 		/*
1548 		 * Check if underlying mirror has an offline submirror,
1549 		 * fail if there is on offline submirror
1550 		 */
1551 		check_offline = (void(*)())md_get_named_service(un->un_dev,
1552 		    0, MD_CHECK_OFFLINE, 0);
1553 		if (check_offline != NULL)
1554 			(void) (*check_offline)(un->un_dev, &offline_status);
1555 		if (offline_status)
1556 			return (EINVAL);
1557 
1558 		if (ui->ui_tstate & MD_ABR_CAP)
1559 			volcap |= DKV_ABR_CAP;
1560 
1561 		/* Only send capability message if there is a change */
1562 		if ((vc.vc_set & (DKV_ABR_CAP)) != volcap)
1563 			err = mdmn_send_capability_message(mnum, vc, lockp);
1564 		return (err);
1565 	}
1566 
1567 	case DKIOCDMR:
1568 	{
1569 		/*
1570 		 * Only valid for MN sets. We need to pass it down to the
1571 		 * underlying driver if its a metadevice, after we've modified
1572 		 * the offsets to pick up the correct lower-level device
1573 		 * position.
1574 		 */
1575 		vol_directed_rd_t	*vdr;
1576 #ifdef _MULTI_DATAMODEL
1577 		vol_directed_rd32_t	*vdr32;
1578 #endif	/* _MULTI_DATAMODEL */
1579 
1580 		if (!MD_MNSET_SETNO(MD_MIN2SET(mnum)))
1581 			return (EINVAL);
1582 
1583 		if (! (ui->ui_capab & DKV_DMR_CAP))
1584 			return (EINVAL);
1585 
1586 		vdr = kmem_zalloc(sizeof (vol_directed_rd_t), KM_NOSLEEP);
1587 		if (vdr == NULL)
1588 			return (ENOMEM);
1589 
1590 		/*
1591 		 * Underlying device supports directed mirror read, so update
1592 		 * the user-supplied offset to pick the correct block from the
1593 		 * partitioned metadevice.
1594 		 */
1595 #ifdef _MULTI_DATAMODEL
1596 		vdr32 = kmem_zalloc(sizeof (vol_directed_rd32_t), KM_NOSLEEP);
1597 		if (vdr32 == NULL) {
1598 			kmem_free(vdr, sizeof (vol_directed_rd_t));
1599 			return (ENOMEM);
1600 		}
1601 
1602 		switch (ddi_model_convert_from(mode & FMODELS)) {
1603 		case DDI_MODEL_ILP32:
1604 			if (ddi_copyin(data, vdr32, sizeof (*vdr32), mode)) {
1605 				kmem_free(vdr, sizeof (*vdr));
1606 				return (EFAULT);
1607 			}
1608 			vdr->vdr_flags = vdr32->vdr_flags;
1609 			vdr->vdr_offset = vdr32->vdr_offset;
1610 			vdr->vdr_nbytes = vdr32->vdr_nbytes;
1611 			vdr->vdr_data = (void *)(uintptr_t)vdr32->vdr_data;
1612 			vdr->vdr_side = vdr32->vdr_side;
1613 			break;
1614 
1615 		case DDI_MODEL_NONE:
1616 			if (ddi_copyin(data, vdr, sizeof (*vdr), mode)) {
1617 				kmem_free(vdr32, sizeof (*vdr32));
1618 				kmem_free(vdr, sizeof (*vdr));
1619 				return (EFAULT);
1620 			}
1621 			break;
1622 
1623 		default:
1624 			kmem_free(vdr32, sizeof (*vdr32));
1625 			kmem_free(vdr, sizeof (*vdr));
1626 			return (EFAULT);
1627 		}
1628 #else	/* ! _MULTI_DATAMODEL */
1629 		if (ddi_copyin(data, vdr, sizeof (*vdr), mode)) {
1630 			kmem_free(vdr, sizeof (*vdr));
1631 			return (EFAULT);
1632 		}
1633 #endif	/* _MULTI_DATA_MODEL */
1634 
1635 		err = sp_directed_read(mnum, vdr, mode);
1636 
1637 
1638 #ifdef _MULTI_DATAMODEL
1639 		switch (ddi_model_convert_from(mode & FMODELS)) {
1640 		case DDI_MODEL_ILP32:
1641 			vdr32->vdr_flags = vdr->vdr_flags;
1642 			vdr32->vdr_offset = vdr->vdr_offset;
1643 			vdr32->vdr_side = vdr->vdr_side;
1644 			vdr32->vdr_bytesread = vdr->vdr_bytesread;
1645 			bcopy(vdr->vdr_side_name, vdr32->vdr_side_name,
1646 			    sizeof (vdr32->vdr_side_name));
1647 
1648 			if (ddi_copyout(vdr32, data, sizeof (*vdr32), mode))
1649 				err = EFAULT;
1650 			break;
1651 
1652 		case DDI_MODEL_NONE:
1653 			if (ddi_copyout(&vdr, data, sizeof (vdr), mode))
1654 				err = EFAULT;
1655 			break;
1656 		}
1657 #else	/* ! _MULTI_DATA_MODEL */
1658 		if (ddi_copyout(&vdr, data, sizeof (vdr), mode))
1659 			err = EFAULT;
1660 #endif	/* _MULTI_DATA_MODEL */
1661 
1662 #ifdef _MULTI_DATAMODEL
1663 		kmem_free(vdr32, sizeof (*vdr32));
1664 #endif	/* _MULTI_DATAMODEL */
1665 		kmem_free(vdr, sizeof (*vdr));
1666 
1667 		return (err);
1668 	}
1669 
1670 	}
1671 
1672 	/* Option not handled */
1673 	return (ENOTTY);
1674 }
1675