xref: /titanic_41/usr/src/lib/lvm/libmeta/common/meta_repartition.c (revision f5f2d263454d943a366844932bdb677530ba733b)
1 /*
2  * CDDL HEADER START
3  *
4  * The contents of this file are subject to the terms of the
5  * Common Development and Distribution License, Version 1.0 only
6  * (the "License").  You may not use this file except in compliance
7  * with the License.
8  *
9  * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10  * or http://www.opensolaris.org/os/licensing.
11  * See the License for the specific language governing permissions
12  * and limitations under the License.
13  *
14  * When distributing Covered Code, include this CDDL HEADER in each
15  * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16  * If applicable, add the following below this CDDL HEADER, with the
17  * fields enclosed by brackets "[]" replaced with your own identifying
18  * information: Portions Copyright [yyyy] [name of copyright owner]
19  *
20  * CDDL HEADER END
21  */
22 /*
23  * Copyright 2004 Sun Microsystems, Inc.  All rights reserved.
24  * Use is subject to license terms.
25  */
26 
27 #pragma ident	"%Z%%M%	%I%	%E% SMI"
28 
29 #include <stdio.h>
30 #include <meta.h>
31 #include "meta_repartition.h"
32 
33 
34 
35 /*
36  * FUNCTION:	meta_replicaslice()
37  * INPUT:	dnp	- the name of the drive to check
38  * OUTPUT:	slicep	- pointer to slice number
39  *		ep	- pointer to an md_error_t structure in which
40  *			  to return errors to the caller
41  * RETURNS:	int	-  0 - value pointed to by slicep is valid
42  *			  -1 - otherwise
43  *
44  * PURPOSE:	Determine which slice of the specified drive to
45  *		reserve, presumably for metadb replica usage.
46  *
47  * NOTE:	If slicep is NULL, the return code will indicate
48  *		whether or not the slice number could be determined
49  */
50 int
51 meta_replicaslice(
52 	mddrivename_t	*dnp,
53 	uint_t		*slicep,
54 	md_error_t	*ep
55 )
56 {
57 	int		err = 0;
58 	int		ioctl_return;
59 	int		fd;
60 	char		*rname;
61 	struct dk_geom	geom;
62 
63 	rname = dnp->rname;
64 	if ((fd = open(rname, (O_RDONLY|O_NDELAY), 0)) < 0) {
65 		char	*n;
66 		int	open_errno;
67 		size_t	len;
68 
69 		if (errno != ENOENT)
70 			return (mdsyserror(ep, errno, rname));
71 
72 		len = strlen(rname) + 3;
73 		n = Zalloc(len);
74 		(void) snprintf(n, len, "%ss0", rname);
75 		fd = open(n, (O_RDONLY|O_NDELAY), 0);
76 		open_errno = errno;
77 		Free(n);
78 		if (fd < 0) {
79 			return (mdsyserror(ep, open_errno, rname));
80 		}
81 	}
82 
83 	/*
84 	 * if our drivenamep points to a device not supporting
85 	 * DKIOCGGEOM, we have an EFI label.
86 	 */
87 	errno = 0;
88 	ioctl_return = ioctl(fd, DKIOCGGEOM, &geom);
89 	err = errno;
90 
91 	(void) close(fd);
92 
93 	/*
94 	 * If the DKIOCGGEOM ioctl succeeded, then the device has a
95 	 * VTOC style label.  In this case, we use slice 7.
96 	 */
97 	if (ioctl_return == 0) {
98 		if (slicep != NULL) {
99 			*slicep = MD_SLICE7;
100 		}
101 		return (0);
102 	}
103 
104 	/*
105 	 * ENOTSUP indicates an EFI style label, in which case slice 7
106 	 * cannot be used because its minor number is reserved.  In
107 	 * this case, use slice 6.
108 	 */
109 	if (err == ENOTSUP) {
110 		if (slicep != NULL) {
111 			*slicep = MD_SLICE6;
112 		}
113 		return (0);
114 	}
115 
116 	/*
117 	 * Those are the only two cases we know how to deal with;
118 	 * either the drivenamep didn't point to a disk, or the ioctl
119 	 * failed for some other reason.
120 	 */
121 	if (err == ENOTTY) {
122 		return (mddeverror(ep, MDE_NOT_DISK, NODEV, rname));
123 	}
124 
125 	return (mdsyserror(ep, err, rname));
126 }
127 
128 
129 
130 /*
131  * FUNCTION:	meta_repartition_drive()
132  * INPUT:	sp	- the set name for the device to check
133  *		dnp	- the name of the drive to partition
134  *              options - options (see NOTES)
135  * OUTPUT:	vtocp	- pointer to an mdvtoc_t structure in which
136  *			  to return the new VTOC to the caller
137  *		ep	- pointer to an md_error_t structure in which
138  *			  to return errors to the caller
139  * RETURNS:	int	-  0 - drive was or can be repartitioned
140  *			  -1 - drive could not or should not be
141  *			       repartitioned
142  * PURPOSE:	Repartition a disk for use in a disk set or in order
143  *		to create soft partitions on it.  Alternatively,
144  *		return the VTOC that the disk would have if it were
145  *		repartitioned without actually repartitioning it.
146  *
147  * NOTES:
148  *
149  *     This routine will repartition a drive to make it suitable for
150  *     inclusion in a diskset.  Specifically, it will create a
151  *     proposed VTOC that specifies a replica slice that begins at the
152  *     first valid lba, is large enough to hold a label and a metadb
153  *     replica, does not overlap any other slices, and is unmountable.
154  *     If the current replica slice already satisfies those criteria,
155  *     the routine will neither create a proposed VTOC nor repartition
156  *     the drive unless the MD_REPART_FORCE flag is passed into the
157  *     routine in the options argument.  If the routine does create a
158  *     proposed VTOC, it will return the proposed VTOC in *vtocp if
159  *     vtocp isn't NULL.
160  *
161  *     The slice to be used as the replica slice is determined by the
162  *     function meta_replicaslice().
163  *
164  *     If the replica slice does not satisfy the above criteria or the
165  *     MD_REPART_FORCE flag is set, the proposed VTOC will specify a
166  *     replica slice that satisfies the above criteria, a slice zero
167  *     that contains the remaining space on the disk, and no other
168  *     slices.  If that repartitioning would cause the replica slice
169  *     to move or shrink, and the MD_REPART_LEAVE_REP option is set,
170  *     the routine will return -1 without creating or returning a
171  *     proposed vtoc, and without repartitioning the disk.  Otherwise
172  *     the routine will repartition the disk unless the
173  *     MD_REPART_DONT_LABEL flag is set in the options argument.
174  *
175  *     If the MD_REPART_DONT_LABEL flag is set in the options argument,
176  *     but the routine would otherwise repartition the drive, the
177  *     routine won't repartition the drive, but will create a proposed
178  *     VTOC that satisfies the criteria defined above and return it
179  *     it in *vtocp if vtocp isn't NULL,  The MD_REPART_DONT_LABEL
180  *     option allows calling routines to determine what the contents of
181  *     the drive's VTOC would be if the drive were repartitioned without
182  *     actually repartitioning the drive.
183  */
184 int
185 meta_repartition_drive(
186 	mdsetname_t	*sp,
187 	mddrivename_t	*dnp,
188 	int		options,
189 	mdvtoc_t	*vtocp,
190 	md_error_t	*ep
191 )
192 {
193 	uint_t			 replicaslice;
194 	diskaddr_t		 first_lba, last_lba;
195 	int			 round_sizes = 1;
196 	unsigned long long	 cylsize;
197 	unsigned long long	 drvsize;
198 	int			 i;
199 	mdgeom_t		*mdgp;
200 	mdvtoc_t		*mdvp;
201 	mdvtoc_t		 proposed_vtoc;
202 	uint_t			 reservedcyl;
203 	ushort_t		 resflag;
204 	mdname_t		*resnp;
205 	unsigned long long	 ressize;
206 	md_set_desc		*sd;
207 	daddr_t			 dbsize;
208 	diskaddr_t		 replica_start;
209 	diskaddr_t		 replica_size;
210 	diskaddr_t		 replica_end;
211 	diskaddr_t		 data_start;
212 	diskaddr_t		 data_size;
213 
214 	if (meta_replicaslice(dnp, &replicaslice, ep) != 0) {
215 		return (-1);
216 	}
217 
218 	/* Don't round for EFI disks */
219 	if (replicaslice == MD_SLICE6)
220 		round_sizes = 0;
221 
222 	/*
223 	 * We took as argument a drive name pointer, but we need a
224 	 * slice name pointer to retrieve vtoc information.  So get
225 	 * the name pointer for slice zero first, then use it to get
226 	 * the vtoc info for the disk.
227 	 */
228 	if ((resnp = metaslicename(dnp, MD_SLICE0, ep)) == NULL)
229 		return (-1);
230 
231 	if ((mdvp = metagetvtoc(resnp, FALSE, NULL, ep)) == NULL)
232 		return (-1);
233 
234 	/*
235 	 * Determine the metadb size.
236 	 */
237 	dbsize = MD_DBSIZE;
238 	if (!metaislocalset(sp)) {
239 		if ((sd = metaget_setdesc(sp, ep)) == NULL)
240 			return (-1);
241 
242 		if (MD_MNSET_DESC(sd))
243 			dbsize = MD_MN_DBSIZE;
244 	}
245 
246 	/* If we've got an efi disk, we better have lba info */
247 	first_lba = mdvp->first_lba;
248 	last_lba = mdvp->last_lba;
249 	ASSERT((round_sizes != 0) || (last_lba > 0));
250 
251 	/*
252 	 * At this point, ressize is used as a minimum value.  Later
253 	 * it will be rounded up to a cylinder boundary if
254 	 * appropriate.  ressize is in units of disk sectors.
255 	 */
256 	ressize = dbsize + VTOC_SIZE;
257 	resflag = V_UNMNT;
258 
259 	/*
260 	 * If we're forcing the repartition, we can skip the replica
261 	 * slice and overlap tests.
262 	 */
263 	if (options & MD_REPART_FORCE) {
264 		goto do_repartition;
265 	}
266 
267 	/*
268 	 * Replica slice tests: it must begin at first_lba, be long
269 	 * enough, have the right flags, and not overlap any other
270 	 * slices.  If any of these conditions is violated, we need to
271 	 * repartition the disk.
272 	 */
273 	if (mdvp->parts[replicaslice].start != first_lba) {
274 		goto do_repartition;
275 	}
276 
277 	if (mdvp->parts[replicaslice].size < ressize) {
278 		goto do_repartition;
279 	}
280 
281 	if (mdvp->parts[replicaslice].flag != resflag) {
282 		goto do_repartition;
283 	}
284 
285 	/*
286 	 * Check for overlap: this test should use the actual size of
287 	 * the replica slice, as contained in the vtoc, and NOT the
288 	 * minimum size calculated above.
289 	 */
290 	replica_end = first_lba + mdvp->parts[replicaslice].size;
291 	for (i = 0; i < mdvp->nparts; i++) {
292 		if (i != replicaslice) {
293 			if ((mdvp->parts[i].size > 0) &&
294 			    (mdvp->parts[i].start < replica_end)) {
295 				goto do_repartition;
296 			}
297 		}
298 	}
299 
300 	/*
301 	 * If we passed the above tests, then the disk is already
302 	 * partitioned appropriately, and we're not being told to
303 	 * force a change.
304 	 */
305 	return (0);
306 
307 do_repartition:
308 
309 	/* Retrieve disk geometry info and round to cylinder sizes */
310 	if (round_sizes != 0) {
311 
312 		if ((mdgp = metagetgeom(resnp, ep)) == NULL)
313 			return (-1);
314 
315 		/*
316 		 * Both cylsize and drvsize are in units of disk
317 		 * sectors.
318 		 *
319 		 * The intended results are of type unsigned long
320 		 * long.  Since each operand of the first
321 		 * multiplication is of type unsigned int, we risk
322 		 * overflow by multiplying and then converting the
323 		 * result.  Therefore we explicitly cast (at least)
324 		 * one of the operands, forcing conversion BEFORE
325 		 * multiplication, and avoiding overflow.  The second
326 		 * assignment is OK, since one of the operands is
327 		 * already of the desired type.
328 		 */
329 		cylsize =
330 		    ((unsigned long long)mdgp->nhead) * mdgp->nsect;
331 		drvsize = cylsize * mdgp->ncyl;
332 
333 		/*
334 		 * How many cylinders must we reserve for the replica
335 		 * slice to ensure that it meets the previously
336 		 * calculated minimum size?
337 		 */
338 		reservedcyl = (ressize + cylsize - 1) / cylsize;
339 		ressize = reservedcyl * cylsize;
340 	} else {
341 		drvsize = last_lba - first_lba;
342 	}
343 
344 	/* Would this require a forbidden change? */
345 	if (options & MD_REPART_LEAVE_REP) {
346 		if ((mdvp->parts[replicaslice].start != first_lba) ||
347 		    (mdvp->parts[replicaslice].size < ressize)) {
348 			return (mddeverror(ep, MDE_REPART_REPLICA,
349 			    resnp->dev, NULL));
350 		}
351 	}
352 
353 	/*
354 	 * It seems unlikely that someone would pass us too small a
355 	 * disk, but it's still worth checking for...
356 	 */
357 	if (((round_sizes != 0) && (reservedcyl >= (int)mdgp->ncyl)) ||
358 	    ((round_sizes == 0) && (ressize + first_lba >= last_lba))) {
359 		return (mdmddberror(ep, MDE_DB_TOOSMALL,
360 		    meta_getminor(resnp->dev), sp->setno, 0, NULL));
361 	}
362 
363 	replica_start = first_lba;
364 	replica_size = ressize;
365 	data_start = first_lba + ressize;
366 	data_size = drvsize - ressize;
367 
368 	/*
369 	 * Create the proposed VTOC.  First copy the current VTOC
370 	 * into the proposed VTOC to duplicate the values that don't
371 	 * need to change.  Then change the partition table and set
372 	 * the flag value for the replica slice to resflag to reserve it
373 	 * for metadata.
374 	 */
375 	proposed_vtoc = *mdvp;
376 	/* We need at least replicaslice partitions in the proposed vtoc */
377 	if (replicaslice >= proposed_vtoc.nparts) {
378 		proposed_vtoc.nparts = replicaslice + 1;
379 	}
380 	for (i = 0; i < proposed_vtoc.nparts; i++) {
381 		/* don't change the reserved partition of an EFI device */
382 		if (proposed_vtoc.parts[i].tag == V_RESERVED)
383 			data_size = proposed_vtoc.parts[i].start - data_start;
384 		else
385 			(void) memset(&proposed_vtoc.parts[i], '\0',
386 				sizeof (proposed_vtoc.parts[i]));
387 	}
388 
389 	proposed_vtoc.parts[MD_SLICE0].start = data_start;
390 	proposed_vtoc.parts[MD_SLICE0].size = data_size;
391 	proposed_vtoc.parts[MD_SLICE0].tag = V_USR;
392 	proposed_vtoc.parts[replicaslice].start = replica_start;
393 	proposed_vtoc.parts[replicaslice].size = replica_size;
394 	proposed_vtoc.parts[replicaslice].flag = resflag;
395 	proposed_vtoc.parts[replicaslice].tag = V_USR;
396 
397 	if (!(options & MD_REPART_DONT_LABEL)) {
398 		/*
399 		 * Label the disk with the proposed VTOC.
400 		 */
401 		*mdvp = proposed_vtoc;
402 		if (metasetvtoc(resnp, ep) != 0) {
403 			return (-1);
404 		}
405 	}
406 
407 	if (vtocp != NULL) {
408 		/*
409 		 * Return the proposed VTOC.
410 		 */
411 		*vtocp = proposed_vtoc;
412 	}
413 
414 	return (0);
415 }
416