1 /*
2 * CDDL HEADER START
3 *
4 * The contents of this file are subject to the terms of the
5 * Common Development and Distribution License, Version 1.0 only
6 * (the "License"). You may not use this file except in compliance
7 * with the License.
8 *
9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE
10 * or http://www.opensolaris.org/os/licensing.
11 * See the License for the specific language governing permissions
12 * and limitations under the License.
13 *
14 * When distributing Covered Code, include this CDDL HEADER in each
15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE.
16 * If applicable, add the following below this CDDL HEADER, with the
17 * fields enclosed by brackets "[]" replaced with your own identifying
18 * information: Portions Copyright [yyyy] [name of copyright owner]
19 *
20 * CDDL HEADER END
21 */
22 /*
23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved.
24 * Use is subject to license terms.
25 */
26
27 #pragma ident "%Z%%M% %I% %E% SMI"
28
29 #include <stdio.h>
30 #include <meta.h>
31 #include "meta_repartition.h"
32
33
34
35 /*
36 * FUNCTION: meta_replicaslice()
37 * INPUT: dnp - the name of the drive to check
38 * OUTPUT: slicep - pointer to slice number
39 * ep - pointer to an md_error_t structure in which
40 * to return errors to the caller
41 * RETURNS: int - 0 - value pointed to by slicep is valid
42 * -1 - otherwise
43 *
44 * PURPOSE: Determine which slice of the specified drive to
45 * reserve, presumably for metadb replica usage.
46 *
47 * NOTE: If slicep is NULL, the return code will indicate
48 * whether or not the slice number could be determined
49 */
50 int
meta_replicaslice(mddrivename_t * dnp,uint_t * slicep,md_error_t * ep)51 meta_replicaslice(
52 mddrivename_t *dnp,
53 uint_t *slicep,
54 md_error_t *ep
55 )
56 {
57 int err = 0;
58 int ioctl_return;
59 int fd;
60 char *rname;
61 struct dk_geom geom;
62
63 rname = dnp->rname;
64 if ((fd = open(rname, (O_RDONLY|O_NDELAY), 0)) < 0) {
65 char *n;
66 int open_errno;
67 size_t len;
68
69 if (errno != ENOENT)
70 return (mdsyserror(ep, errno, rname));
71
72 len = strlen(rname) + 3;
73 n = Zalloc(len);
74 (void) snprintf(n, len, "%ss0", rname);
75 fd = open(n, (O_RDONLY|O_NDELAY), 0);
76 open_errno = errno;
77 Free(n);
78 if (fd < 0) {
79 return (mdsyserror(ep, open_errno, rname));
80 }
81 }
82
83 /*
84 * if our drivenamep points to a device not supporting
85 * DKIOCGGEOM, we have an EFI label.
86 */
87 errno = 0;
88 ioctl_return = ioctl(fd, DKIOCGGEOM, &geom);
89 err = errno;
90
91 (void) close(fd);
92
93 /*
94 * If the DKIOCGGEOM ioctl succeeded, then the device has a
95 * VTOC style label. In this case, we use slice 7.
96 */
97 if (ioctl_return == 0) {
98 if (slicep != NULL) {
99 *slicep = MD_SLICE7;
100 }
101 return (0);
102 }
103
104 /*
105 * ENOTSUP indicates an EFI style label, in which case slice 7
106 * cannot be used because its minor number is reserved. In
107 * this case, use slice 6.
108 */
109 if (err == ENOTSUP) {
110 if (slicep != NULL) {
111 *slicep = MD_SLICE6;
112 }
113 return (0);
114 }
115
116 /*
117 * Those are the only two cases we know how to deal with;
118 * either the drivenamep didn't point to a disk, or the ioctl
119 * failed for some other reason.
120 */
121 if (err == ENOTTY) {
122 return (mddeverror(ep, MDE_NOT_DISK, NODEV, rname));
123 }
124
125 return (mdsyserror(ep, err, rname));
126 }
127
128
129
130 /*
131 * FUNCTION: meta_repartition_drive()
132 * INPUT: sp - the set name for the device to check
133 * dnp - the name of the drive to partition
134 * options - options (see NOTES)
135 * OUTPUT: vtocp - pointer to an mdvtoc_t structure in which
136 * to return the new VTOC to the caller
137 * ep - pointer to an md_error_t structure in which
138 * to return errors to the caller
139 * RETURNS: int - 0 - drive was or can be repartitioned
140 * -1 - drive could not or should not be
141 * repartitioned
142 * PURPOSE: Repartition a disk for use in a disk set or in order
143 * to create soft partitions on it. Alternatively,
144 * return the VTOC that the disk would have if it were
145 * repartitioned without actually repartitioning it.
146 *
147 * NOTES:
148 *
149 * This routine will repartition a drive to make it suitable for
150 * inclusion in a diskset. Specifically, it will create a
151 * proposed VTOC that specifies a replica slice that begins at the
152 * first valid lba, is large enough to hold a label and a metadb
153 * replica, does not overlap any other slices, and is unmountable.
154 * If the current replica slice already satisfies those criteria,
155 * the routine will neither create a proposed VTOC nor repartition
156 * the drive unless the MD_REPART_FORCE flag is passed into the
157 * routine in the options argument. If the routine does create a
158 * proposed VTOC, it will return the proposed VTOC in *vtocp if
159 * vtocp isn't NULL.
160 *
161 * The slice to be used as the replica slice is determined by the
162 * function meta_replicaslice().
163 *
164 * If the replica slice does not satisfy the above criteria or the
165 * MD_REPART_FORCE flag is set, the proposed VTOC will specify a
166 * replica slice that satisfies the above criteria, a slice zero
167 * that contains the remaining space on the disk, and no other
168 * slices. If that repartitioning would cause the replica slice
169 * to move or shrink, and the MD_REPART_LEAVE_REP option is set,
170 * the routine will return -1 without creating or returning a
171 * proposed vtoc, and without repartitioning the disk. Otherwise
172 * the routine will repartition the disk unless the
173 * MD_REPART_DONT_LABEL flag is set in the options argument.
174 *
175 * If the MD_REPART_DONT_LABEL flag is set in the options argument,
176 * but the routine would otherwise repartition the drive, the
177 * routine won't repartition the drive, but will create a proposed
178 * VTOC that satisfies the criteria defined above and return it
179 * it in *vtocp if vtocp isn't NULL, The MD_REPART_DONT_LABEL
180 * option allows calling routines to determine what the contents of
181 * the drive's VTOC would be if the drive were repartitioned without
182 * actually repartitioning the drive.
183 */
184 int
meta_repartition_drive(mdsetname_t * sp,mddrivename_t * dnp,int options,mdvtoc_t * vtocp,md_error_t * ep)185 meta_repartition_drive(
186 mdsetname_t *sp,
187 mddrivename_t *dnp,
188 int options,
189 mdvtoc_t *vtocp,
190 md_error_t *ep
191 )
192 {
193 uint_t replicaslice;
194 diskaddr_t first_lba, last_lba;
195 int round_sizes = 1;
196 unsigned long long cylsize;
197 unsigned long long drvsize;
198 int i;
199 mdgeom_t *mdgp;
200 mdvtoc_t *mdvp;
201 mdvtoc_t proposed_vtoc;
202 uint_t reservedcyl;
203 ushort_t resflag;
204 mdname_t *resnp;
205 unsigned long long ressize;
206 md_set_desc *sd;
207 daddr_t dbsize;
208 diskaddr_t replica_start;
209 diskaddr_t replica_size;
210 diskaddr_t replica_end;
211 diskaddr_t data_start;
212 diskaddr_t data_size;
213
214 if (meta_replicaslice(dnp, &replicaslice, ep) != 0) {
215 return (-1);
216 }
217
218 /* Don't round for EFI disks */
219 if (replicaslice == MD_SLICE6)
220 round_sizes = 0;
221
222 /*
223 * We took as argument a drive name pointer, but we need a
224 * slice name pointer to retrieve vtoc information. So get
225 * the name pointer for slice zero first, then use it to get
226 * the vtoc info for the disk.
227 */
228 if ((resnp = metaslicename(dnp, MD_SLICE0, ep)) == NULL)
229 return (-1);
230
231 if ((mdvp = metagetvtoc(resnp, FALSE, NULL, ep)) == NULL)
232 return (-1);
233
234 /*
235 * Determine the metadb size.
236 */
237 dbsize = MD_DBSIZE;
238 if (!metaislocalset(sp)) {
239 if ((sd = metaget_setdesc(sp, ep)) == NULL)
240 return (-1);
241
242 if (MD_MNSET_DESC(sd))
243 dbsize = MD_MN_DBSIZE;
244 }
245
246 /* If we've got an efi disk, we better have lba info */
247 first_lba = mdvp->first_lba;
248 last_lba = mdvp->last_lba;
249 ASSERT((round_sizes != 0) || (last_lba > 0));
250
251 /*
252 * At this point, ressize is used as a minimum value. Later
253 * it will be rounded up to a cylinder boundary if
254 * appropriate. ressize is in units of disk sectors.
255 */
256 ressize = dbsize + VTOC_SIZE;
257 resflag = V_UNMNT;
258
259 /*
260 * If we're forcing the repartition, we can skip the replica
261 * slice and overlap tests.
262 */
263 if (options & MD_REPART_FORCE) {
264 goto do_repartition;
265 }
266
267 /*
268 * Replica slice tests: it must begin at first_lba, be long
269 * enough, have the right flags, and not overlap any other
270 * slices. If any of these conditions is violated, we need to
271 * repartition the disk.
272 */
273 if (mdvp->parts[replicaslice].start != first_lba) {
274 goto do_repartition;
275 }
276
277 if (mdvp->parts[replicaslice].size < ressize) {
278 goto do_repartition;
279 }
280
281 if (mdvp->parts[replicaslice].flag != resflag) {
282 goto do_repartition;
283 }
284
285 /*
286 * Check for overlap: this test should use the actual size of
287 * the replica slice, as contained in the vtoc, and NOT the
288 * minimum size calculated above.
289 */
290 replica_end = first_lba + mdvp->parts[replicaslice].size;
291 for (i = 0; i < mdvp->nparts; i++) {
292 if (i != replicaslice) {
293 if ((mdvp->parts[i].size > 0) &&
294 (mdvp->parts[i].start < replica_end)) {
295 goto do_repartition;
296 }
297 }
298 }
299
300 /*
301 * If we passed the above tests, then the disk is already
302 * partitioned appropriately, and we're not being told to
303 * force a change.
304 */
305 return (0);
306
307 do_repartition:
308
309 /* Retrieve disk geometry info and round to cylinder sizes */
310 if (round_sizes != 0) {
311
312 if ((mdgp = metagetgeom(resnp, ep)) == NULL)
313 return (-1);
314
315 /*
316 * Both cylsize and drvsize are in units of disk
317 * sectors.
318 *
319 * The intended results are of type unsigned long
320 * long. Since each operand of the first
321 * multiplication is of type unsigned int, we risk
322 * overflow by multiplying and then converting the
323 * result. Therefore we explicitly cast (at least)
324 * one of the operands, forcing conversion BEFORE
325 * multiplication, and avoiding overflow. The second
326 * assignment is OK, since one of the operands is
327 * already of the desired type.
328 */
329 cylsize =
330 ((unsigned long long)mdgp->nhead) * mdgp->nsect;
331 drvsize = cylsize * mdgp->ncyl;
332
333 /*
334 * How many cylinders must we reserve for the replica
335 * slice to ensure that it meets the previously
336 * calculated minimum size?
337 */
338 reservedcyl = (ressize + cylsize - 1) / cylsize;
339 ressize = reservedcyl * cylsize;
340 } else {
341 drvsize = last_lba - first_lba;
342 }
343
344 /* Would this require a forbidden change? */
345 if (options & MD_REPART_LEAVE_REP) {
346 if ((mdvp->parts[replicaslice].start != first_lba) ||
347 (mdvp->parts[replicaslice].size < ressize)) {
348 return (mddeverror(ep, MDE_REPART_REPLICA,
349 resnp->dev, NULL));
350 }
351 }
352
353 /*
354 * It seems unlikely that someone would pass us too small a
355 * disk, but it's still worth checking for...
356 */
357 if (((round_sizes != 0) && (reservedcyl >= (int)mdgp->ncyl)) ||
358 ((round_sizes == 0) && (ressize + first_lba >= last_lba))) {
359 return (mdmddberror(ep, MDE_DB_TOOSMALL,
360 meta_getminor(resnp->dev), sp->setno, 0, NULL));
361 }
362
363 replica_start = first_lba;
364 replica_size = ressize;
365 data_start = first_lba + ressize;
366 data_size = drvsize - ressize;
367
368 /*
369 * Create the proposed VTOC. First copy the current VTOC
370 * into the proposed VTOC to duplicate the values that don't
371 * need to change. Then change the partition table and set
372 * the flag value for the replica slice to resflag to reserve it
373 * for metadata.
374 */
375 proposed_vtoc = *mdvp;
376 /* We need at least replicaslice partitions in the proposed vtoc */
377 if (replicaslice >= proposed_vtoc.nparts) {
378 proposed_vtoc.nparts = replicaslice + 1;
379 }
380 for (i = 0; i < proposed_vtoc.nparts; i++) {
381 /* don't change the reserved partition of an EFI device */
382 if (proposed_vtoc.parts[i].tag == V_RESERVED)
383 data_size = proposed_vtoc.parts[i].start - data_start;
384 else
385 (void) memset(&proposed_vtoc.parts[i], '\0',
386 sizeof (proposed_vtoc.parts[i]));
387 }
388
389 proposed_vtoc.parts[MD_SLICE0].start = data_start;
390 proposed_vtoc.parts[MD_SLICE0].size = data_size;
391 proposed_vtoc.parts[MD_SLICE0].tag = V_USR;
392 proposed_vtoc.parts[replicaslice].start = replica_start;
393 proposed_vtoc.parts[replicaslice].size = replica_size;
394 proposed_vtoc.parts[replicaslice].flag = resflag;
395 proposed_vtoc.parts[replicaslice].tag = V_USR;
396
397 if (!(options & MD_REPART_DONT_LABEL)) {
398 /*
399 * Label the disk with the proposed VTOC.
400 */
401 *mdvp = proposed_vtoc;
402 if (metasetvtoc(resnp, ep) != 0) {
403 return (-1);
404 }
405 }
406
407 if (vtocp != NULL) {
408 /*
409 * Return the proposed VTOC.
410 */
411 *vtocp = proposed_vtoc;
412 }
413
414 return (0);
415 }
416