1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License, Version 1.0 only 6 * (the "License"). You may not use this file except in compliance 7 * with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or http://www.opensolaris.org/os/licensing. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 /* 23 * Copyright 2004 Sun Microsystems, Inc. All rights reserved. 24 * Use is subject to license terms. 25 */ 26 27 #pragma ident "%Z%%M% %I% %E% SMI" 28 29 #include <stdio.h> 30 #include <meta.h> 31 #include "meta_repartition.h" 32 33 34 35 /* 36 * FUNCTION: meta_replicaslice() 37 * INPUT: dnp - the name of the drive to check 38 * OUTPUT: slicep - pointer to slice number 39 * ep - pointer to an md_error_t structure in which 40 * to return errors to the caller 41 * RETURNS: int - 0 - value pointed to by slicep is valid 42 * -1 - otherwise 43 * 44 * PURPOSE: Determine which slice of the specified drive to 45 * reserve, presumably for metadb replica usage. 
46 * 47 * NOTE: If slicep is NULL, the return code will indicate 48 * whether or not the slice number could be determined 49 */ 50 int 51 meta_replicaslice( 52 mddrivename_t *dnp, 53 uint_t *slicep, 54 md_error_t *ep 55 ) 56 { 57 int err = 0; 58 int ioctl_return; 59 int fd; 60 char *rname; 61 struct dk_geom geom; 62 63 rname = dnp->rname; 64 if ((fd = open(rname, (O_RDONLY|O_NDELAY), 0)) < 0) { 65 char *n; 66 int open_errno; 67 size_t len; 68 69 if (errno != ENOENT) 70 return (mdsyserror(ep, errno, rname)); 71 72 len = strlen(rname) + 3; 73 n = Zalloc(len); 74 (void) snprintf(n, len, "%ss0", rname); 75 fd = open(n, (O_RDONLY|O_NDELAY), 0); 76 open_errno = errno; 77 Free(n); 78 if (fd < 0) { 79 return (mdsyserror(ep, open_errno, rname)); 80 } 81 } 82 83 /* 84 * if our drivenamep points to a device not supporting 85 * DKIOCGGEOM, we have an EFI label. 86 */ 87 errno = 0; 88 ioctl_return = ioctl(fd, DKIOCGGEOM, &geom); 89 err = errno; 90 91 (void) close(fd); 92 93 /* 94 * If the DKIOCGGEOM ioctl succeeded, then the device has a 95 * VTOC style label. In this case, we use slice 7. 96 */ 97 if (ioctl_return == 0) { 98 if (slicep != NULL) { 99 *slicep = MD_SLICE7; 100 } 101 return (0); 102 } 103 104 /* 105 * ENOTSUP indicates an EFI style label, in which case slice 7 106 * cannot be used because its minor number is reserved. In 107 * this case, use slice 6. 108 */ 109 if (err == ENOTSUP) { 110 if (slicep != NULL) { 111 *slicep = MD_SLICE6; 112 } 113 return (0); 114 } 115 116 /* 117 * Those are the only two cases we know how to deal with; 118 * either the drivenamep didn't point to a disk, or the ioctl 119 * failed for some other reason. 
120 */ 121 if (err == ENOTTY) { 122 return (mddeverror(ep, MDE_NOT_DISK, NODEV, rname)); 123 } 124 125 return (mdsyserror(ep, err, rname)); 126 } 127 128 129 130 /* 131 * FUNCTION: meta_repartition_drive() 132 * INPUT: sp - the set name for the device to check 133 * dnp - the name of the drive to partition 134 * options - options (see NOTES) 135 * OUTPUT: vtocp - pointer to an mdvtoc_t structure in which 136 * to return the new VTOC to the caller 137 * ep - pointer to an md_error_t structure in which 138 * to return errors to the caller 139 * RETURNS: int - 0 - drive was or can be repartitioned 140 * -1 - drive could not or should not be 141 * repartitioned 142 * PURPOSE: Repartition a disk for use in a disk set or in order 143 * to create soft partitions on it. Alternatively, 144 * return the VTOC that the disk would have if it were 145 * repartitioned without actually repartitioning it. 146 * 147 * NOTES: 148 * 149 * This routine will repartition a drive to make it suitable for 150 * inclusion in a diskset. Specifically, it will create a 151 * proposed VTOC that specifies a replica slice that begins at the 152 * first valid lba, is large enough to hold a label and a metadb 153 * replica, does not overlap any other slices, and is unmountable. 154 * If the current replica slice already satisfies those criteria, 155 * the routine will neither create a proposed VTOC nor repartition 156 * the drive unless the MD_REPART_FORCE flag is passed into the 157 * routine in the options argument. If the routine does create a 158 * proposed VTOC, it will return the proposed VTOC in *vtocp if 159 * vtocp isn't NULL. 160 * 161 * The slice to be used as the replica slice is determined by the 162 * function meta_replicaslice(). 
 *
 *	If the replica slice does not satisfy the above criteria or the
 *	MD_REPART_FORCE flag is set, the proposed VTOC will specify a
 *	replica slice that satisfies the above criteria, a slice zero
 *	that contains the remaining space on the disk, and no other
 *	slices.  If that repartitioning would cause the replica slice
 *	to move or shrink, and the MD_REPART_LEAVE_REP option is set,
 *	the routine will return -1 without creating or returning a
 *	proposed vtoc, and without repartitioning the disk.  Otherwise
 *	the routine will repartition the disk unless the
 *	MD_REPART_DONT_LABEL flag is set in the options argument.
 *
 *	If the MD_REPART_DONT_LABEL flag is set in the options argument,
 *	but the routine would otherwise repartition the drive, the
 *	routine won't repartition the drive, but will create a proposed
 *	VTOC that satisfies the criteria defined above and return it
 *	in *vtocp if vtocp isn't NULL.  The MD_REPART_DONT_LABEL
 *	option allows calling routines to determine what the contents of
 *	the drive's VTOC would be if the drive were repartitioned without
 *	actually repartitioning the drive.
183 */ 184 int 185 meta_repartition_drive( 186 mdsetname_t *sp, 187 mddrivename_t *dnp, 188 int options, 189 mdvtoc_t *vtocp, 190 md_error_t *ep 191 ) 192 { 193 uint_t replicaslice; 194 diskaddr_t first_lba, last_lba; 195 int round_sizes = 1; 196 unsigned long long cylsize; 197 unsigned long long drvsize; 198 int i; 199 mdgeom_t *mdgp; 200 mdvtoc_t *mdvp; 201 mdvtoc_t proposed_vtoc; 202 uint_t reservedcyl; 203 ushort_t resflag; 204 mdname_t *resnp; 205 unsigned long long ressize; 206 md_set_desc *sd; 207 daddr_t dbsize; 208 diskaddr_t replica_start; 209 diskaddr_t replica_size; 210 diskaddr_t replica_end; 211 diskaddr_t data_start; 212 diskaddr_t data_size; 213 214 if (meta_replicaslice(dnp, &replicaslice, ep) != 0) { 215 return (-1); 216 } 217 218 /* Don't round for EFI disks */ 219 if (replicaslice == MD_SLICE6) 220 round_sizes = 0; 221 222 /* 223 * We took as argument a drive name pointer, but we need a 224 * slice name pointer to retrieve vtoc information. So get 225 * the name pointer for slice zero first, then use it to get 226 * the vtoc info for the disk. 227 */ 228 if ((resnp = metaslicename(dnp, MD_SLICE0, ep)) == NULL) 229 return (-1); 230 231 if ((mdvp = metagetvtoc(resnp, FALSE, NULL, ep)) == NULL) 232 return (-1); 233 234 /* 235 * Determine the metadb size. 236 */ 237 dbsize = MD_DBSIZE; 238 if (!metaislocalset(sp)) { 239 if ((sd = metaget_setdesc(sp, ep)) == NULL) 240 return (-1); 241 242 if (MD_MNSET_DESC(sd)) 243 dbsize = MD_MN_DBSIZE; 244 } 245 246 /* If we've got an efi disk, we better have lba info */ 247 first_lba = mdvp->first_lba; 248 last_lba = mdvp->last_lba; 249 ASSERT((round_sizes != 0) || (last_lba > 0)); 250 251 /* 252 * At this point, ressize is used as a minimum value. Later 253 * it will be rounded up to a cylinder boundary if 254 * appropriate. ressize is in units of disk sectors. 
255 */ 256 ressize = dbsize + VTOC_SIZE; 257 resflag = V_UNMNT; 258 259 /* 260 * If we're forcing the repartition, we can skip the replica 261 * slice and overlap tests. 262 */ 263 if (options & MD_REPART_FORCE) { 264 goto do_repartition; 265 } 266 267 /* 268 * Replica slice tests: it must begin at first_lba, be long 269 * enough, have the right flags, and not overlap any other 270 * slices. If any of these conditions is violated, we need to 271 * repartition the disk. 272 */ 273 if (mdvp->parts[replicaslice].start != first_lba) { 274 goto do_repartition; 275 } 276 277 if (mdvp->parts[replicaslice].size < ressize) { 278 goto do_repartition; 279 } 280 281 if (mdvp->parts[replicaslice].flag != resflag) { 282 goto do_repartition; 283 } 284 285 /* 286 * Check for overlap: this test should use the actual size of 287 * the replica slice, as contained in the vtoc, and NOT the 288 * minimum size calculated above. 289 */ 290 replica_end = first_lba + mdvp->parts[replicaslice].size; 291 for (i = 0; i < mdvp->nparts; i++) { 292 if (i != replicaslice) { 293 if ((mdvp->parts[i].size > 0) && 294 (mdvp->parts[i].start < replica_end)) { 295 goto do_repartition; 296 } 297 } 298 } 299 300 /* 301 * If we passed the above tests, then the disk is already 302 * partitioned appropriately, and we're not being told to 303 * force a change. 304 */ 305 return (0); 306 307 do_repartition: 308 309 /* Retrieve disk geometry info and round to cylinder sizes */ 310 if (round_sizes != 0) { 311 312 if ((mdgp = metagetgeom(resnp, ep)) == NULL) 313 return (-1); 314 315 /* 316 * Both cylsize and drvsize are in units of disk 317 * sectors. 318 * 319 * The intended results are of type unsigned long 320 * long. Since each operand of the first 321 * multiplication is of type unsigned int, we risk 322 * overflow by multiplying and then converting the 323 * result. Therefore we explicitly cast (at least) 324 * one of the operands, forcing conversion BEFORE 325 * multiplication, and avoiding overflow. 
The second 326 * assignment is OK, since one of the operands is 327 * already of the desired type. 328 */ 329 cylsize = 330 ((unsigned long long)mdgp->nhead) * mdgp->nsect; 331 drvsize = cylsize * mdgp->ncyl; 332 333 /* 334 * How many cylinders must we reserve for the replica 335 * slice to ensure that it meets the previously 336 * calculated minimum size? 337 */ 338 reservedcyl = (ressize + cylsize - 1) / cylsize; 339 ressize = reservedcyl * cylsize; 340 } else { 341 drvsize = last_lba - first_lba; 342 } 343 344 /* Would this require a forbidden change? */ 345 if (options & MD_REPART_LEAVE_REP) { 346 if ((mdvp->parts[replicaslice].start != first_lba) || 347 (mdvp->parts[replicaslice].size < ressize)) { 348 return (mddeverror(ep, MDE_REPART_REPLICA, 349 resnp->dev, NULL)); 350 } 351 } 352 353 /* 354 * It seems unlikely that someone would pass us too small a 355 * disk, but it's still worth checking for... 356 */ 357 if (((round_sizes != 0) && (reservedcyl >= (int)mdgp->ncyl)) || 358 ((round_sizes == 0) && (ressize + first_lba >= last_lba))) { 359 return (mdmddberror(ep, MDE_DB_TOOSMALL, 360 meta_getminor(resnp->dev), sp->setno, 0, NULL)); 361 } 362 363 replica_start = first_lba; 364 replica_size = ressize; 365 data_start = first_lba + ressize; 366 data_size = drvsize - ressize; 367 368 /* 369 * Create the proposed VTOC. First copy the current VTOC 370 * into the proposed VTOC to duplicate the values that don't 371 * need to change. Then change the partition table and set 372 * the flag value for the replica slice to resflag to reserve it 373 * for metadata. 
374 */ 375 proposed_vtoc = *mdvp; 376 /* We need at least replicaslice partitions in the proposed vtoc */ 377 if (replicaslice >= proposed_vtoc.nparts) { 378 proposed_vtoc.nparts = replicaslice + 1; 379 } 380 for (i = 0; i < proposed_vtoc.nparts; i++) { 381 /* don't change the reserved partition of an EFI device */ 382 if (proposed_vtoc.parts[i].tag == V_RESERVED) 383 data_size = proposed_vtoc.parts[i].start - data_start; 384 else 385 (void) memset(&proposed_vtoc.parts[i], '\0', 386 sizeof (proposed_vtoc.parts[i])); 387 } 388 389 proposed_vtoc.parts[MD_SLICE0].start = data_start; 390 proposed_vtoc.parts[MD_SLICE0].size = data_size; 391 proposed_vtoc.parts[MD_SLICE0].tag = V_USR; 392 proposed_vtoc.parts[replicaslice].start = replica_start; 393 proposed_vtoc.parts[replicaslice].size = replica_size; 394 proposed_vtoc.parts[replicaslice].flag = resflag; 395 proposed_vtoc.parts[replicaslice].tag = V_USR; 396 397 if (!(options & MD_REPART_DONT_LABEL)) { 398 /* 399 * Label the disk with the proposed VTOC. 400 */ 401 *mdvp = proposed_vtoc; 402 if (metasetvtoc(resnp, ep) != 0) { 403 return (-1); 404 } 405 } 406 407 if (vtocp != NULL) { 408 /* 409 * Return the proposed VTOC. 410 */ 411 *vtocp = proposed_vtoc; 412 } 413 414 return (0); 415 } 416