1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright 2012 Nexenta Systems, Inc. All rights reserved. 25 * Copyright (c) 2018 by Delphix. All rights reserved. 26 */ 27 28 #include <stdio.h> 29 #include <stdlib.h> 30 #include <errno.h> 31 #include <string.h> 32 #include <strings.h> 33 #include <unistd.h> 34 #include <uuid/uuid.h> 35 #include <zlib.h> 36 #include <libintl.h> 37 #include <sys/types.h> 38 #include <sys/dkio.h> 39 #include <sys/vtoc.h> 40 #include <sys/mhd.h> 41 #include <sys/param.h> 42 #include <sys/dktp/fdisk.h> 43 #include <sys/efi_partition.h> 44 #include <sys/byteorder.h> 45 #include <sys/vdev_disk.h> 46 #include <linux/fs.h> 47 48 static struct uuid_to_ptag { 49 struct uuid uuid; 50 } conversion_array[] = { 51 { EFI_UNUSED }, 52 { EFI_BOOT }, 53 { EFI_ROOT }, 54 { EFI_SWAP }, 55 { EFI_USR }, 56 { EFI_BACKUP }, 57 { EFI_UNUSED }, /* STAND is never used */ 58 { EFI_VAR }, 59 { EFI_HOME }, 60 { EFI_ALTSCTR }, 61 { EFI_UNUSED }, /* CACHE (cachefs) is never used */ 62 { EFI_RESERVED }, 63 { EFI_SYSTEM }, 64 { EFI_LEGACY_MBR }, 65 { EFI_SYMC_PUB }, 66 { EFI_SYMC_CDS }, 67 { EFI_MSFT_RESV }, 68 { EFI_DELL_BASIC }, 69 { EFI_DELL_RAID }, 70 { EFI_DELL_SWAP }, 71 { EFI_DELL_LVM }, 72 { EFI_DELL_RESV }, 73 { EFI_AAPL_HFS }, 74 { EFI_AAPL_UFS }, 75 { EFI_FREEBSD_BOOT }, 76 { EFI_FREEBSD_SWAP }, 77 { EFI_FREEBSD_UFS }, 78 { EFI_FREEBSD_VINUM }, 79 { EFI_FREEBSD_ZFS }, 80 { EFI_BIOS_BOOT }, 81 { EFI_INTC_RS }, 82 { EFI_SNE_BOOT }, 83 { EFI_LENOVO_BOOT }, 84 { EFI_MSFT_LDMM }, 85 { EFI_MSFT_LDMD }, 86 { EFI_MSFT_RE }, 87 { EFI_IBM_GPFS }, 88 { EFI_MSFT_STORAGESPACES }, 89 { EFI_HPQ_DATA }, 90 { EFI_HPQ_SVC }, 91 { EFI_RHT_DATA }, 92 { EFI_RHT_HOME }, 93 { EFI_RHT_SRV }, 94 { EFI_RHT_DMCRYPT }, 95 { EFI_RHT_LUKS }, 96 { EFI_FREEBSD_DISKLABEL }, 97 { EFI_AAPL_RAID }, 98 { EFI_AAPL_RAIDOFFLINE }, 99 { EFI_AAPL_BOOT }, 100 { EFI_AAPL_LABEL }, 101 { EFI_AAPL_TVRECOVERY }, 102 { EFI_AAPL_CORESTORAGE }, 103 { EFI_NETBSD_SWAP }, 104 { EFI_NETBSD_FFS }, 105 { EFI_NETBSD_LFS }, 106 { EFI_NETBSD_RAID }, 107 { EFI_NETBSD_CAT }, 108 { EFI_NETBSD_CRYPT }, 109 { EFI_GOOG_KERN }, 110 { EFI_GOOG_ROOT }, 111 { EFI_GOOG_RESV }, 112 { EFI_HAIKU_BFS }, 113 { EFI_MIDNIGHTBSD_BOOT }, 114 { EFI_MIDNIGHTBSD_DATA }, 115 { EFI_MIDNIGHTBSD_SWAP }, 116 { EFI_MIDNIGHTBSD_UFS }, 117 { EFI_MIDNIGHTBSD_VINUM }, 118 { EFI_MIDNIGHTBSD_ZFS }, 119 { EFI_CEPH_JOURNAL }, 120 { EFI_CEPH_DMCRYPTJOURNAL }, 121 { EFI_CEPH_OSD }, 122 { EFI_CEPH_DMCRYPTOSD }, 123 { EFI_CEPH_CREATE }, 124 { EFI_CEPH_DMCRYPTCREATE }, 125 { EFI_OPENBSD_DISKLABEL }, 126 { EFI_BBRY_QNX }, 127 { EFI_BELL_PLAN9 }, 128 { EFI_VMW_KCORE }, 129 { EFI_VMW_VMFS }, 130 { EFI_VMW_RESV }, 131 { EFI_RHT_ROOTX86 }, 132 { EFI_RHT_ROOTAMD64 }, 133 { EFI_RHT_ROOTARM }, 134 { EFI_RHT_ROOTARM64 }, 135 { EFI_ACRONIS_SECUREZONE }, 136 { EFI_ONIE_BOOT }, 137 { EFI_ONIE_CONFIG }, 138 { EFI_IBM_PPRPBOOT }, 139 { EFI_FREEDESKTOP_BOOT } 140 }; 141 142 /* 143 * Default vtoc information for non-SVr4 partitions 144 */ 145 struct dk_map2 default_vtoc_map[NDKMAP] = { 146 { V_ROOT, 0 }, /* a - 0 */ 147 { V_SWAP, V_UNMNT }, /* b - 1 */ 148 { V_BACKUP, V_UNMNT }, /* c - 2 */ 149 { V_UNASSIGNED, 0 }, /* d - 3 */ 150 { V_UNASSIGNED, 0 }, /* e - 4 */ 151 { V_UNASSIGNED, 0 }, /* f - 5 */ 152 { V_USR, 0 }, /* g - 6 */ 153 { V_UNASSIGNED, 0 }, /* h - 7 */ 154 155 #if defined(_SUNOS_VTOC_16) 156 157 #if defined(i386) || defined(__amd64) || defined(__arm) || \ 158 defined(__powerpc) || defined(__sparc) || defined(__s390__) || \ 159 defined(__mips__) || defined(__rv64g__) 160 { V_BOOT, V_UNMNT }, /* i - 8 */ 161 { V_ALTSCTR, 0 }, /* j - 9 */ 162 163 #else 164 #error No VTOC format defined. 165 #endif /* defined(i386) */ 166 167 { V_UNASSIGNED, 0 }, /* k - 10 */ 168 { V_UNASSIGNED, 0 }, /* l - 11 */ 169 { V_UNASSIGNED, 0 }, /* m - 12 */ 170 { V_UNASSIGNED, 0 }, /* n - 13 */ 171 { V_UNASSIGNED, 0 }, /* o - 14 */ 172 { V_UNASSIGNED, 0 }, /* p - 15 */ 173 #endif /* defined(_SUNOS_VTOC_16) */ 174 }; 175 176 int efi_debug = 0; 177 178 static int efi_read(int, struct dk_gpt *); 179 180 /* 181 * Return a 32-bit CRC of the contents of the buffer. Pre-and-post 182 * one's conditioning will be handled by crc32() internally. 183 */ 184 static uint32_t 185 efi_crc32(const unsigned char *buf, unsigned int size) 186 { 187 uint32_t crc = crc32(0, Z_NULL, 0); 188 189 crc = crc32(crc, buf, size); 190 191 return (crc); 192 } 193 194 static int 195 read_disk_info(int fd, diskaddr_t *capacity, uint_t *lbsize) 196 { 197 int sector_size; 198 unsigned long long capacity_size; 199 200 if (ioctl(fd, BLKSSZGET, §or_size) < 0) 201 return (-1); 202 203 if (ioctl(fd, BLKGETSIZE64, &capacity_size) < 0) 204 return (-1); 205 206 *lbsize = (uint_t)sector_size; 207 *capacity = (diskaddr_t)(capacity_size / sector_size); 208 209 return (0); 210 } 211 212 static int 213 efi_get_info(int fd, struct dk_cinfo *dki_info) 214 { 215 char *path; 216 char *dev_path; 217 int rval = 0; 218 219 memset(dki_info, 0, sizeof (*dki_info)); 220 221 path = calloc(1, PATH_MAX); 222 if (path == NULL) 223 goto error; 224 225 /* 226 * The simplest way to get the partition number under linux is 227 * to parse it out of the /dev/<disk><partition> block device name. 228 * The kernel creates this using the partition number when it 229 * populates /dev/ so it may be trusted. The tricky bit here is 230 * that the naming convention is based on the block device type. 231 * So we need to take this in to account when parsing out the 232 * partition information. Another issue is that the libefi API 233 * API only provides the open fd and not the file path. To handle 234 * this realpath(3) is used to resolve the block device name from 235 * /proc/self/fd/<fd>. Aside from the partition number we collect 236 * some additional device info. 237 */ 238 (void) sprintf(path, "/proc/self/fd/%d", fd); 239 dev_path = realpath(path, NULL); 240 free(path); 241 242 if (dev_path == NULL) 243 goto error; 244 245 if ((strncmp(dev_path, "/dev/sd", 7) == 0)) { 246 strcpy(dki_info->dki_cname, "sd"); 247 dki_info->dki_ctype = DKC_SCSI_CCS; 248 rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu", 249 dki_info->dki_dname, 250 &dki_info->dki_partition); 251 } else if ((strncmp(dev_path, "/dev/hd", 7) == 0)) { 252 strcpy(dki_info->dki_cname, "hd"); 253 dki_info->dki_ctype = DKC_DIRECT; 254 rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu", 255 dki_info->dki_dname, 256 &dki_info->dki_partition); 257 } else if ((strncmp(dev_path, "/dev/md", 7) == 0)) { 258 strcpy(dki_info->dki_cname, "pseudo"); 259 dki_info->dki_ctype = DKC_MD; 260 strcpy(dki_info->dki_dname, "md"); 261 rval = sscanf(dev_path, "/dev/md%[0-9]p%hu", 262 dki_info->dki_dname + 2, 263 &dki_info->dki_partition); 264 } else if ((strncmp(dev_path, "/dev/vd", 7) == 0)) { 265 strcpy(dki_info->dki_cname, "vd"); 266 dki_info->dki_ctype = DKC_MD; 267 rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu", 268 dki_info->dki_dname, 269 &dki_info->dki_partition); 270 } else if ((strncmp(dev_path, "/dev/xvd", 8) == 0)) { 271 strcpy(dki_info->dki_cname, "xvd"); 272 dki_info->dki_ctype = DKC_MD; 273 rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu", 274 dki_info->dki_dname, 275 &dki_info->dki_partition); 276 } else if ((strncmp(dev_path, "/dev/zd", 7) == 0)) { 277 strcpy(dki_info->dki_cname, "zd"); 278 dki_info->dki_ctype = DKC_MD; 279 strcpy(dki_info->dki_dname, "zd"); 280 rval = sscanf(dev_path, "/dev/zd%[0-9]p%hu", 281 dki_info->dki_dname + 2, 282 &dki_info->dki_partition); 283 } else if ((strncmp(dev_path, "/dev/dm-", 8) == 0)) { 284 strcpy(dki_info->dki_cname, "pseudo"); 285 dki_info->dki_ctype = DKC_VBD; 286 strcpy(dki_info->dki_dname, "dm-"); 287 rval = sscanf(dev_path, "/dev/dm-%[0-9]p%hu", 288 dki_info->dki_dname + 3, 289 &dki_info->dki_partition); 290 } else if ((strncmp(dev_path, "/dev/ram", 8) == 0)) { 291 strcpy(dki_info->dki_cname, "pseudo"); 292 dki_info->dki_ctype = DKC_PCMCIA_MEM; 293 strcpy(dki_info->dki_dname, "ram"); 294 rval = sscanf(dev_path, "/dev/ram%[0-9]p%hu", 295 dki_info->dki_dname + 3, 296 &dki_info->dki_partition); 297 } else if ((strncmp(dev_path, "/dev/loop", 9) == 0)) { 298 strcpy(dki_info->dki_cname, "pseudo"); 299 dki_info->dki_ctype = DKC_VBD; 300 strcpy(dki_info->dki_dname, "loop"); 301 rval = sscanf(dev_path, "/dev/loop%[0-9]p%hu", 302 dki_info->dki_dname + 4, 303 &dki_info->dki_partition); 304 } else if ((strncmp(dev_path, "/dev/nvme", 9) == 0)) { 305 strcpy(dki_info->dki_cname, "nvme"); 306 dki_info->dki_ctype = DKC_SCSI_CCS; 307 strcpy(dki_info->dki_dname, "nvme"); 308 (void) sscanf(dev_path, "/dev/nvme%[0-9]", 309 dki_info->dki_dname + 4); 310 size_t controller_length = strlen( 311 dki_info->dki_dname); 312 strcpy(dki_info->dki_dname + controller_length, 313 "n"); 314 rval = sscanf(dev_path, 315 "/dev/nvme%*[0-9]n%[0-9]p%hu", 316 dki_info->dki_dname + controller_length + 1, 317 &dki_info->dki_partition); 318 } else { 319 strcpy(dki_info->dki_dname, "unknown"); 320 strcpy(dki_info->dki_cname, "unknown"); 321 dki_info->dki_ctype = DKC_UNKNOWN; 322 } 323 324 switch (rval) { 325 case 0: 326 errno = EINVAL; 327 goto error; 328 case 1: 329 dki_info->dki_partition = 0; 330 } 331 332 free(dev_path); 333 334 return (0); 335 error: 336 if (efi_debug) 337 (void) fprintf(stderr, "DKIOCINFO errno 0x%x\n", errno); 338 339 switch (errno) { 340 case EIO: 341 return (VT_EIO); 342 case EINVAL: 343 return (VT_EINVAL); 344 default: 345 return (VT_ERROR); 346 } 347 } 348 349 /* 350 * the number of blocks the EFI label takes up (round up to nearest 351 * block) 352 */ 353 #define NBLOCKS(p, l) (1 + ((((p) * (int)sizeof (efi_gpe_t)) + \ 354 ((l) - 1)) / (l))) 355 /* number of partitions -- limited by what we can malloc */ 356 #define MAX_PARTS ((4294967295UL - sizeof (struct dk_gpt)) / \ 357 sizeof (struct dk_part)) 358 359 int 360 efi_alloc_and_init(int fd, uint32_t nparts, struct dk_gpt **vtoc) 361 { 362 diskaddr_t capacity = 0; 363 uint_t lbsize = 0; 364 uint_t nblocks; 365 size_t length; 366 struct dk_gpt *vptr; 367 struct uuid uuid; 368 struct dk_cinfo dki_info; 369 370 if (read_disk_info(fd, &capacity, &lbsize) != 0) 371 return (-1); 372 373 if (efi_get_info(fd, &dki_info) != 0) 374 return (-1); 375 376 if (dki_info.dki_partition != 0) 377 return (-1); 378 379 if ((dki_info.dki_ctype == DKC_PCMCIA_MEM) || 380 (dki_info.dki_ctype == DKC_VBD) || 381 (dki_info.dki_ctype == DKC_UNKNOWN)) 382 return (-1); 383 384 nblocks = NBLOCKS(nparts, lbsize); 385 if ((nblocks * lbsize) < EFI_MIN_ARRAY_SIZE + lbsize) { 386 /* 16K plus one block for the GPT */ 387 nblocks = EFI_MIN_ARRAY_SIZE / lbsize + 1; 388 } 389 390 if (nparts > MAX_PARTS) { 391 if (efi_debug) { 392 (void) fprintf(stderr, 393 "the maximum number of partitions supported is %lu\n", 394 MAX_PARTS); 395 } 396 return (-1); 397 } 398 399 length = sizeof (struct dk_gpt) + 400 sizeof (struct dk_part) * (nparts - 1); 401 402 vptr = calloc(1, length); 403 if (vptr == NULL) 404 return (-1); 405 406 *vtoc = vptr; 407 408 vptr->efi_version = EFI_VERSION_CURRENT; 409 vptr->efi_lbasize = lbsize; 410 vptr->efi_nparts = nparts; 411 /* 412 * add one block here for the PMBR; on disks with a 512 byte 413 * block size and 128 or fewer partitions, efi_first_u_lba 414 * should work out to "34" 415 */ 416 vptr->efi_first_u_lba = nblocks + 1; 417 vptr->efi_last_lba = capacity - 1; 418 vptr->efi_altern_lba = capacity -1; 419 vptr->efi_last_u_lba = vptr->efi_last_lba - nblocks; 420 421 (void) uuid_generate((uchar_t *)&uuid); 422 UUID_LE_CONVERT(vptr->efi_disk_uguid, uuid); 423 return (0); 424 } 425 426 /* 427 * Read EFI - return partition number upon success. 428 */ 429 int 430 efi_alloc_and_read(int fd, struct dk_gpt **vtoc) 431 { 432 int rval; 433 uint32_t nparts; 434 int length; 435 struct dk_gpt *vptr; 436 437 /* figure out the number of entries that would fit into 16K */ 438 nparts = EFI_MIN_ARRAY_SIZE / sizeof (efi_gpe_t); 439 length = (int) sizeof (struct dk_gpt) + 440 (int) sizeof (struct dk_part) * (nparts - 1); 441 vptr = calloc(1, length); 442 443 if (vptr == NULL) 444 return (VT_ERROR); 445 446 vptr->efi_nparts = nparts; 447 rval = efi_read(fd, vptr); 448 449 if ((rval == VT_EINVAL) && vptr->efi_nparts > nparts) { 450 void *tmp; 451 length = (int) sizeof (struct dk_gpt) + 452 (int) sizeof (struct dk_part) * (vptr->efi_nparts - 1); 453 nparts = vptr->efi_nparts; 454 if ((tmp = realloc(vptr, length)) == NULL) { 455 free(vptr); 456 *vtoc = NULL; 457 return (VT_ERROR); 458 } else { 459 vptr = tmp; 460 rval = efi_read(fd, vptr); 461 } 462 } 463 464 if (rval < 0) { 465 if (efi_debug) { 466 (void) fprintf(stderr, 467 "read of EFI table failed, rval=%d\n", rval); 468 } 469 free(vptr); 470 *vtoc = NULL; 471 } else { 472 *vtoc = vptr; 473 } 474 475 return (rval); 476 } 477 478 static int 479 efi_ioctl(int fd, int cmd, dk_efi_t *dk_ioc) 480 { 481 void *data = dk_ioc->dki_data; 482 int error; 483 diskaddr_t capacity; 484 uint_t lbsize; 485 486 /* 487 * When the IO is not being performed in kernel as an ioctl we need 488 * to know the sector size so we can seek to the proper byte offset. 489 */ 490 if (read_disk_info(fd, &capacity, &lbsize) == -1) { 491 if (efi_debug) 492 fprintf(stderr, "unable to read disk info: %d", errno); 493 494 errno = EIO; 495 return (-1); 496 } 497 498 switch (cmd) { 499 case DKIOCGETEFI: 500 if (lbsize == 0) { 501 if (efi_debug) 502 (void) fprintf(stderr, "DKIOCGETEFI assuming " 503 "LBA %d bytes\n", DEV_BSIZE); 504 505 lbsize = DEV_BSIZE; 506 } 507 508 error = lseek(fd, dk_ioc->dki_lba * lbsize, SEEK_SET); 509 if (error == -1) { 510 if (efi_debug) 511 (void) fprintf(stderr, "DKIOCGETEFI lseek " 512 "error: %d\n", errno); 513 return (error); 514 } 515 516 error = read(fd, data, dk_ioc->dki_length); 517 if (error == -1) { 518 if (efi_debug) 519 (void) fprintf(stderr, "DKIOCGETEFI read " 520 "error: %d\n", errno); 521 return (error); 522 } 523 524 if (error != dk_ioc->dki_length) { 525 if (efi_debug) 526 (void) fprintf(stderr, "DKIOCGETEFI short " 527 "read of %d bytes\n", error); 528 errno = EIO; 529 return (-1); 530 } 531 error = 0; 532 break; 533 534 case DKIOCSETEFI: 535 if (lbsize == 0) { 536 if (efi_debug) 537 (void) fprintf(stderr, "DKIOCSETEFI unknown " 538 "LBA size\n"); 539 errno = EIO; 540 return (-1); 541 } 542 543 error = lseek(fd, dk_ioc->dki_lba * lbsize, SEEK_SET); 544 if (error == -1) { 545 if (efi_debug) 546 (void) fprintf(stderr, "DKIOCSETEFI lseek " 547 "error: %d\n", errno); 548 return (error); 549 } 550 551 error = write(fd, data, dk_ioc->dki_length); 552 if (error == -1) { 553 if (efi_debug) 554 (void) fprintf(stderr, "DKIOCSETEFI write " 555 "error: %d\n", errno); 556 return (error); 557 } 558 559 if (error != dk_ioc->dki_length) { 560 if (efi_debug) 561 (void) fprintf(stderr, "DKIOCSETEFI short " 562 "write of %d bytes\n", error); 563 errno = EIO; 564 return (-1); 565 } 566 567 /* Sync the new EFI table to disk */ 568 error = fsync(fd); 569 if (error == -1) 570 return (error); 571 572 /* Ensure any local disk cache is also flushed */ 573 if (ioctl(fd, BLKFLSBUF, 0) == -1) 574 return (error); 575 576 error = 0; 577 break; 578 579 default: 580 if (efi_debug) 581 (void) fprintf(stderr, "unsupported ioctl()\n"); 582 583 errno = EIO; 584 return (-1); 585 } 586 587 return (error); 588 } 589 590 int 591 efi_rescan(int fd) 592 { 593 int retry = 10; 594 int error; 595 596 /* Notify the kernel a devices partition table has been updated */ 597 while ((error = ioctl(fd, BLKRRPART)) != 0) { 598 if ((--retry == 0) || (errno != EBUSY)) { 599 (void) fprintf(stderr, "the kernel failed to rescan " 600 "the partition table: %d\n", errno); 601 return (-1); 602 } 603 usleep(50000); 604 } 605 606 return (0); 607 } 608 609 static int 610 check_label(int fd, dk_efi_t *dk_ioc) 611 { 612 efi_gpt_t *efi; 613 uint_t crc; 614 615 if (efi_ioctl(fd, DKIOCGETEFI, dk_ioc) == -1) { 616 switch (errno) { 617 case EIO: 618 return (VT_EIO); 619 default: 620 return (VT_ERROR); 621 } 622 } 623 efi = dk_ioc->dki_data; 624 if (efi->efi_gpt_Signature != LE_64(EFI_SIGNATURE)) { 625 if (efi_debug) 626 (void) fprintf(stderr, 627 "Bad EFI signature: 0x%llx != 0x%llx\n", 628 (long long)efi->efi_gpt_Signature, 629 (long long)LE_64(EFI_SIGNATURE)); 630 return (VT_EINVAL); 631 } 632 633 /* 634 * check CRC of the header; the size of the header should 635 * never be larger than one block 636 */ 637 crc = efi->efi_gpt_HeaderCRC32; 638 efi->efi_gpt_HeaderCRC32 = 0; 639 len_t headerSize = (len_t)LE_32(efi->efi_gpt_HeaderSize); 640 641 if (headerSize < EFI_MIN_LABEL_SIZE || headerSize > EFI_LABEL_SIZE) { 642 if (efi_debug) 643 (void) fprintf(stderr, 644 "Invalid EFI HeaderSize %llu. Assuming %d.\n", 645 headerSize, EFI_MIN_LABEL_SIZE); 646 } 647 648 if ((headerSize > dk_ioc->dki_length) || 649 crc != LE_32(efi_crc32((unsigned char *)efi, headerSize))) { 650 if (efi_debug) 651 (void) fprintf(stderr, 652 "Bad EFI CRC: 0x%x != 0x%x\n", 653 crc, LE_32(efi_crc32((unsigned char *)efi, 654 headerSize))); 655 return (VT_EINVAL); 656 } 657 658 return (0); 659 } 660 661 static int 662 efi_read(int fd, struct dk_gpt *vtoc) 663 { 664 int i, j; 665 int label_len; 666 int rval = 0; 667 int md_flag = 0; 668 int vdc_flag = 0; 669 diskaddr_t capacity = 0; 670 uint_t lbsize = 0; 671 struct dk_minfo disk_info; 672 dk_efi_t dk_ioc; 673 efi_gpt_t *efi; 674 efi_gpe_t *efi_parts; 675 struct dk_cinfo dki_info; 676 uint32_t user_length; 677 boolean_t legacy_label = B_FALSE; 678 679 /* 680 * get the partition number for this file descriptor. 681 */ 682 if ((rval = efi_get_info(fd, &dki_info)) != 0) 683 return (rval); 684 685 if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) && 686 (strncmp(dki_info.dki_dname, "md", 3) == 0)) { 687 md_flag++; 688 } else if ((strncmp(dki_info.dki_cname, "vdc", 4) == 0) && 689 (strncmp(dki_info.dki_dname, "vdc", 4) == 0)) { 690 /* 691 * The controller and drive name "vdc" (virtual disk client) 692 * indicates a LDoms virtual disk. 693 */ 694 vdc_flag++; 695 } 696 697 /* get the LBA size */ 698 if (read_disk_info(fd, &capacity, &lbsize) == -1) { 699 if (efi_debug) { 700 (void) fprintf(stderr, 701 "unable to read disk info: %d", 702 errno); 703 } 704 return (VT_EINVAL); 705 } 706 707 disk_info.dki_lbsize = lbsize; 708 disk_info.dki_capacity = capacity; 709 710 if (disk_info.dki_lbsize == 0) { 711 if (efi_debug) { 712 (void) fprintf(stderr, 713 "efi_read: assuming LBA 512 bytes\n"); 714 } 715 disk_info.dki_lbsize = DEV_BSIZE; 716 } 717 /* 718 * Read the EFI GPT to figure out how many partitions we need 719 * to deal with. 720 */ 721 dk_ioc.dki_lba = 1; 722 if (NBLOCKS(vtoc->efi_nparts, disk_info.dki_lbsize) < 34) { 723 label_len = EFI_MIN_ARRAY_SIZE + disk_info.dki_lbsize; 724 } else { 725 label_len = vtoc->efi_nparts * (int) sizeof (efi_gpe_t) + 726 disk_info.dki_lbsize; 727 if (label_len % disk_info.dki_lbsize) { 728 /* pad to physical sector size */ 729 label_len += disk_info.dki_lbsize; 730 label_len &= ~(disk_info.dki_lbsize - 1); 731 } 732 } 733 734 if (posix_memalign((void **)&dk_ioc.dki_data, 735 disk_info.dki_lbsize, label_len)) 736 return (VT_ERROR); 737 738 memset(dk_ioc.dki_data, 0, label_len); 739 dk_ioc.dki_length = disk_info.dki_lbsize; 740 user_length = vtoc->efi_nparts; 741 efi = dk_ioc.dki_data; 742 if (md_flag) { 743 dk_ioc.dki_length = label_len; 744 if (efi_ioctl(fd, DKIOCGETEFI, &dk_ioc) == -1) { 745 switch (errno) { 746 case EIO: 747 return (VT_EIO); 748 default: 749 return (VT_ERROR); 750 } 751 } 752 } else if ((rval = check_label(fd, &dk_ioc)) == VT_EINVAL) { 753 /* 754 * No valid label here; try the alternate. Note that here 755 * we just read GPT header and save it into dk_ioc.data, 756 * Later, we will read GUID partition entry array if we 757 * can get valid GPT header. 758 */ 759 760 /* 761 * This is a workaround for legacy systems. In the past, the 762 * last sector of SCSI disk was invisible on x86 platform. At 763 * that time, backup label was saved on the next to the last 764 * sector. It is possible for users to move a disk from previous 765 * solaris system to present system. Here, we attempt to search 766 * legacy backup EFI label first. 767 */ 768 dk_ioc.dki_lba = disk_info.dki_capacity - 2; 769 dk_ioc.dki_length = disk_info.dki_lbsize; 770 rval = check_label(fd, &dk_ioc); 771 if (rval == VT_EINVAL) { 772 /* 773 * we didn't find legacy backup EFI label, try to 774 * search backup EFI label in the last block. 775 */ 776 dk_ioc.dki_lba = disk_info.dki_capacity - 1; 777 dk_ioc.dki_length = disk_info.dki_lbsize; 778 rval = check_label(fd, &dk_ioc); 779 if (rval == 0) { 780 legacy_label = B_TRUE; 781 if (efi_debug) 782 (void) fprintf(stderr, 783 "efi_read: primary label corrupt; " 784 "using EFI backup label located on" 785 " the last block\n"); 786 } 787 } else { 788 if ((efi_debug) && (rval == 0)) 789 (void) fprintf(stderr, "efi_read: primary label" 790 " corrupt; using legacy EFI backup label " 791 " located on the next to last block\n"); 792 } 793 794 if (rval == 0) { 795 dk_ioc.dki_lba = LE_64(efi->efi_gpt_PartitionEntryLBA); 796 vtoc->efi_flags |= EFI_GPT_PRIMARY_CORRUPT; 797 vtoc->efi_nparts = 798 LE_32(efi->efi_gpt_NumberOfPartitionEntries); 799 /* 800 * Partition tables are between backup GPT header 801 * table and ParitionEntryLBA (the starting LBA of 802 * the GUID partition entries array). Now that we 803 * already got valid GPT header and saved it in 804 * dk_ioc.dki_data, we try to get GUID partition 805 * entry array here. 806 */ 807 /* LINTED */ 808 dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data 809 + disk_info.dki_lbsize); 810 if (legacy_label) 811 dk_ioc.dki_length = disk_info.dki_capacity - 1 - 812 dk_ioc.dki_lba; 813 else 814 dk_ioc.dki_length = disk_info.dki_capacity - 2 - 815 dk_ioc.dki_lba; 816 dk_ioc.dki_length *= disk_info.dki_lbsize; 817 if (dk_ioc.dki_length > 818 ((len_t)label_len - sizeof (*dk_ioc.dki_data))) { 819 rval = VT_EINVAL; 820 } else { 821 /* 822 * read GUID partition entry array 823 */ 824 rval = efi_ioctl(fd, DKIOCGETEFI, &dk_ioc); 825 } 826 } 827 828 } else if (rval == 0) { 829 830 dk_ioc.dki_lba = LE_64(efi->efi_gpt_PartitionEntryLBA); 831 /* LINTED */ 832 dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data 833 + disk_info.dki_lbsize); 834 dk_ioc.dki_length = label_len - disk_info.dki_lbsize; 835 rval = efi_ioctl(fd, DKIOCGETEFI, &dk_ioc); 836 837 } else if (vdc_flag && rval == VT_ERROR && errno == EINVAL) { 838 /* 839 * When the device is a LDoms virtual disk, the DKIOCGETEFI 840 * ioctl can fail with EINVAL if the virtual disk backend 841 * is a ZFS volume serviced by a domain running an old version 842 * of Solaris. This is because the DKIOCGETEFI ioctl was 843 * initially incorrectly implemented for a ZFS volume and it 844 * expected the GPT and GPE to be retrieved with a single ioctl. 845 * So we try to read the GPT and the GPE using that old style 846 * ioctl. 847 */ 848 dk_ioc.dki_lba = 1; 849 dk_ioc.dki_length = label_len; 850 rval = check_label(fd, &dk_ioc); 851 } 852 853 if (rval < 0) { 854 free(efi); 855 return (rval); 856 } 857 858 /* LINTED -- always longlong aligned */ 859 efi_parts = (efi_gpe_t *)(((char *)efi) + disk_info.dki_lbsize); 860 861 /* 862 * Assemble this into a "dk_gpt" struct for easier 863 * digestibility by applications. 864 */ 865 vtoc->efi_version = LE_32(efi->efi_gpt_Revision); 866 vtoc->efi_nparts = LE_32(efi->efi_gpt_NumberOfPartitionEntries); 867 vtoc->efi_part_size = LE_32(efi->efi_gpt_SizeOfPartitionEntry); 868 vtoc->efi_lbasize = disk_info.dki_lbsize; 869 vtoc->efi_last_lba = disk_info.dki_capacity - 1; 870 vtoc->efi_first_u_lba = LE_64(efi->efi_gpt_FirstUsableLBA); 871 vtoc->efi_last_u_lba = LE_64(efi->efi_gpt_LastUsableLBA); 872 vtoc->efi_altern_lba = LE_64(efi->efi_gpt_AlternateLBA); 873 UUID_LE_CONVERT(vtoc->efi_disk_uguid, efi->efi_gpt_DiskGUID); 874 875 /* 876 * If the array the user passed in is too small, set the length 877 * to what it needs to be and return 878 */ 879 if (user_length < vtoc->efi_nparts) { 880 return (VT_EINVAL); 881 } 882 883 for (i = 0; i < vtoc->efi_nparts; i++) { 884 885 UUID_LE_CONVERT(vtoc->efi_parts[i].p_guid, 886 efi_parts[i].efi_gpe_PartitionTypeGUID); 887 888 for (j = 0; 889 j < sizeof (conversion_array) 890 / sizeof (struct uuid_to_ptag); j++) { 891 892 if (bcmp(&vtoc->efi_parts[i].p_guid, 893 &conversion_array[j].uuid, 894 sizeof (struct uuid)) == 0) { 895 vtoc->efi_parts[i].p_tag = j; 896 break; 897 } 898 } 899 if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED) 900 continue; 901 vtoc->efi_parts[i].p_flag = 902 LE_16(efi_parts[i].efi_gpe_Attributes.PartitionAttrs); 903 vtoc->efi_parts[i].p_start = 904 LE_64(efi_parts[i].efi_gpe_StartingLBA); 905 vtoc->efi_parts[i].p_size = 906 LE_64(efi_parts[i].efi_gpe_EndingLBA) - 907 vtoc->efi_parts[i].p_start + 1; 908 for (j = 0; j < EFI_PART_NAME_LEN; j++) { 909 vtoc->efi_parts[i].p_name[j] = 910 (uchar_t)LE_16( 911 efi_parts[i].efi_gpe_PartitionName[j]); 912 } 913 914 UUID_LE_CONVERT(vtoc->efi_parts[i].p_uguid, 915 efi_parts[i].efi_gpe_UniquePartitionGUID); 916 } 917 free(efi); 918 919 return (dki_info.dki_partition); 920 } 921 922 /* writes a "protective" MBR */ 923 static int 924 write_pmbr(int fd, struct dk_gpt *vtoc) 925 { 926 dk_efi_t dk_ioc; 927 struct mboot mb; 928 uchar_t *cp; 929 diskaddr_t size_in_lba; 930 uchar_t *buf; 931 int len; 932 933 len = (vtoc->efi_lbasize == 0) ? sizeof (mb) : vtoc->efi_lbasize; 934 if (posix_memalign((void **)&buf, len, len)) 935 return (VT_ERROR); 936 937 /* 938 * Preserve any boot code and disk signature if the first block is 939 * already an MBR. 940 */ 941 memset(buf, 0, len); 942 dk_ioc.dki_lba = 0; 943 dk_ioc.dki_length = len; 944 /* LINTED -- always longlong aligned */ 945 dk_ioc.dki_data = (efi_gpt_t *)buf; 946 if (efi_ioctl(fd, DKIOCGETEFI, &dk_ioc) == -1) { 947 (void) memcpy(&mb, buf, sizeof (mb)); 948 bzero(&mb, sizeof (mb)); 949 mb.signature = LE_16(MBB_MAGIC); 950 } else { 951 (void) memcpy(&mb, buf, sizeof (mb)); 952 if (mb.signature != LE_16(MBB_MAGIC)) { 953 bzero(&mb, sizeof (mb)); 954 mb.signature = LE_16(MBB_MAGIC); 955 } 956 } 957 958 bzero(&mb.parts, sizeof (mb.parts)); 959 cp = (uchar_t *)&mb.parts[0]; 960 /* bootable or not */ 961 *cp++ = 0; 962 /* beginning CHS; 0xffffff if not representable */ 963 *cp++ = 0xff; 964 *cp++ = 0xff; 965 *cp++ = 0xff; 966 /* OS type */ 967 *cp++ = EFI_PMBR; 968 /* ending CHS; 0xffffff if not representable */ 969 *cp++ = 0xff; 970 *cp++ = 0xff; 971 *cp++ = 0xff; 972 /* starting LBA: 1 (little endian format) by EFI definition */ 973 *cp++ = 0x01; 974 *cp++ = 0x00; 975 *cp++ = 0x00; 976 *cp++ = 0x00; 977 /* ending LBA: last block on the disk (little endian format) */ 978 size_in_lba = vtoc->efi_last_lba; 979 if (size_in_lba < 0xffffffff) { 980 *cp++ = (size_in_lba & 0x000000ff); 981 *cp++ = (size_in_lba & 0x0000ff00) >> 8; 982 *cp++ = (size_in_lba & 0x00ff0000) >> 16; 983 *cp++ = (size_in_lba & 0xff000000) >> 24; 984 } else { 985 *cp++ = 0xff; 986 *cp++ = 0xff; 987 *cp++ = 0xff; 988 *cp++ = 0xff; 989 } 990 991 (void) memcpy(buf, &mb, sizeof (mb)); 992 /* LINTED -- always longlong aligned */ 993 dk_ioc.dki_data = (efi_gpt_t *)buf; 994 dk_ioc.dki_lba = 0; 995 dk_ioc.dki_length = len; 996 if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) { 997 free(buf); 998 switch (errno) { 999 case EIO: 1000 return (VT_EIO); 1001 case EINVAL: 1002 return (VT_EINVAL); 1003 default: 1004 return (VT_ERROR); 1005 } 1006 } 1007 free(buf); 1008 return (0); 1009 } 1010 1011 /* make sure the user specified something reasonable */ 1012 static int 1013 check_input(struct dk_gpt *vtoc) 1014 { 1015 int resv_part = -1; 1016 int i, j; 1017 diskaddr_t istart, jstart, isize, jsize, endsect; 1018 1019 /* 1020 * Sanity-check the input (make sure no partitions overlap) 1021 */ 1022 for (i = 0; i < vtoc->efi_nparts; i++) { 1023 /* It can't be unassigned and have an actual size */ 1024 if ((vtoc->efi_parts[i].p_tag == V_UNASSIGNED) && 1025 (vtoc->efi_parts[i].p_size != 0)) { 1026 if (efi_debug) { 1027 (void) fprintf(stderr, "partition %d is " 1028 "\"unassigned\" but has a size of %llu", 1029 i, vtoc->efi_parts[i].p_size); 1030 } 1031 return (VT_EINVAL); 1032 } 1033 if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED) { 1034 if (uuid_is_null((uchar_t *)&vtoc->efi_parts[i].p_guid)) 1035 continue; 1036 /* we have encountered an unknown uuid */ 1037 vtoc->efi_parts[i].p_tag = 0xff; 1038 } 1039 if (vtoc->efi_parts[i].p_tag == V_RESERVED) { 1040 if (resv_part != -1) { 1041 if (efi_debug) { 1042 (void) fprintf(stderr, "found " 1043 "duplicate reserved partition " 1044 "at %d\n", i); 1045 } 1046 return (VT_EINVAL); 1047 } 1048 resv_part = i; 1049 } 1050 if ((vtoc->efi_parts[i].p_start < vtoc->efi_first_u_lba) || 1051 (vtoc->efi_parts[i].p_start > vtoc->efi_last_u_lba)) { 1052 if (efi_debug) { 1053 (void) fprintf(stderr, 1054 "Partition %d starts at %llu. ", 1055 i, 1056 vtoc->efi_parts[i].p_start); 1057 (void) fprintf(stderr, 1058 "It must be between %llu and %llu.\n", 1059 vtoc->efi_first_u_lba, 1060 vtoc->efi_last_u_lba); 1061 } 1062 return (VT_EINVAL); 1063 } 1064 if ((vtoc->efi_parts[i].p_start + 1065 vtoc->efi_parts[i].p_size < 1066 vtoc->efi_first_u_lba) || 1067 (vtoc->efi_parts[i].p_start + 1068 vtoc->efi_parts[i].p_size > 1069 vtoc->efi_last_u_lba + 1)) { 1070 if (efi_debug) { 1071 (void) fprintf(stderr, 1072 "Partition %d ends at %llu. ", 1073 i, 1074 vtoc->efi_parts[i].p_start + 1075 vtoc->efi_parts[i].p_size); 1076 (void) fprintf(stderr, 1077 "It must be between %llu and %llu.\n", 1078 vtoc->efi_first_u_lba, 1079 vtoc->efi_last_u_lba); 1080 } 1081 return (VT_EINVAL); 1082 } 1083 1084 for (j = 0; j < vtoc->efi_nparts; j++) { 1085 isize = vtoc->efi_parts[i].p_size; 1086 jsize = vtoc->efi_parts[j].p_size; 1087 istart = vtoc->efi_parts[i].p_start; 1088 jstart = vtoc->efi_parts[j].p_start; 1089 if ((i != j) && (isize != 0) && (jsize != 0)) { 1090 endsect = jstart + jsize -1; 1091 if ((jstart <= istart) && 1092 (istart <= endsect)) { 1093 if (efi_debug) { 1094 (void) fprintf(stderr, 1095 "Partition %d overlaps " 1096 "partition %d.", i, j); 1097 } 1098 return (VT_EINVAL); 1099 } 1100 } 1101 } 1102 } 1103 /* just a warning for now */ 1104 if ((resv_part == -1) && efi_debug) { 1105 (void) fprintf(stderr, 1106 "no reserved partition found\n"); 1107 } 1108 return (0); 1109 } 1110 1111 /* 1112 * add all the unallocated space to the current label 1113 */ 1114 int 1115 efi_use_whole_disk(int fd) 1116 { 1117 struct dk_gpt *efi_label = NULL; 1118 int rval; 1119 int i; 1120 uint_t resv_index = 0, data_index = 0; 1121 diskaddr_t resv_start = 0, data_start = 0; 1122 diskaddr_t data_size, limit, difference; 1123 boolean_t sync_needed = B_FALSE; 1124 uint_t nblocks; 1125 1126 rval = efi_alloc_and_read(fd, &efi_label); 1127 if (rval < 0) { 1128 if (efi_label != NULL) 1129 efi_free(efi_label); 1130 return (rval); 1131 } 1132 1133 /* 1134 * Find the last physically non-zero partition. 1135 * This should be the reserved partition. 1136 */ 1137 for (i = 0; i < efi_label->efi_nparts; i ++) { 1138 if (resv_start < efi_label->efi_parts[i].p_start) { 1139 resv_start = efi_label->efi_parts[i].p_start; 1140 resv_index = i; 1141 } 1142 } 1143 1144 /* 1145 * Find the last physically non-zero partition before that. 1146 * This is the data partition. 1147 */ 1148 for (i = 0; i < resv_index; i ++) { 1149 if (data_start < efi_label->efi_parts[i].p_start) { 1150 data_start = efi_label->efi_parts[i].p_start; 1151 data_index = i; 1152 } 1153 } 1154 data_size = efi_label->efi_parts[data_index].p_size; 1155 1156 /* 1157 * See the "efi_alloc_and_init" function for more information 1158 * about where this "nblocks" value comes from. 1159 */ 1160 nblocks = efi_label->efi_first_u_lba - 1; 1161 1162 /* 1163 * Determine if the EFI label is out of sync. We check that: 1164 * 1165 * 1. the data partition ends at the limit we set, and 1166 * 2. the reserved partition starts at the limit we set. 1167 * 1168 * If either of these conditions is not met, then we need to 1169 * resync the EFI label. 1170 * 1171 * The limit is the last usable LBA, determined by the last LBA 1172 * and the first usable LBA fields on the EFI label of the disk 1173 * (see the lines directly above). Additionally, we factor in 1174 * EFI_MIN_RESV_SIZE (per its use in "zpool_label_disk") and 1175 * P2ALIGN it to ensure the partition boundaries are aligned 1176 * (for performance reasons). The alignment should match the 1177 * alignment used by the "zpool_label_disk" function. 1178 */ 1179 limit = P2ALIGN(efi_label->efi_last_lba - nblocks - EFI_MIN_RESV_SIZE, 1180 PARTITION_END_ALIGNMENT); 1181 if (data_start + data_size != limit || resv_start != limit) 1182 sync_needed = B_TRUE; 1183 1184 if (efi_debug && sync_needed) 1185 (void) fprintf(stderr, "efi_use_whole_disk: sync needed\n"); 1186 1187 /* 1188 * If alter_lba is 1, we are using the backup label. 1189 * Since we can locate the backup label by disk capacity, 1190 * there must be no unallocated space. 1191 */ 1192 if ((efi_label->efi_altern_lba == 1) || (efi_label->efi_altern_lba 1193 >= efi_label->efi_last_lba && !sync_needed)) { 1194 if (efi_debug) { 1195 (void) fprintf(stderr, 1196 "efi_use_whole_disk: requested space not found\n"); 1197 } 1198 efi_free(efi_label); 1199 return (VT_ENOSPC); 1200 } 1201 1202 /* 1203 * Verify that we've found the reserved partition by checking 1204 * that it looks the way it did when we created it in zpool_label_disk. 1205 * If we've found the incorrect partition, then we know that this 1206 * device was reformatted and no longer is solely used by ZFS. 1207 */ 1208 if ((efi_label->efi_parts[resv_index].p_size != EFI_MIN_RESV_SIZE) || 1209 (efi_label->efi_parts[resv_index].p_tag != V_RESERVED) || 1210 (resv_index != 8)) { 1211 if (efi_debug) { 1212 (void) fprintf(stderr, 1213 "efi_use_whole_disk: wholedisk not available\n"); 1214 } 1215 efi_free(efi_label); 1216 return (VT_ENOSPC); 1217 } 1218 1219 if (data_start + data_size != resv_start) { 1220 if (efi_debug) { 1221 (void) fprintf(stderr, 1222 "efi_use_whole_disk: " 1223 "data_start (%lli) + " 1224 "data_size (%lli) != " 1225 "resv_start (%lli)\n", 1226 data_start, data_size, resv_start); 1227 } 1228 1229 return (VT_EINVAL); 1230 } 1231 1232 if (limit < resv_start) { 1233 if (efi_debug) { 1234 (void) fprintf(stderr, 1235 "efi_use_whole_disk: " 1236 "limit (%lli) < resv_start (%lli)\n", 1237 limit, resv_start); 1238 } 1239 1240 return (VT_EINVAL); 1241 } 1242 1243 difference = limit - resv_start; 1244 1245 if (efi_debug) 1246 (void) fprintf(stderr, 1247 "efi_use_whole_disk: difference is %lli\n", difference); 1248 1249 /* 1250 * Move the reserved partition. There is currently no data in 1251 * here except fabricated devids (which get generated via 1252 * efi_write()). So there is no need to copy data. 1253 */ 1254 efi_label->efi_parts[data_index].p_size += difference; 1255 efi_label->efi_parts[resv_index].p_start += difference; 1256 efi_label->efi_last_u_lba = efi_label->efi_last_lba - nblocks; 1257 1258 rval = efi_write(fd, efi_label); 1259 if (rval < 0) { 1260 if (efi_debug) { 1261 (void) fprintf(stderr, 1262 "efi_use_whole_disk:fail to write label, rval=%d\n", 1263 rval); 1264 } 1265 efi_free(efi_label); 1266 return (rval); 1267 } 1268 1269 efi_free(efi_label); 1270 return (0); 1271 } 1272 1273 /* 1274 * write EFI label and backup label 1275 */ 1276 int 1277 efi_write(int fd, struct dk_gpt *vtoc) 1278 { 1279 dk_efi_t dk_ioc; 1280 efi_gpt_t *efi; 1281 efi_gpe_t *efi_parts; 1282 int i, j; 1283 struct dk_cinfo dki_info; 1284 int rval; 1285 int md_flag = 0; 1286 int nblocks; 1287 diskaddr_t lba_backup_gpt_hdr; 1288 1289 if ((rval = efi_get_info(fd, &dki_info)) != 0) 1290 return (rval); 1291 1292 /* check if we are dealing with a metadevice */ 1293 if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) && 1294 (strncmp(dki_info.dki_dname, "md", 3) == 0)) { 1295 md_flag = 1; 1296 } 1297 1298 if (check_input(vtoc)) { 1299 /* 1300 * not valid; if it's a metadevice just pass it down 1301 * because SVM will do its own checking 1302 */ 1303 if (md_flag == 0) { 1304 return (VT_EINVAL); 1305 } 1306 } 1307 1308 dk_ioc.dki_lba = 1; 1309 if (NBLOCKS(vtoc->efi_nparts, vtoc->efi_lbasize) < 34) { 1310 dk_ioc.dki_length = EFI_MIN_ARRAY_SIZE + vtoc->efi_lbasize; 1311 } else { 1312 dk_ioc.dki_length = NBLOCKS(vtoc->efi_nparts, 1313 vtoc->efi_lbasize) * 1314 vtoc->efi_lbasize; 1315 } 1316 1317 /* 1318 * the number of blocks occupied by GUID partition entry array 1319 */ 1320 nblocks = dk_ioc.dki_length / vtoc->efi_lbasize - 1; 1321 1322 /* 1323 * Backup GPT header is located on the block after GUID 1324 * partition entry array. Here, we calculate the address 1325 * for backup GPT header. 1326 */ 1327 lba_backup_gpt_hdr = vtoc->efi_last_u_lba + 1 + nblocks; 1328 if (posix_memalign((void **)&dk_ioc.dki_data, 1329 vtoc->efi_lbasize, dk_ioc.dki_length)) 1330 return (VT_ERROR); 1331 1332 memset(dk_ioc.dki_data, 0, dk_ioc.dki_length); 1333 efi = dk_ioc.dki_data; 1334 1335 /* stuff user's input into EFI struct */ 1336 efi->efi_gpt_Signature = LE_64(EFI_SIGNATURE); 1337 efi->efi_gpt_Revision = LE_32(vtoc->efi_version); /* 0x02000100 */ 1338 efi->efi_gpt_HeaderSize = LE_32(sizeof (struct efi_gpt) - LEN_EFI_PAD); 1339 efi->efi_gpt_Reserved1 = 0; 1340 efi->efi_gpt_MyLBA = LE_64(1ULL); 1341 efi->efi_gpt_AlternateLBA = LE_64(lba_backup_gpt_hdr); 1342 efi->efi_gpt_FirstUsableLBA = LE_64(vtoc->efi_first_u_lba); 1343 efi->efi_gpt_LastUsableLBA = LE_64(vtoc->efi_last_u_lba); 1344 efi->efi_gpt_PartitionEntryLBA = LE_64(2ULL); 1345 efi->efi_gpt_NumberOfPartitionEntries = LE_32(vtoc->efi_nparts); 1346 efi->efi_gpt_SizeOfPartitionEntry = LE_32(sizeof (struct efi_gpe)); 1347 UUID_LE_CONVERT(efi->efi_gpt_DiskGUID, vtoc->efi_disk_uguid); 1348 1349 /* LINTED -- always longlong aligned */ 1350 efi_parts = (efi_gpe_t *)((char *)dk_ioc.dki_data + vtoc->efi_lbasize); 1351 1352 for (i = 0; i < vtoc->efi_nparts; i++) { 1353 for (j = 0; 1354 j < sizeof (conversion_array) / 1355 sizeof (struct uuid_to_ptag); j++) { 1356 1357 if (vtoc->efi_parts[i].p_tag == j) { 1358 UUID_LE_CONVERT( 1359 efi_parts[i].efi_gpe_PartitionTypeGUID, 1360 conversion_array[j].uuid); 1361 break; 1362 } 1363 } 1364 1365 if (j == sizeof (conversion_array) / 1366 sizeof (struct uuid_to_ptag)) { 1367 /* 1368 * If we didn't have a matching uuid match, bail here. 1369 * Don't write a label with unknown uuid. 1370 */ 1371 if (efi_debug) { 1372 (void) fprintf(stderr, 1373 "Unknown uuid for p_tag %d\n", 1374 vtoc->efi_parts[i].p_tag); 1375 } 1376 return (VT_EINVAL); 1377 } 1378 1379 /* Zero's should be written for empty partitions */ 1380 if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED) 1381 continue; 1382 1383 efi_parts[i].efi_gpe_StartingLBA = 1384 LE_64(vtoc->efi_parts[i].p_start); 1385 efi_parts[i].efi_gpe_EndingLBA = 1386 LE_64(vtoc->efi_parts[i].p_start + 1387 vtoc->efi_parts[i].p_size - 1); 1388 efi_parts[i].efi_gpe_Attributes.PartitionAttrs = 1389 LE_16(vtoc->efi_parts[i].p_flag); 1390 for (j = 0; j < EFI_PART_NAME_LEN; j++) { 1391 efi_parts[i].efi_gpe_PartitionName[j] = 1392 LE_16((ushort_t)vtoc->efi_parts[i].p_name[j]); 1393 } 1394 if ((vtoc->efi_parts[i].p_tag != V_UNASSIGNED) && 1395 uuid_is_null((uchar_t *)&vtoc->efi_parts[i].p_uguid)) { 1396 (void) uuid_generate((uchar_t *) 1397 &vtoc->efi_parts[i].p_uguid); 1398 } 1399 bcopy(&vtoc->efi_parts[i].p_uguid, 1400 &efi_parts[i].efi_gpe_UniquePartitionGUID, 1401 sizeof (uuid_t)); 1402 } 1403 efi->efi_gpt_PartitionEntryArrayCRC32 = 1404 LE_32(efi_crc32((unsigned char *)efi_parts, 1405 vtoc->efi_nparts * (int)sizeof (struct efi_gpe))); 1406 efi->efi_gpt_HeaderCRC32 = 1407 LE_32(efi_crc32((unsigned char *)efi, 1408 LE_32(efi->efi_gpt_HeaderSize))); 1409 1410 if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) { 1411 free(dk_ioc.dki_data); 1412 switch (errno) { 1413 case EIO: 1414 return (VT_EIO); 1415 case EINVAL: 1416 return (VT_EINVAL); 1417 default: 1418 return (VT_ERROR); 1419 } 1420 } 1421 /* if it's a metadevice we're done */ 1422 if (md_flag) { 1423 free(dk_ioc.dki_data); 1424 return (0); 1425 } 1426 1427 /* write backup partition array */ 1428 dk_ioc.dki_lba = vtoc->efi_last_u_lba + 1; 1429 dk_ioc.dki_length -= vtoc->efi_lbasize; 1430 /* LINTED */ 1431 dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data + 1432 vtoc->efi_lbasize); 1433 1434 if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) { 1435 /* 1436 * we wrote the primary label okay, so don't fail 1437 */ 1438 if (efi_debug) { 1439 (void) fprintf(stderr, 1440 "write of backup partitions to block %llu " 1441 "failed, errno %d\n", 1442 vtoc->efi_last_u_lba + 1, 1443 errno); 1444 } 1445 } 1446 /* 1447 * now swap MyLBA and AlternateLBA fields and write backup 1448 * partition table header 1449 */ 1450 dk_ioc.dki_lba = lba_backup_gpt_hdr; 1451 dk_ioc.dki_length = vtoc->efi_lbasize; 1452 /* LINTED */ 1453 dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data - 1454 vtoc->efi_lbasize); 1455 efi->efi_gpt_AlternateLBA = LE_64(1ULL); 1456 efi->efi_gpt_MyLBA = LE_64(lba_backup_gpt_hdr); 1457 efi->efi_gpt_PartitionEntryLBA = LE_64(vtoc->efi_last_u_lba + 1); 1458 efi->efi_gpt_HeaderCRC32 = 0; 1459 efi->efi_gpt_HeaderCRC32 = 1460 LE_32(efi_crc32((unsigned char *)dk_ioc.dki_data, 1461 LE_32(efi->efi_gpt_HeaderSize))); 1462 1463 if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) { 1464 if (efi_debug) { 1465 (void) fprintf(stderr, 1466 "write of backup header to block %llu failed, " 1467 "errno %d\n", 1468 lba_backup_gpt_hdr, 1469 errno); 1470 } 1471 } 1472 /* write the PMBR */ 1473 (void) write_pmbr(fd, vtoc); 1474 free(dk_ioc.dki_data); 1475 1476 return (0); 1477 } 1478 1479 void 1480 efi_free(struct dk_gpt *ptr) 1481 { 1482 free(ptr); 1483 } 1484 1485 /* 1486 * Input: File descriptor 1487 * Output: 1 if disk has an EFI label, or > 2TB with no VTOC or legacy MBR. 1488 * Otherwise 0. 1489 */ 1490 int 1491 efi_type(int fd) 1492 { 1493 #if 0 1494 struct vtoc vtoc; 1495 struct extvtoc extvtoc; 1496 1497 if (ioctl(fd, DKIOCGEXTVTOC, &extvtoc) == -1) { 1498 if (errno == ENOTSUP) 1499 return (1); 1500 else if (errno == ENOTTY) { 1501 if (ioctl(fd, DKIOCGVTOC, &vtoc) == -1) 1502 if (errno == ENOTSUP) 1503 return (1); 1504 } 1505 } 1506 return (0); 1507 #else 1508 return (ENOSYS); 1509 #endif 1510 } 1511 1512 void 1513 efi_err_check(struct dk_gpt *vtoc) 1514 { 1515 int resv_part = -1; 1516 int i, j; 1517 diskaddr_t istart, jstart, isize, jsize, endsect; 1518 int overlap = 0; 1519 1520 /* 1521 * make sure no partitions overlap 1522 */ 1523 for (i = 0; i < vtoc->efi_nparts; i++) { 1524 /* It can't be unassigned and have an actual size */ 1525 if ((vtoc->efi_parts[i].p_tag == V_UNASSIGNED) && 1526 (vtoc->efi_parts[i].p_size != 0)) { 1527 (void) fprintf(stderr, 1528 "partition %d is \"unassigned\" but has a size " 1529 "of %llu\n", i, vtoc->efi_parts[i].p_size); 1530 } 1531 if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED) { 1532 continue; 1533 } 1534 if (vtoc->efi_parts[i].p_tag == V_RESERVED) { 1535 if (resv_part != -1) { 1536 (void) fprintf(stderr, 1537 "found duplicate reserved partition at " 1538 "%d\n", i); 1539 } 1540 resv_part = i; 1541 if (vtoc->efi_parts[i].p_size != EFI_MIN_RESV_SIZE) 1542 (void) fprintf(stderr, 1543 "Warning: reserved partition size must " 1544 "be %d sectors\n", EFI_MIN_RESV_SIZE); 1545 } 1546 if ((vtoc->efi_parts[i].p_start < vtoc->efi_first_u_lba) || 1547 (vtoc->efi_parts[i].p_start > vtoc->efi_last_u_lba)) { 1548 (void) fprintf(stderr, 1549 "Partition %d starts at %llu\n", 1550 i, 1551 vtoc->efi_parts[i].p_start); 1552 (void) fprintf(stderr, 1553 "It must be between %llu and %llu.\n", 1554 vtoc->efi_first_u_lba, 1555 vtoc->efi_last_u_lba); 1556 } 1557 if ((vtoc->efi_parts[i].p_start + 1558 vtoc->efi_parts[i].p_size < 1559 vtoc->efi_first_u_lba) || 1560 (vtoc->efi_parts[i].p_start + 1561 vtoc->efi_parts[i].p_size > 1562 vtoc->efi_last_u_lba + 1)) { 1563 (void) fprintf(stderr, 1564 "Partition %d ends at %llu\n", 1565 i, 1566 vtoc->efi_parts[i].p_start + 1567 vtoc->efi_parts[i].p_size); 1568 (void) fprintf(stderr, 1569 "It must be between %llu and %llu.\n", 1570 vtoc->efi_first_u_lba, 1571 vtoc->efi_last_u_lba); 1572 } 1573 1574 for (j = 0; j < vtoc->efi_nparts; j++) { 1575 isize = vtoc->efi_parts[i].p_size; 1576 jsize = vtoc->efi_parts[j].p_size; 1577 istart = vtoc->efi_parts[i].p_start; 1578 jstart = vtoc->efi_parts[j].p_start; 1579 if ((i != j) && (isize != 0) && (jsize != 0)) { 1580 endsect = jstart + jsize -1; 1581 if ((jstart <= istart) && 1582 (istart <= endsect)) { 1583 if (!overlap) { 1584 (void) fprintf(stderr, 1585 "label error: EFI Labels do not " 1586 "support overlapping partitions\n"); 1587 } 1588 (void) fprintf(stderr, 1589 "Partition %d overlaps partition " 1590 "%d.\n", i, j); 1591 overlap = 1; 1592 } 1593 } 1594 } 1595 } 1596 /* make sure there is a reserved partition */ 1597 if (resv_part == -1) { 1598 (void) fprintf(stderr, 1599 "no reserved partition found\n"); 1600 } 1601 } 1602 1603 /* 1604 * We need to get information necessary to construct a *new* efi 1605 * label type 1606 */ 1607 int 1608 efi_auto_sense(int fd, struct dk_gpt **vtoc) 1609 { 1610 1611 int i; 1612 1613 /* 1614 * Now build the default partition table 1615 */ 1616 if (efi_alloc_and_init(fd, EFI_NUMPAR, vtoc) != 0) { 1617 if (efi_debug) { 1618 (void) fprintf(stderr, "efi_alloc_and_init failed.\n"); 1619 } 1620 return (-1); 1621 } 1622 1623 for (i = 0; i < MIN((*vtoc)->efi_nparts, V_NUMPAR); i++) { 1624 (*vtoc)->efi_parts[i].p_tag = default_vtoc_map[i].p_tag; 1625 (*vtoc)->efi_parts[i].p_flag = default_vtoc_map[i].p_flag; 1626 (*vtoc)->efi_parts[i].p_start = 0; 1627 (*vtoc)->efi_parts[i].p_size = 0; 1628 } 1629 /* 1630 * Make constants first 1631 * and variable partitions later 1632 */ 1633 1634 /* root partition - s0 128 MB */ 1635 (*vtoc)->efi_parts[0].p_start = 34; 1636 (*vtoc)->efi_parts[0].p_size = 262144; 1637 1638 /* partition - s1 128 MB */ 1639 (*vtoc)->efi_parts[1].p_start = 262178; 1640 (*vtoc)->efi_parts[1].p_size = 262144; 1641 1642 /* partition -s2 is NOT the Backup disk */ 1643 (*vtoc)->efi_parts[2].p_tag = V_UNASSIGNED; 1644 1645 /* partition -s6 /usr partition - HOG */ 1646 (*vtoc)->efi_parts[6].p_start = 524322; 1647 (*vtoc)->efi_parts[6].p_size = (*vtoc)->efi_last_u_lba - 524322 1648 - (1024 * 16); 1649 1650 /* efi reserved partition - s9 16K */ 1651 (*vtoc)->efi_parts[8].p_start = (*vtoc)->efi_last_u_lba - (1024 * 16); 1652 (*vtoc)->efi_parts[8].p_size = (1024 * 16); 1653 (*vtoc)->efi_parts[8].p_tag = V_RESERVED; 1654 return (0); 1655 } 1656