1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or https://opensource.org/licenses/CDDL-1.0. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright 2012 Nexenta Systems, Inc. All rights reserved. 25 * Copyright (c) 2018 by Delphix. All rights reserved. 26 */ 27 28 #include <stdio.h> 29 #include <stdlib.h> 30 #include <errno.h> 31 #include <string.h> 32 #include <unistd.h> 33 #include <uuid/uuid.h> 34 #include <zlib.h> 35 #include <libintl.h> 36 #include <sys/types.h> 37 #include <sys/dkio.h> 38 #include <sys/mhd.h> 39 #include <sys/param.h> 40 #include <sys/dktp/fdisk.h> 41 #include <sys/efi_partition.h> 42 #include <sys/byteorder.h> 43 #include <sys/vdev_disk.h> 44 #include <linux/fs.h> 45 #include <linux/blkpg.h> 46 47 static struct uuid_to_ptag { 48 struct uuid uuid; 49 } conversion_array[] = { 50 { EFI_UNUSED }, 51 { EFI_BOOT }, 52 { EFI_ROOT }, 53 { EFI_SWAP }, 54 { EFI_USR }, 55 { EFI_BACKUP }, 56 { EFI_UNUSED }, /* STAND is never used */ 57 { EFI_VAR }, 58 { EFI_HOME }, 59 { EFI_ALTSCTR }, 60 { EFI_UNUSED }, /* CACHE (cachefs) is never used */ 61 { EFI_RESERVED }, 62 { EFI_SYSTEM }, 63 { EFI_LEGACY_MBR }, 64 { EFI_SYMC_PUB }, 65 { EFI_SYMC_CDS }, 66 { EFI_MSFT_RESV }, 67 { EFI_DELL_BASIC }, 68 { EFI_DELL_RAID }, 69 { EFI_DELL_SWAP }, 70 { EFI_DELL_LVM }, 71 { EFI_DELL_RESV }, 72 { EFI_AAPL_HFS }, 73 { EFI_AAPL_UFS }, 74 { EFI_FREEBSD_BOOT }, 75 { EFI_FREEBSD_SWAP }, 76 { EFI_FREEBSD_UFS }, 77 { EFI_FREEBSD_VINUM }, 78 { EFI_FREEBSD_ZFS }, 79 { EFI_BIOS_BOOT }, 80 { EFI_INTC_RS }, 81 { EFI_SNE_BOOT }, 82 { EFI_LENOVO_BOOT }, 83 { EFI_MSFT_LDMM }, 84 { EFI_MSFT_LDMD }, 85 { EFI_MSFT_RE }, 86 { EFI_IBM_GPFS }, 87 { EFI_MSFT_STORAGESPACES }, 88 { EFI_HPQ_DATA }, 89 { EFI_HPQ_SVC }, 90 { EFI_RHT_DATA }, 91 { EFI_RHT_HOME }, 92 { EFI_RHT_SRV }, 93 { EFI_RHT_DMCRYPT }, 94 { EFI_RHT_LUKS }, 95 { EFI_FREEBSD_DISKLABEL }, 96 { EFI_AAPL_RAID }, 97 { EFI_AAPL_RAIDOFFLINE }, 98 { EFI_AAPL_BOOT }, 99 { EFI_AAPL_LABEL }, 100 { EFI_AAPL_TVRECOVERY }, 101 { EFI_AAPL_CORESTORAGE }, 102 { EFI_NETBSD_SWAP }, 103 { EFI_NETBSD_FFS }, 104 { EFI_NETBSD_LFS }, 105 { EFI_NETBSD_RAID }, 106 { EFI_NETBSD_CAT }, 107 { EFI_NETBSD_CRYPT }, 108 { EFI_GOOG_KERN }, 109 { EFI_GOOG_ROOT }, 110 { EFI_GOOG_RESV }, 111 { EFI_HAIKU_BFS }, 112 { EFI_MIDNIGHTBSD_BOOT }, 113 { EFI_MIDNIGHTBSD_DATA }, 114 { EFI_MIDNIGHTBSD_SWAP }, 115 { EFI_MIDNIGHTBSD_UFS }, 116 { EFI_MIDNIGHTBSD_VINUM }, 117 { EFI_MIDNIGHTBSD_ZFS }, 118 { EFI_CEPH_JOURNAL }, 119 { EFI_CEPH_DMCRYPTJOURNAL }, 120 { EFI_CEPH_OSD }, 121 { EFI_CEPH_DMCRYPTOSD }, 122 { EFI_CEPH_CREATE }, 123 { EFI_CEPH_DMCRYPTCREATE }, 124 { EFI_OPENBSD_DISKLABEL }, 125 { EFI_BBRY_QNX }, 126 { EFI_BELL_PLAN9 }, 127 { EFI_VMW_KCORE }, 128 { EFI_VMW_VMFS }, 129 { EFI_VMW_RESV }, 130 { EFI_RHT_ROOTX86 }, 131 { EFI_RHT_ROOTAMD64 }, 132 { EFI_RHT_ROOTARM }, 133 { EFI_RHT_ROOTARM64 }, 134 { EFI_ACRONIS_SECUREZONE }, 135 { EFI_ONIE_BOOT }, 136 { EFI_ONIE_CONFIG }, 137 { EFI_IBM_PPRPBOOT }, 138 { EFI_FREEDESKTOP_BOOT } 139 }; 140 141 int efi_debug = 0; 142 143 static int efi_read(int, struct dk_gpt *); 144 145 /* 146 * Return a 32-bit CRC of the contents of the buffer. Pre-and-post 147 * one's conditioning will be handled by crc32() internally. 148 */ 149 static uint32_t 150 efi_crc32(const unsigned char *buf, unsigned int size) 151 { 152 uint32_t crc = crc32(0, Z_NULL, 0); 153 154 crc = crc32(crc, buf, size); 155 156 return (crc); 157 } 158 159 static int 160 read_disk_info(int fd, diskaddr_t *capacity, uint_t *lbsize) 161 { 162 int sector_size; 163 unsigned long long capacity_size; 164 165 if (ioctl(fd, BLKSSZGET, §or_size) < 0) 166 return (-1); 167 168 if (ioctl(fd, BLKGETSIZE64, &capacity_size) < 0) 169 return (-1); 170 171 *lbsize = (uint_t)sector_size; 172 *capacity = (diskaddr_t)(capacity_size / sector_size); 173 174 return (0); 175 } 176 177 /* 178 * Return back the device name associated with the file descriptor. The 179 * caller is responsible for freeing the memory associated with the 180 * returned string. 181 */ 182 static char * 183 efi_get_devname(int fd) 184 { 185 char path[32]; 186 187 /* 188 * The libefi API only provides the open fd and not the file path. 189 * To handle this realpath(3) is used to resolve the block device 190 * name from /proc/self/fd/<fd>. 191 */ 192 (void) snprintf(path, sizeof (path), "/proc/self/fd/%d", fd); 193 return (realpath(path, NULL)); 194 } 195 196 static int 197 efi_get_info(int fd, struct dk_cinfo *dki_info) 198 { 199 char *dev_path; 200 int rval = 0; 201 202 memset(dki_info, 0, sizeof (*dki_info)); 203 204 /* 205 * The simplest way to get the partition number under linux is 206 * to parse it out of the /dev/<disk><partition> block device name. 207 * The kernel creates this using the partition number when it 208 * populates /dev/ so it may be trusted. The tricky bit here is 209 * that the naming convention is based on the block device type. 210 * So we need to take this in to account when parsing out the 211 * partition information. Aside from the partition number we collect 212 * some additional device info. 213 */ 214 dev_path = efi_get_devname(fd); 215 if (dev_path == NULL) 216 goto error; 217 218 if ((strncmp(dev_path, "/dev/sd", 7) == 0)) { 219 strcpy(dki_info->dki_cname, "sd"); 220 dki_info->dki_ctype = DKC_SCSI_CCS; 221 rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu", 222 dki_info->dki_dname, 223 &dki_info->dki_partition); 224 } else if ((strncmp(dev_path, "/dev/hd", 7) == 0)) { 225 strcpy(dki_info->dki_cname, "hd"); 226 dki_info->dki_ctype = DKC_DIRECT; 227 rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu", 228 dki_info->dki_dname, 229 &dki_info->dki_partition); 230 } else if ((strncmp(dev_path, "/dev/md", 7) == 0)) { 231 strcpy(dki_info->dki_cname, "pseudo"); 232 dki_info->dki_ctype = DKC_MD; 233 strcpy(dki_info->dki_dname, "md"); 234 rval = sscanf(dev_path, "/dev/md%[0-9]p%hu", 235 dki_info->dki_dname + 2, 236 &dki_info->dki_partition); 237 } else if ((strncmp(dev_path, "/dev/vd", 7) == 0)) { 238 strcpy(dki_info->dki_cname, "vd"); 239 dki_info->dki_ctype = DKC_MD; 240 rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu", 241 dki_info->dki_dname, 242 &dki_info->dki_partition); 243 } else if ((strncmp(dev_path, "/dev/xvd", 8) == 0)) { 244 strcpy(dki_info->dki_cname, "xvd"); 245 dki_info->dki_ctype = DKC_MD; 246 rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu", 247 dki_info->dki_dname, 248 &dki_info->dki_partition); 249 } else if ((strncmp(dev_path, "/dev/zd", 7) == 0)) { 250 strcpy(dki_info->dki_cname, "zd"); 251 dki_info->dki_ctype = DKC_MD; 252 strcpy(dki_info->dki_dname, "zd"); 253 rval = sscanf(dev_path, "/dev/zd%[0-9]p%hu", 254 dki_info->dki_dname + 2, 255 &dki_info->dki_partition); 256 } else if ((strncmp(dev_path, "/dev/dm-", 8) == 0)) { 257 strcpy(dki_info->dki_cname, "pseudo"); 258 dki_info->dki_ctype = DKC_VBD; 259 strcpy(dki_info->dki_dname, "dm-"); 260 rval = sscanf(dev_path, "/dev/dm-%[0-9]p%hu", 261 dki_info->dki_dname + 3, 262 &dki_info->dki_partition); 263 } else if ((strncmp(dev_path, "/dev/ram", 8) == 0)) { 264 strcpy(dki_info->dki_cname, "pseudo"); 265 dki_info->dki_ctype = DKC_PCMCIA_MEM; 266 strcpy(dki_info->dki_dname, "ram"); 267 rval = sscanf(dev_path, "/dev/ram%[0-9]p%hu", 268 dki_info->dki_dname + 3, 269 &dki_info->dki_partition); 270 } else if ((strncmp(dev_path, "/dev/loop", 9) == 0)) { 271 strcpy(dki_info->dki_cname, "pseudo"); 272 dki_info->dki_ctype = DKC_VBD; 273 strcpy(dki_info->dki_dname, "loop"); 274 rval = sscanf(dev_path, "/dev/loop%[0-9]p%hu", 275 dki_info->dki_dname + 4, 276 &dki_info->dki_partition); 277 } else if ((strncmp(dev_path, "/dev/nvme", 9) == 0)) { 278 strcpy(dki_info->dki_cname, "nvme"); 279 dki_info->dki_ctype = DKC_SCSI_CCS; 280 strcpy(dki_info->dki_dname, "nvme"); 281 (void) sscanf(dev_path, "/dev/nvme%[0-9]", 282 dki_info->dki_dname + 4); 283 size_t controller_length = strlen( 284 dki_info->dki_dname); 285 strcpy(dki_info->dki_dname + controller_length, 286 "n"); 287 rval = sscanf(dev_path, 288 "/dev/nvme%*[0-9]n%[0-9]p%hu", 289 dki_info->dki_dname + controller_length + 1, 290 &dki_info->dki_partition); 291 } else { 292 strcpy(dki_info->dki_dname, "unknown"); 293 strcpy(dki_info->dki_cname, "unknown"); 294 dki_info->dki_ctype = DKC_UNKNOWN; 295 } 296 297 switch (rval) { 298 case 0: 299 errno = EINVAL; 300 goto error; 301 case 1: 302 dki_info->dki_partition = 0; 303 } 304 305 free(dev_path); 306 307 return (0); 308 error: 309 if (efi_debug) 310 (void) fprintf(stderr, "DKIOCINFO errno 0x%x\n", errno); 311 312 switch (errno) { 313 case EIO: 314 return (VT_EIO); 315 case EINVAL: 316 return (VT_EINVAL); 317 default: 318 return (VT_ERROR); 319 } 320 } 321 322 /* 323 * the number of blocks the EFI label takes up (round up to nearest 324 * block) 325 */ 326 #define NBLOCKS(p, l) (1 + ((((p) * (int)sizeof (efi_gpe_t)) + \ 327 ((l) - 1)) / (l))) 328 /* number of partitions -- limited by what we can malloc */ 329 #define MAX_PARTS ((4294967295UL - sizeof (struct dk_gpt)) / \ 330 sizeof (struct dk_part)) 331 332 int 333 efi_alloc_and_init(int fd, uint32_t nparts, struct dk_gpt **vtoc) 334 { 335 diskaddr_t capacity = 0; 336 uint_t lbsize = 0; 337 uint_t nblocks; 338 size_t length; 339 struct dk_gpt *vptr; 340 struct uuid uuid; 341 struct dk_cinfo dki_info; 342 343 if (read_disk_info(fd, &capacity, &lbsize) != 0) 344 return (-1); 345 346 if (efi_get_info(fd, &dki_info) != 0) 347 return (-1); 348 349 if (dki_info.dki_partition != 0) 350 return (-1); 351 352 if ((dki_info.dki_ctype == DKC_PCMCIA_MEM) || 353 (dki_info.dki_ctype == DKC_VBD) || 354 (dki_info.dki_ctype == DKC_UNKNOWN)) 355 return (-1); 356 357 nblocks = NBLOCKS(nparts, lbsize); 358 if ((nblocks * lbsize) < EFI_MIN_ARRAY_SIZE + lbsize) { 359 /* 16K plus one block for the GPT */ 360 nblocks = EFI_MIN_ARRAY_SIZE / lbsize + 1; 361 } 362 363 if (nparts > MAX_PARTS) { 364 if (efi_debug) { 365 (void) fprintf(stderr, 366 "the maximum number of partitions supported is %lu\n", 367 MAX_PARTS); 368 } 369 return (-1); 370 } 371 372 length = sizeof (struct dk_gpt) + 373 sizeof (struct dk_part) * (nparts - 1); 374 375 vptr = calloc(1, length); 376 if (vptr == NULL) 377 return (-1); 378 379 *vtoc = vptr; 380 381 vptr->efi_version = EFI_VERSION_CURRENT; 382 vptr->efi_lbasize = lbsize; 383 vptr->efi_nparts = nparts; 384 /* 385 * add one block here for the PMBR; on disks with a 512 byte 386 * block size and 128 or fewer partitions, efi_first_u_lba 387 * should work out to "34" 388 */ 389 vptr->efi_first_u_lba = nblocks + 1; 390 vptr->efi_last_lba = capacity - 1; 391 vptr->efi_altern_lba = capacity -1; 392 vptr->efi_last_u_lba = vptr->efi_last_lba - nblocks; 393 394 (void) uuid_generate((uchar_t *)&uuid); 395 UUID_LE_CONVERT(vptr->efi_disk_uguid, uuid); 396 return (0); 397 } 398 399 /* 400 * Read EFI - return partition number upon success. 401 */ 402 int 403 efi_alloc_and_read(int fd, struct dk_gpt **vtoc) 404 { 405 int rval; 406 uint32_t nparts; 407 int length; 408 struct dk_gpt *vptr; 409 410 /* figure out the number of entries that would fit into 16K */ 411 nparts = EFI_MIN_ARRAY_SIZE / sizeof (efi_gpe_t); 412 length = (int) sizeof (struct dk_gpt) + 413 (int) sizeof (struct dk_part) * (nparts - 1); 414 vptr = calloc(1, length); 415 416 if (vptr == NULL) 417 return (VT_ERROR); 418 419 vptr->efi_nparts = nparts; 420 rval = efi_read(fd, vptr); 421 422 if ((rval == VT_EINVAL) && vptr->efi_nparts > nparts) { 423 void *tmp; 424 length = (int) sizeof (struct dk_gpt) + 425 (int) sizeof (struct dk_part) * (vptr->efi_nparts - 1); 426 if ((tmp = realloc(vptr, length)) == NULL) { 427 /* cppcheck-suppress doubleFree */ 428 free(vptr); 429 *vtoc = NULL; 430 return (VT_ERROR); 431 } else { 432 vptr = tmp; 433 rval = efi_read(fd, vptr); 434 } 435 } 436 437 if (rval < 0) { 438 if (efi_debug) { 439 (void) fprintf(stderr, 440 "read of EFI table failed, rval=%d\n", rval); 441 } 442 free(vptr); 443 *vtoc = NULL; 444 } else { 445 *vtoc = vptr; 446 } 447 448 return (rval); 449 } 450 451 static int 452 efi_ioctl(int fd, int cmd, dk_efi_t *dk_ioc) 453 { 454 void *data = dk_ioc->dki_data; 455 int error; 456 diskaddr_t capacity; 457 uint_t lbsize; 458 459 /* 460 * When the IO is not being performed in kernel as an ioctl we need 461 * to know the sector size so we can seek to the proper byte offset. 462 */ 463 if (read_disk_info(fd, &capacity, &lbsize) == -1) { 464 if (efi_debug) 465 fprintf(stderr, "unable to read disk info: %d", errno); 466 467 errno = EIO; 468 return (-1); 469 } 470 471 switch (cmd) { 472 case DKIOCGETEFI: 473 if (lbsize == 0) { 474 if (efi_debug) 475 (void) fprintf(stderr, "DKIOCGETEFI assuming " 476 "LBA %d bytes\n", DEV_BSIZE); 477 478 lbsize = DEV_BSIZE; 479 } 480 481 error = lseek(fd, dk_ioc->dki_lba * lbsize, SEEK_SET); 482 if (error == -1) { 483 if (efi_debug) 484 (void) fprintf(stderr, "DKIOCGETEFI lseek " 485 "error: %d\n", errno); 486 return (error); 487 } 488 489 error = read(fd, data, dk_ioc->dki_length); 490 if (error == -1) { 491 if (efi_debug) 492 (void) fprintf(stderr, "DKIOCGETEFI read " 493 "error: %d\n", errno); 494 return (error); 495 } 496 497 if (error != dk_ioc->dki_length) { 498 if (efi_debug) 499 (void) fprintf(stderr, "DKIOCGETEFI short " 500 "read of %d bytes\n", error); 501 errno = EIO; 502 return (-1); 503 } 504 error = 0; 505 break; 506 507 case DKIOCSETEFI: 508 if (lbsize == 0) { 509 if (efi_debug) 510 (void) fprintf(stderr, "DKIOCSETEFI unknown " 511 "LBA size\n"); 512 errno = EIO; 513 return (-1); 514 } 515 516 error = lseek(fd, dk_ioc->dki_lba * lbsize, SEEK_SET); 517 if (error == -1) { 518 if (efi_debug) 519 (void) fprintf(stderr, "DKIOCSETEFI lseek " 520 "error: %d\n", errno); 521 return (error); 522 } 523 524 error = write(fd, data, dk_ioc->dki_length); 525 if (error == -1) { 526 if (efi_debug) 527 (void) fprintf(stderr, "DKIOCSETEFI write " 528 "error: %d\n", errno); 529 return (error); 530 } 531 532 if (error != dk_ioc->dki_length) { 533 if (efi_debug) 534 (void) fprintf(stderr, "DKIOCSETEFI short " 535 "write of %d bytes\n", error); 536 errno = EIO; 537 return (-1); 538 } 539 540 /* Sync the new EFI table to disk */ 541 error = fsync(fd); 542 if (error == -1) 543 return (error); 544 545 /* Ensure any local disk cache is also flushed */ 546 if (ioctl(fd, BLKFLSBUF, 0) == -1) 547 return (error); 548 549 error = 0; 550 break; 551 552 default: 553 if (efi_debug) 554 (void) fprintf(stderr, "unsupported ioctl()\n"); 555 556 errno = EIO; 557 return (-1); 558 } 559 560 return (error); 561 } 562 563 int 564 efi_rescan(int fd) 565 { 566 int retry = 10; 567 568 /* Notify the kernel a devices partition table has been updated */ 569 while (ioctl(fd, BLKRRPART) != 0) { 570 if ((--retry == 0) || (errno != EBUSY)) { 571 (void) fprintf(stderr, "the kernel failed to rescan " 572 "the partition table: %d\n", errno); 573 return (-1); 574 } 575 usleep(50000); 576 } 577 578 return (0); 579 } 580 581 static int 582 check_label(int fd, dk_efi_t *dk_ioc) 583 { 584 efi_gpt_t *efi; 585 uint_t crc; 586 587 if (efi_ioctl(fd, DKIOCGETEFI, dk_ioc) == -1) { 588 switch (errno) { 589 case EIO: 590 return (VT_EIO); 591 default: 592 return (VT_ERROR); 593 } 594 } 595 efi = dk_ioc->dki_data; 596 if (efi->efi_gpt_Signature != LE_64(EFI_SIGNATURE)) { 597 if (efi_debug) 598 (void) fprintf(stderr, 599 "Bad EFI signature: 0x%llx != 0x%llx\n", 600 (long long)efi->efi_gpt_Signature, 601 (long long)LE_64(EFI_SIGNATURE)); 602 return (VT_EINVAL); 603 } 604 605 /* 606 * check CRC of the header; the size of the header should 607 * never be larger than one block 608 */ 609 crc = efi->efi_gpt_HeaderCRC32; 610 efi->efi_gpt_HeaderCRC32 = 0; 611 len_t headerSize = (len_t)LE_32(efi->efi_gpt_HeaderSize); 612 613 if (headerSize < EFI_MIN_LABEL_SIZE || headerSize > EFI_LABEL_SIZE) { 614 if (efi_debug) 615 (void) fprintf(stderr, 616 "Invalid EFI HeaderSize %llu. Assuming %d.\n", 617 headerSize, EFI_MIN_LABEL_SIZE); 618 } 619 620 if ((headerSize > dk_ioc->dki_length) || 621 crc != LE_32(efi_crc32((unsigned char *)efi, headerSize))) { 622 if (efi_debug) 623 (void) fprintf(stderr, 624 "Bad EFI CRC: 0x%x != 0x%x\n", 625 crc, LE_32(efi_crc32((unsigned char *)efi, 626 headerSize))); 627 return (VT_EINVAL); 628 } 629 630 return (0); 631 } 632 633 static int 634 efi_read(int fd, struct dk_gpt *vtoc) 635 { 636 int i, j; 637 int label_len; 638 int rval = 0; 639 int md_flag = 0; 640 int vdc_flag = 0; 641 diskaddr_t capacity = 0; 642 uint_t lbsize = 0; 643 struct dk_minfo disk_info; 644 dk_efi_t dk_ioc; 645 efi_gpt_t *efi; 646 efi_gpe_t *efi_parts; 647 struct dk_cinfo dki_info; 648 uint32_t user_length; 649 boolean_t legacy_label = B_FALSE; 650 651 /* 652 * get the partition number for this file descriptor. 653 */ 654 if ((rval = efi_get_info(fd, &dki_info)) != 0) 655 return (rval); 656 657 if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) && 658 (strncmp(dki_info.dki_dname, "md", 3) == 0)) { 659 md_flag++; 660 } else if ((strncmp(dki_info.dki_cname, "vdc", 4) == 0) && 661 (strncmp(dki_info.dki_dname, "vdc", 4) == 0)) { 662 /* 663 * The controller and drive name "vdc" (virtual disk client) 664 * indicates a LDoms virtual disk. 665 */ 666 vdc_flag++; 667 } 668 669 /* get the LBA size */ 670 if (read_disk_info(fd, &capacity, &lbsize) == -1) { 671 if (efi_debug) { 672 (void) fprintf(stderr, 673 "unable to read disk info: %d", 674 errno); 675 } 676 return (VT_EINVAL); 677 } 678 679 disk_info.dki_lbsize = lbsize; 680 disk_info.dki_capacity = capacity; 681 682 if (disk_info.dki_lbsize == 0) { 683 if (efi_debug) { 684 (void) fprintf(stderr, 685 "efi_read: assuming LBA 512 bytes\n"); 686 } 687 disk_info.dki_lbsize = DEV_BSIZE; 688 } 689 /* 690 * Read the EFI GPT to figure out how many partitions we need 691 * to deal with. 692 */ 693 dk_ioc.dki_lba = 1; 694 if (NBLOCKS(vtoc->efi_nparts, disk_info.dki_lbsize) < 34) { 695 label_len = EFI_MIN_ARRAY_SIZE + disk_info.dki_lbsize; 696 } else { 697 label_len = vtoc->efi_nparts * (int) sizeof (efi_gpe_t) + 698 disk_info.dki_lbsize; 699 if (label_len % disk_info.dki_lbsize) { 700 /* pad to physical sector size */ 701 label_len += disk_info.dki_lbsize; 702 label_len &= ~(disk_info.dki_lbsize - 1); 703 } 704 } 705 706 if (posix_memalign((void **)&dk_ioc.dki_data, 707 disk_info.dki_lbsize, label_len)) 708 return (VT_ERROR); 709 710 memset(dk_ioc.dki_data, 0, label_len); 711 dk_ioc.dki_length = disk_info.dki_lbsize; 712 user_length = vtoc->efi_nparts; 713 efi = dk_ioc.dki_data; 714 if (md_flag) { 715 dk_ioc.dki_length = label_len; 716 if (efi_ioctl(fd, DKIOCGETEFI, &dk_ioc) == -1) { 717 switch (errno) { 718 case EIO: 719 return (VT_EIO); 720 default: 721 return (VT_ERROR); 722 } 723 } 724 } else if ((rval = check_label(fd, &dk_ioc)) == VT_EINVAL) { 725 /* 726 * No valid label here; try the alternate. Note that here 727 * we just read GPT header and save it into dk_ioc.data, 728 * Later, we will read GUID partition entry array if we 729 * can get valid GPT header. 730 */ 731 732 /* 733 * This is a workaround for legacy systems. In the past, the 734 * last sector of SCSI disk was invisible on x86 platform. At 735 * that time, backup label was saved on the next to the last 736 * sector. It is possible for users to move a disk from previous 737 * solaris system to present system. Here, we attempt to search 738 * legacy backup EFI label first. 739 */ 740 dk_ioc.dki_lba = disk_info.dki_capacity - 2; 741 dk_ioc.dki_length = disk_info.dki_lbsize; 742 rval = check_label(fd, &dk_ioc); 743 if (rval == VT_EINVAL) { 744 /* 745 * we didn't find legacy backup EFI label, try to 746 * search backup EFI label in the last block. 747 */ 748 dk_ioc.dki_lba = disk_info.dki_capacity - 1; 749 dk_ioc.dki_length = disk_info.dki_lbsize; 750 rval = check_label(fd, &dk_ioc); 751 if (rval == 0) { 752 legacy_label = B_TRUE; 753 if (efi_debug) 754 (void) fprintf(stderr, 755 "efi_read: primary label corrupt; " 756 "using EFI backup label located on" 757 " the last block\n"); 758 } 759 } else { 760 if ((efi_debug) && (rval == 0)) 761 (void) fprintf(stderr, "efi_read: primary label" 762 " corrupt; using legacy EFI backup label " 763 " located on the next to last block\n"); 764 } 765 766 if (rval == 0) { 767 dk_ioc.dki_lba = LE_64(efi->efi_gpt_PartitionEntryLBA); 768 vtoc->efi_flags |= EFI_GPT_PRIMARY_CORRUPT; 769 vtoc->efi_nparts = 770 LE_32(efi->efi_gpt_NumberOfPartitionEntries); 771 /* 772 * Partition tables are between backup GPT header 773 * table and ParitionEntryLBA (the starting LBA of 774 * the GUID partition entries array). Now that we 775 * already got valid GPT header and saved it in 776 * dk_ioc.dki_data, we try to get GUID partition 777 * entry array here. 778 */ 779 /* LINTED */ 780 dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data 781 + disk_info.dki_lbsize); 782 if (legacy_label) 783 dk_ioc.dki_length = disk_info.dki_capacity - 1 - 784 dk_ioc.dki_lba; 785 else 786 dk_ioc.dki_length = disk_info.dki_capacity - 2 - 787 dk_ioc.dki_lba; 788 dk_ioc.dki_length *= disk_info.dki_lbsize; 789 if (dk_ioc.dki_length > 790 ((len_t)label_len - sizeof (*dk_ioc.dki_data))) { 791 rval = VT_EINVAL; 792 } else { 793 /* 794 * read GUID partition entry array 795 */ 796 rval = efi_ioctl(fd, DKIOCGETEFI, &dk_ioc); 797 } 798 } 799 800 } else if (rval == 0) { 801 802 dk_ioc.dki_lba = LE_64(efi->efi_gpt_PartitionEntryLBA); 803 /* LINTED */ 804 dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data 805 + disk_info.dki_lbsize); 806 dk_ioc.dki_length = label_len - disk_info.dki_lbsize; 807 rval = efi_ioctl(fd, DKIOCGETEFI, &dk_ioc); 808 809 } else if (vdc_flag && rval == VT_ERROR && errno == EINVAL) { 810 /* 811 * When the device is a LDoms virtual disk, the DKIOCGETEFI 812 * ioctl can fail with EINVAL if the virtual disk backend 813 * is a ZFS volume serviced by a domain running an old version 814 * of Solaris. This is because the DKIOCGETEFI ioctl was 815 * initially incorrectly implemented for a ZFS volume and it 816 * expected the GPT and GPE to be retrieved with a single ioctl. 817 * So we try to read the GPT and the GPE using that old style 818 * ioctl. 819 */ 820 dk_ioc.dki_lba = 1; 821 dk_ioc.dki_length = label_len; 822 rval = check_label(fd, &dk_ioc); 823 } 824 825 if (rval < 0) { 826 free(efi); 827 return (rval); 828 } 829 830 /* LINTED -- always longlong aligned */ 831 efi_parts = (efi_gpe_t *)(((char *)efi) + disk_info.dki_lbsize); 832 833 /* 834 * Assemble this into a "dk_gpt" struct for easier 835 * digestibility by applications. 836 */ 837 vtoc->efi_version = LE_32(efi->efi_gpt_Revision); 838 vtoc->efi_nparts = LE_32(efi->efi_gpt_NumberOfPartitionEntries); 839 vtoc->efi_part_size = LE_32(efi->efi_gpt_SizeOfPartitionEntry); 840 vtoc->efi_lbasize = disk_info.dki_lbsize; 841 vtoc->efi_last_lba = disk_info.dki_capacity - 1; 842 vtoc->efi_first_u_lba = LE_64(efi->efi_gpt_FirstUsableLBA); 843 vtoc->efi_last_u_lba = LE_64(efi->efi_gpt_LastUsableLBA); 844 vtoc->efi_altern_lba = LE_64(efi->efi_gpt_AlternateLBA); 845 UUID_LE_CONVERT(vtoc->efi_disk_uguid, efi->efi_gpt_DiskGUID); 846 847 /* 848 * If the array the user passed in is too small, set the length 849 * to what it needs to be and return 850 */ 851 if (user_length < vtoc->efi_nparts) { 852 return (VT_EINVAL); 853 } 854 855 for (i = 0; i < vtoc->efi_nparts; i++) { 856 UUID_LE_CONVERT(vtoc->efi_parts[i].p_guid, 857 efi_parts[i].efi_gpe_PartitionTypeGUID); 858 859 for (j = 0; 860 j < sizeof (conversion_array) 861 / sizeof (struct uuid_to_ptag); j++) { 862 863 if (memcmp(&vtoc->efi_parts[i].p_guid, 864 &conversion_array[j].uuid, 865 sizeof (struct uuid)) == 0) { 866 vtoc->efi_parts[i].p_tag = j; 867 break; 868 } 869 } 870 if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED) 871 continue; 872 vtoc->efi_parts[i].p_flag = 873 LE_16(efi_parts[i].efi_gpe_Attributes.PartitionAttrs); 874 vtoc->efi_parts[i].p_start = 875 LE_64(efi_parts[i].efi_gpe_StartingLBA); 876 vtoc->efi_parts[i].p_size = 877 LE_64(efi_parts[i].efi_gpe_EndingLBA) - 878 vtoc->efi_parts[i].p_start + 1; 879 for (j = 0; j < EFI_PART_NAME_LEN; j++) { 880 vtoc->efi_parts[i].p_name[j] = 881 (uchar_t)LE_16( 882 efi_parts[i].efi_gpe_PartitionName[j]); 883 } 884 885 UUID_LE_CONVERT(vtoc->efi_parts[i].p_uguid, 886 efi_parts[i].efi_gpe_UniquePartitionGUID); 887 } 888 free(efi); 889 890 return (dki_info.dki_partition); 891 } 892 893 /* writes a "protective" MBR */ 894 static int 895 write_pmbr(int fd, struct dk_gpt *vtoc) 896 { 897 dk_efi_t dk_ioc; 898 struct mboot mb; 899 uchar_t *cp; 900 diskaddr_t size_in_lba; 901 uchar_t *buf; 902 int len; 903 904 len = (vtoc->efi_lbasize == 0) ? sizeof (mb) : vtoc->efi_lbasize; 905 if (posix_memalign((void **)&buf, len, len)) 906 return (VT_ERROR); 907 908 /* 909 * Preserve any boot code and disk signature if the first block is 910 * already an MBR. 911 */ 912 memset(buf, 0, len); 913 dk_ioc.dki_lba = 0; 914 dk_ioc.dki_length = len; 915 /* LINTED -- always longlong aligned */ 916 dk_ioc.dki_data = (efi_gpt_t *)buf; 917 if (efi_ioctl(fd, DKIOCGETEFI, &dk_ioc) == -1) { 918 memset(&mb, 0, sizeof (mb)); 919 mb.signature = LE_16(MBB_MAGIC); 920 } else { 921 (void) memcpy(&mb, buf, sizeof (mb)); 922 if (mb.signature != LE_16(MBB_MAGIC)) { 923 memset(&mb, 0, sizeof (mb)); 924 mb.signature = LE_16(MBB_MAGIC); 925 } 926 } 927 928 memset(&mb.parts, 0, sizeof (mb.parts)); 929 cp = (uchar_t *)&mb.parts[0]; 930 /* bootable or not */ 931 *cp++ = 0; 932 /* beginning CHS; 0xffffff if not representable */ 933 *cp++ = 0xff; 934 *cp++ = 0xff; 935 *cp++ = 0xff; 936 /* OS type */ 937 *cp++ = EFI_PMBR; 938 /* ending CHS; 0xffffff if not representable */ 939 *cp++ = 0xff; 940 *cp++ = 0xff; 941 *cp++ = 0xff; 942 /* starting LBA: 1 (little endian format) by EFI definition */ 943 *cp++ = 0x01; 944 *cp++ = 0x00; 945 *cp++ = 0x00; 946 *cp++ = 0x00; 947 /* ending LBA: last block on the disk (little endian format) */ 948 size_in_lba = vtoc->efi_last_lba; 949 if (size_in_lba < 0xffffffff) { 950 *cp++ = (size_in_lba & 0x000000ff); 951 *cp++ = (size_in_lba & 0x0000ff00) >> 8; 952 *cp++ = (size_in_lba & 0x00ff0000) >> 16; 953 *cp++ = (size_in_lba & 0xff000000) >> 24; 954 } else { 955 *cp++ = 0xff; 956 *cp++ = 0xff; 957 *cp++ = 0xff; 958 *cp++ = 0xff; 959 } 960 961 (void) memcpy(buf, &mb, sizeof (mb)); 962 /* LINTED -- always longlong aligned */ 963 dk_ioc.dki_data = (efi_gpt_t *)buf; 964 dk_ioc.dki_lba = 0; 965 dk_ioc.dki_length = len; 966 if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) { 967 free(buf); 968 switch (errno) { 969 case EIO: 970 return (VT_EIO); 971 case EINVAL: 972 return (VT_EINVAL); 973 default: 974 return (VT_ERROR); 975 } 976 } 977 free(buf); 978 return (0); 979 } 980 981 /* make sure the user specified something reasonable */ 982 static int 983 check_input(struct dk_gpt *vtoc) 984 { 985 int resv_part = -1; 986 int i, j; 987 diskaddr_t istart, jstart, isize, jsize, endsect; 988 989 /* 990 * Sanity-check the input (make sure no partitions overlap) 991 */ 992 for (i = 0; i < vtoc->efi_nparts; i++) { 993 /* It can't be unassigned and have an actual size */ 994 if ((vtoc->efi_parts[i].p_tag == V_UNASSIGNED) && 995 (vtoc->efi_parts[i].p_size != 0)) { 996 if (efi_debug) { 997 (void) fprintf(stderr, "partition %d is " 998 "\"unassigned\" but has a size of %llu", 999 i, vtoc->efi_parts[i].p_size); 1000 } 1001 return (VT_EINVAL); 1002 } 1003 if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED) { 1004 if (uuid_is_null((uchar_t *)&vtoc->efi_parts[i].p_guid)) 1005 continue; 1006 /* we have encountered an unknown uuid */ 1007 vtoc->efi_parts[i].p_tag = 0xff; 1008 } 1009 if (vtoc->efi_parts[i].p_tag == V_RESERVED) { 1010 if (resv_part != -1) { 1011 if (efi_debug) { 1012 (void) fprintf(stderr, "found " 1013 "duplicate reserved partition " 1014 "at %d\n", i); 1015 } 1016 return (VT_EINVAL); 1017 } 1018 resv_part = i; 1019 } 1020 if ((vtoc->efi_parts[i].p_start < vtoc->efi_first_u_lba) || 1021 (vtoc->efi_parts[i].p_start > vtoc->efi_last_u_lba)) { 1022 if (efi_debug) { 1023 (void) fprintf(stderr, 1024 "Partition %d starts at %llu. ", 1025 i, 1026 vtoc->efi_parts[i].p_start); 1027 (void) fprintf(stderr, 1028 "It must be between %llu and %llu.\n", 1029 vtoc->efi_first_u_lba, 1030 vtoc->efi_last_u_lba); 1031 } 1032 return (VT_EINVAL); 1033 } 1034 if ((vtoc->efi_parts[i].p_start + 1035 vtoc->efi_parts[i].p_size < 1036 vtoc->efi_first_u_lba) || 1037 (vtoc->efi_parts[i].p_start + 1038 vtoc->efi_parts[i].p_size > 1039 vtoc->efi_last_u_lba + 1)) { 1040 if (efi_debug) { 1041 (void) fprintf(stderr, 1042 "Partition %d ends at %llu. ", 1043 i, 1044 vtoc->efi_parts[i].p_start + 1045 vtoc->efi_parts[i].p_size); 1046 (void) fprintf(stderr, 1047 "It must be between %llu and %llu.\n", 1048 vtoc->efi_first_u_lba, 1049 vtoc->efi_last_u_lba); 1050 } 1051 return (VT_EINVAL); 1052 } 1053 1054 for (j = 0; j < vtoc->efi_nparts; j++) { 1055 isize = vtoc->efi_parts[i].p_size; 1056 jsize = vtoc->efi_parts[j].p_size; 1057 istart = vtoc->efi_parts[i].p_start; 1058 jstart = vtoc->efi_parts[j].p_start; 1059 if ((i != j) && (isize != 0) && (jsize != 0)) { 1060 endsect = jstart + jsize -1; 1061 if ((jstart <= istart) && 1062 (istart <= endsect)) { 1063 if (efi_debug) { 1064 (void) fprintf(stderr, 1065 "Partition %d overlaps " 1066 "partition %d.", i, j); 1067 } 1068 return (VT_EINVAL); 1069 } 1070 } 1071 } 1072 } 1073 /* just a warning for now */ 1074 if ((resv_part == -1) && efi_debug) { 1075 (void) fprintf(stderr, 1076 "no reserved partition found\n"); 1077 } 1078 return (0); 1079 } 1080 1081 static int 1082 call_blkpg_ioctl(int fd, int command, diskaddr_t start, 1083 diskaddr_t size, uint_t pno) 1084 { 1085 struct blkpg_ioctl_arg ioctl_arg; 1086 struct blkpg_partition linux_part; 1087 memset(&linux_part, 0, sizeof (linux_part)); 1088 1089 char *path = efi_get_devname(fd); 1090 if (path == NULL) { 1091 (void) fprintf(stderr, "failed to retrieve device name\n"); 1092 return (VT_EINVAL); 1093 } 1094 1095 linux_part.start = start; 1096 linux_part.length = size; 1097 linux_part.pno = pno; 1098 snprintf(linux_part.devname, BLKPG_DEVNAMELTH - 1, "%s%u", path, pno); 1099 linux_part.devname[BLKPG_DEVNAMELTH - 1] = '\0'; 1100 free(path); 1101 1102 ioctl_arg.op = command; 1103 ioctl_arg.flags = 0; 1104 ioctl_arg.datalen = sizeof (struct blkpg_partition); 1105 ioctl_arg.data = &linux_part; 1106 1107 return (ioctl(fd, BLKPG, &ioctl_arg)); 1108 } 1109 1110 /* 1111 * add all the unallocated space to the current label 1112 */ 1113 int 1114 efi_use_whole_disk(int fd) 1115 { 1116 struct dk_gpt *efi_label = NULL; 1117 int rval; 1118 int i; 1119 uint_t resv_index = 0, data_index = 0; 1120 diskaddr_t resv_start = 0, data_start = 0; 1121 diskaddr_t data_size, limit, difference; 1122 boolean_t sync_needed = B_FALSE; 1123 uint_t nblocks; 1124 1125 rval = efi_alloc_and_read(fd, &efi_label); 1126 if (rval < 0) { 1127 if (efi_label != NULL) 1128 efi_free(efi_label); 1129 return (rval); 1130 } 1131 1132 /* 1133 * Find the last physically non-zero partition. 1134 * This should be the reserved partition. 1135 */ 1136 for (i = 0; i < efi_label->efi_nparts; i ++) { 1137 if (resv_start < efi_label->efi_parts[i].p_start) { 1138 resv_start = efi_label->efi_parts[i].p_start; 1139 resv_index = i; 1140 } 1141 } 1142 1143 /* 1144 * Find the last physically non-zero partition before that. 1145 * This is the data partition. 1146 */ 1147 for (i = 0; i < resv_index; i ++) { 1148 if (data_start < efi_label->efi_parts[i].p_start) { 1149 data_start = efi_label->efi_parts[i].p_start; 1150 data_index = i; 1151 } 1152 } 1153 data_size = efi_label->efi_parts[data_index].p_size; 1154 1155 /* 1156 * See the "efi_alloc_and_init" function for more information 1157 * about where this "nblocks" value comes from. 1158 */ 1159 nblocks = efi_label->efi_first_u_lba - 1; 1160 1161 /* 1162 * Determine if the EFI label is out of sync. We check that: 1163 * 1164 * 1. the data partition ends at the limit we set, and 1165 * 2. the reserved partition starts at the limit we set. 1166 * 1167 * If either of these conditions is not met, then we need to 1168 * resync the EFI label. 1169 * 1170 * The limit is the last usable LBA, determined by the last LBA 1171 * and the first usable LBA fields on the EFI label of the disk 1172 * (see the lines directly above). Additionally, we factor in 1173 * EFI_MIN_RESV_SIZE (per its use in "zpool_label_disk") and 1174 * P2ALIGN it to ensure the partition boundaries are aligned 1175 * (for performance reasons). The alignment should match the 1176 * alignment used by the "zpool_label_disk" function. 1177 */ 1178 limit = P2ALIGN(efi_label->efi_last_lba - nblocks - EFI_MIN_RESV_SIZE, 1179 PARTITION_END_ALIGNMENT); 1180 if (data_start + data_size != limit || resv_start != limit) 1181 sync_needed = B_TRUE; 1182 1183 if (efi_debug && sync_needed) 1184 (void) fprintf(stderr, "efi_use_whole_disk: sync needed\n"); 1185 1186 /* 1187 * If alter_lba is 1, we are using the backup label. 1188 * Since we can locate the backup label by disk capacity, 1189 * there must be no unallocated space. 1190 */ 1191 if ((efi_label->efi_altern_lba == 1) || (efi_label->efi_altern_lba 1192 >= efi_label->efi_last_lba && !sync_needed)) { 1193 if (efi_debug) { 1194 (void) fprintf(stderr, 1195 "efi_use_whole_disk: requested space not found\n"); 1196 } 1197 efi_free(efi_label); 1198 return (VT_ENOSPC); 1199 } 1200 1201 /* 1202 * Verify that we've found the reserved partition by checking 1203 * that it looks the way it did when we created it in zpool_label_disk. 1204 * If we've found the incorrect partition, then we know that this 1205 * device was reformatted and no longer is solely used by ZFS. 1206 */ 1207 if ((efi_label->efi_parts[resv_index].p_size != EFI_MIN_RESV_SIZE) || 1208 (efi_label->efi_parts[resv_index].p_tag != V_RESERVED) || 1209 (resv_index != 8)) { 1210 if (efi_debug) { 1211 (void) fprintf(stderr, 1212 "efi_use_whole_disk: wholedisk not available\n"); 1213 } 1214 efi_free(efi_label); 1215 return (VT_ENOSPC); 1216 } 1217 1218 if (data_start + data_size != resv_start) { 1219 if (efi_debug) { 1220 (void) fprintf(stderr, 1221 "efi_use_whole_disk: " 1222 "data_start (%lli) + " 1223 "data_size (%lli) != " 1224 "resv_start (%lli)\n", 1225 data_start, data_size, resv_start); 1226 } 1227 1228 return (VT_EINVAL); 1229 } 1230 1231 if (limit < resv_start) { 1232 if (efi_debug) { 1233 (void) fprintf(stderr, 1234 "efi_use_whole_disk: " 1235 "limit (%lli) < resv_start (%lli)\n", 1236 limit, resv_start); 1237 } 1238 1239 return (VT_EINVAL); 1240 } 1241 1242 difference = limit - resv_start; 1243 1244 if (efi_debug) 1245 (void) fprintf(stderr, 1246 "efi_use_whole_disk: difference is %lli\n", difference); 1247 1248 /* 1249 * Move the reserved partition. There is currently no data in 1250 * here except fabricated devids (which get generated via 1251 * efi_write()). So there is no need to copy data. 1252 */ 1253 efi_label->efi_parts[data_index].p_size += difference; 1254 efi_label->efi_parts[resv_index].p_start += difference; 1255 efi_label->efi_last_u_lba = efi_label->efi_last_lba - nblocks; 1256 1257 /* 1258 * Rescanning the partition table in the kernel can result 1259 * in the device links to be removed (see comment in vdev_disk_open). 1260 * If BLKPG_RESIZE_PARTITION is available, then we can resize 1261 * the partition table online and avoid having to remove the device 1262 * links used by the pool. This provides a very deterministic 1263 * approach to resizing devices and does not require any 1264 * loops waiting for devices to reappear. 1265 */ 1266 #ifdef BLKPG_RESIZE_PARTITION 1267 /* 1268 * Delete the reserved partition since we're about to expand 1269 * the data partition and it would overlap with the reserved 1270 * partition. 1271 * NOTE: The starting index for the ioctl is 1 while for the 1272 * EFI partitions it's 0. For that reason we have to add one 1273 * whenever we make an ioctl call. 1274 */ 1275 rval = call_blkpg_ioctl(fd, BLKPG_DEL_PARTITION, 0, 0, resv_index + 1); 1276 if (rval != 0) 1277 goto out; 1278 1279 /* 1280 * Expand the data partition 1281 */ 1282 rval = call_blkpg_ioctl(fd, BLKPG_RESIZE_PARTITION, 1283 efi_label->efi_parts[data_index].p_start * efi_label->efi_lbasize, 1284 efi_label->efi_parts[data_index].p_size * efi_label->efi_lbasize, 1285 data_index + 1); 1286 if (rval != 0) { 1287 (void) fprintf(stderr, "Unable to resize data " 1288 "partition: %d\n", rval); 1289 /* 1290 * Since we failed to resize, we need to reset the start 1291 * of the reserve partition and re-create it. 1292 */ 1293 efi_label->efi_parts[resv_index].p_start -= difference; 1294 } 1295 1296 /* 1297 * Re-add the reserved partition. If we've expanded the data partition 1298 * then we'll move the reserve partition to the end of the data 1299 * partition. Otherwise, we'll recreate the partition in its original 1300 * location. Note that we do this as best-effort and ignore any 1301 * errors that may arise here. This will ensure that we finish writing 1302 * the EFI label. 1303 */ 1304 (void) call_blkpg_ioctl(fd, BLKPG_ADD_PARTITION, 1305 efi_label->efi_parts[resv_index].p_start * efi_label->efi_lbasize, 1306 efi_label->efi_parts[resv_index].p_size * efi_label->efi_lbasize, 1307 resv_index + 1); 1308 #endif 1309 1310 /* 1311 * We're now ready to write the EFI label. 1312 */ 1313 if (rval == 0) { 1314 rval = efi_write(fd, efi_label); 1315 if (rval < 0 && efi_debug) { 1316 (void) fprintf(stderr, "efi_use_whole_disk:fail " 1317 "to write label, rval=%d\n", rval); 1318 } 1319 } 1320 1321 out: 1322 efi_free(efi_label); 1323 return (rval); 1324 } 1325 1326 /* 1327 * write EFI label and backup label 1328 */ 1329 int 1330 efi_write(int fd, struct dk_gpt *vtoc) 1331 { 1332 dk_efi_t dk_ioc; 1333 efi_gpt_t *efi; 1334 efi_gpe_t *efi_parts; 1335 int i, j; 1336 struct dk_cinfo dki_info; 1337 int rval; 1338 int md_flag = 0; 1339 int nblocks; 1340 diskaddr_t lba_backup_gpt_hdr; 1341 1342 if ((rval = efi_get_info(fd, &dki_info)) != 0) 1343 return (rval); 1344 1345 /* check if we are dealing with a metadevice */ 1346 if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) && 1347 (strncmp(dki_info.dki_dname, "md", 3) == 0)) { 1348 md_flag = 1; 1349 } 1350 1351 if (check_input(vtoc)) { 1352 /* 1353 * not valid; if it's a metadevice just pass it down 1354 * because SVM will do its own checking 1355 */ 1356 if (md_flag == 0) { 1357 return (VT_EINVAL); 1358 } 1359 } 1360 1361 dk_ioc.dki_lba = 1; 1362 if (NBLOCKS(vtoc->efi_nparts, vtoc->efi_lbasize) < 34) { 1363 dk_ioc.dki_length = EFI_MIN_ARRAY_SIZE + vtoc->efi_lbasize; 1364 } else { 1365 dk_ioc.dki_length = (len_t)NBLOCKS(vtoc->efi_nparts, 1366 vtoc->efi_lbasize) * 1367 vtoc->efi_lbasize; 1368 } 1369 1370 /* 1371 * the number of blocks occupied by GUID partition entry array 1372 */ 1373 nblocks = dk_ioc.dki_length / vtoc->efi_lbasize - 1; 1374 1375 /* 1376 * Backup GPT header is located on the block after GUID 1377 * partition entry array. Here, we calculate the address 1378 * for backup GPT header. 1379 */ 1380 lba_backup_gpt_hdr = vtoc->efi_last_u_lba + 1 + nblocks; 1381 if (posix_memalign((void **)&dk_ioc.dki_data, 1382 vtoc->efi_lbasize, dk_ioc.dki_length)) 1383 return (VT_ERROR); 1384 1385 memset(dk_ioc.dki_data, 0, dk_ioc.dki_length); 1386 efi = dk_ioc.dki_data; 1387 1388 /* stuff user's input into EFI struct */ 1389 efi->efi_gpt_Signature = LE_64(EFI_SIGNATURE); 1390 efi->efi_gpt_Revision = LE_32(vtoc->efi_version); /* 0x02000100 */ 1391 efi->efi_gpt_HeaderSize = LE_32(sizeof (struct efi_gpt) - LEN_EFI_PAD); 1392 efi->efi_gpt_Reserved1 = 0; 1393 efi->efi_gpt_MyLBA = LE_64(1ULL); 1394 efi->efi_gpt_AlternateLBA = LE_64(lba_backup_gpt_hdr); 1395 efi->efi_gpt_FirstUsableLBA = LE_64(vtoc->efi_first_u_lba); 1396 efi->efi_gpt_LastUsableLBA = LE_64(vtoc->efi_last_u_lba); 1397 efi->efi_gpt_PartitionEntryLBA = LE_64(2ULL); 1398 efi->efi_gpt_NumberOfPartitionEntries = LE_32(vtoc->efi_nparts); 1399 efi->efi_gpt_SizeOfPartitionEntry = LE_32(sizeof (struct efi_gpe)); 1400 UUID_LE_CONVERT(efi->efi_gpt_DiskGUID, vtoc->efi_disk_uguid); 1401 1402 /* LINTED -- always longlong aligned */ 1403 efi_parts = (efi_gpe_t *)((char *)dk_ioc.dki_data + vtoc->efi_lbasize); 1404 1405 for (i = 0; i < vtoc->efi_nparts; i++) { 1406 for (j = 0; 1407 j < sizeof (conversion_array) / 1408 sizeof (struct uuid_to_ptag); j++) { 1409 1410 if (vtoc->efi_parts[i].p_tag == j) { 1411 UUID_LE_CONVERT( 1412 efi_parts[i].efi_gpe_PartitionTypeGUID, 1413 conversion_array[j].uuid); 1414 break; 1415 } 1416 } 1417 1418 if (j == sizeof (conversion_array) / 1419 sizeof (struct uuid_to_ptag)) { 1420 /* 1421 * If we didn't have a matching uuid match, bail here. 1422 * Don't write a label with unknown uuid. 1423 */ 1424 if (efi_debug) { 1425 (void) fprintf(stderr, 1426 "Unknown uuid for p_tag %d\n", 1427 vtoc->efi_parts[i].p_tag); 1428 } 1429 return (VT_EINVAL); 1430 } 1431 1432 /* Zero's should be written for empty partitions */ 1433 if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED) 1434 continue; 1435 1436 efi_parts[i].efi_gpe_StartingLBA = 1437 LE_64(vtoc->efi_parts[i].p_start); 1438 efi_parts[i].efi_gpe_EndingLBA = 1439 LE_64(vtoc->efi_parts[i].p_start + 1440 vtoc->efi_parts[i].p_size - 1); 1441 efi_parts[i].efi_gpe_Attributes.PartitionAttrs = 1442 LE_16(vtoc->efi_parts[i].p_flag); 1443 for (j = 0; j < EFI_PART_NAME_LEN; j++) { 1444 efi_parts[i].efi_gpe_PartitionName[j] = 1445 LE_16((ushort_t)vtoc->efi_parts[i].p_name[j]); 1446 } 1447 if ((vtoc->efi_parts[i].p_tag != V_UNASSIGNED) && 1448 uuid_is_null((uchar_t *)&vtoc->efi_parts[i].p_uguid)) { 1449 (void) uuid_generate((uchar_t *) 1450 &vtoc->efi_parts[i].p_uguid); 1451 } 1452 memcpy(&efi_parts[i].efi_gpe_UniquePartitionGUID, 1453 &vtoc->efi_parts[i].p_uguid, 1454 sizeof (uuid_t)); 1455 } 1456 efi->efi_gpt_PartitionEntryArrayCRC32 = 1457 LE_32(efi_crc32((unsigned char *)efi_parts, 1458 vtoc->efi_nparts * (int)sizeof (struct efi_gpe))); 1459 efi->efi_gpt_HeaderCRC32 = 1460 LE_32(efi_crc32((unsigned char *)efi, 1461 LE_32(efi->efi_gpt_HeaderSize))); 1462 1463 if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) { 1464 free(dk_ioc.dki_data); 1465 switch (errno) { 1466 case EIO: 1467 return (VT_EIO); 1468 case EINVAL: 1469 return (VT_EINVAL); 1470 default: 1471 return (VT_ERROR); 1472 } 1473 } 1474 /* if it's a metadevice we're done */ 1475 if (md_flag) { 1476 free(dk_ioc.dki_data); 1477 return (0); 1478 } 1479 1480 /* write backup partition array */ 1481 dk_ioc.dki_lba = vtoc->efi_last_u_lba + 1; 1482 dk_ioc.dki_length -= vtoc->efi_lbasize; 1483 /* LINTED */ 1484 dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data + 1485 vtoc->efi_lbasize); 1486 1487 if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) { 1488 /* 1489 * we wrote the primary label okay, so don't fail 1490 */ 1491 if (efi_debug) { 1492 (void) fprintf(stderr, 1493 "write of backup partitions to block %llu " 1494 "failed, errno %d\n", 1495 vtoc->efi_last_u_lba + 1, 1496 errno); 1497 } 1498 } 1499 /* 1500 * now swap MyLBA and AlternateLBA fields and write backup 1501 * partition table header 1502 */ 1503 dk_ioc.dki_lba = lba_backup_gpt_hdr; 1504 dk_ioc.dki_length = vtoc->efi_lbasize; 1505 /* LINTED */ 1506 dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data - 1507 vtoc->efi_lbasize); 1508 efi->efi_gpt_AlternateLBA = LE_64(1ULL); 1509 efi->efi_gpt_MyLBA = LE_64(lba_backup_gpt_hdr); 1510 efi->efi_gpt_PartitionEntryLBA = LE_64(vtoc->efi_last_u_lba + 1); 1511 efi->efi_gpt_HeaderCRC32 = 0; 1512 efi->efi_gpt_HeaderCRC32 = 1513 LE_32(efi_crc32((unsigned char *)dk_ioc.dki_data, 1514 LE_32(efi->efi_gpt_HeaderSize))); 1515 1516 if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) { 1517 if (efi_debug) { 1518 (void) fprintf(stderr, 1519 "write of backup header to block %llu failed, " 1520 "errno %d\n", 1521 lba_backup_gpt_hdr, 1522 errno); 1523 } 1524 } 1525 /* write the PMBR */ 1526 (void) write_pmbr(fd, vtoc); 1527 free(dk_ioc.dki_data); 1528 1529 return (0); 1530 } 1531 1532 void 1533 efi_free(struct dk_gpt *ptr) 1534 { 1535 free(ptr); 1536 } 1537 1538 void 1539 efi_err_check(struct dk_gpt *vtoc) 1540 { 1541 int resv_part = -1; 1542 int i, j; 1543 diskaddr_t istart, jstart, isize, jsize, endsect; 1544 int overlap = 0; 1545 1546 /* 1547 * make sure no partitions overlap 1548 */ 1549 for (i = 0; i < vtoc->efi_nparts; i++) { 1550 /* It can't be unassigned and have an actual size */ 1551 if ((vtoc->efi_parts[i].p_tag == V_UNASSIGNED) && 1552 (vtoc->efi_parts[i].p_size != 0)) { 1553 (void) fprintf(stderr, 1554 "partition %d is \"unassigned\" but has a size " 1555 "of %llu\n", i, vtoc->efi_parts[i].p_size); 1556 } 1557 if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED) { 1558 continue; 1559 } 1560 if (vtoc->efi_parts[i].p_tag == V_RESERVED) { 1561 if (resv_part != -1) { 1562 (void) fprintf(stderr, 1563 "found duplicate reserved partition at " 1564 "%d\n", i); 1565 } 1566 resv_part = i; 1567 if (vtoc->efi_parts[i].p_size != EFI_MIN_RESV_SIZE) 1568 (void) fprintf(stderr, 1569 "Warning: reserved partition size must " 1570 "be %d sectors\n", EFI_MIN_RESV_SIZE); 1571 } 1572 if ((vtoc->efi_parts[i].p_start < vtoc->efi_first_u_lba) || 1573 (vtoc->efi_parts[i].p_start > vtoc->efi_last_u_lba)) { 1574 (void) fprintf(stderr, 1575 "Partition %d starts at %llu\n", 1576 i, 1577 vtoc->efi_parts[i].p_start); 1578 (void) fprintf(stderr, 1579 "It must be between %llu and %llu.\n", 1580 vtoc->efi_first_u_lba, 1581 vtoc->efi_last_u_lba); 1582 } 1583 if ((vtoc->efi_parts[i].p_start + 1584 vtoc->efi_parts[i].p_size < 1585 vtoc->efi_first_u_lba) || 1586 (vtoc->efi_parts[i].p_start + 1587 vtoc->efi_parts[i].p_size > 1588 vtoc->efi_last_u_lba + 1)) { 1589 (void) fprintf(stderr, 1590 "Partition %d ends at %llu\n", 1591 i, 1592 vtoc->efi_parts[i].p_start + 1593 vtoc->efi_parts[i].p_size); 1594 (void) fprintf(stderr, 1595 "It must be between %llu and %llu.\n", 1596 vtoc->efi_first_u_lba, 1597 vtoc->efi_last_u_lba); 1598 } 1599 1600 for (j = 0; j < vtoc->efi_nparts; j++) { 1601 isize = vtoc->efi_parts[i].p_size; 1602 jsize = vtoc->efi_parts[j].p_size; 1603 istart = vtoc->efi_parts[i].p_start; 1604 jstart = vtoc->efi_parts[j].p_start; 1605 if ((i != j) && (isize != 0) && (jsize != 0)) { 1606 endsect = jstart + jsize -1; 1607 if ((jstart <= istart) && 1608 (istart <= endsect)) { 1609 if (!overlap) { 1610 (void) fprintf(stderr, 1611 "label error: EFI Labels do not " 1612 "support overlapping partitions\n"); 1613 } 1614 (void) fprintf(stderr, 1615 "Partition %d overlaps partition " 1616 "%d.\n", i, j); 1617 overlap = 1; 1618 } 1619 } 1620 } 1621 } 1622 /* make sure there is a reserved partition */ 1623 if (resv_part == -1) { 1624 (void) fprintf(stderr, 1625 "no reserved partition found\n"); 1626 } 1627 } 1628