1 // SPDX-License-Identifier: CDDL-1.0 2 /* 3 * CDDL HEADER START 4 * 5 * The contents of this file are subject to the terms of the 6 * Common Development and Distribution License (the "License"). 7 * You may not use this file except in compliance with the License. 8 * 9 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 10 * or https://opensource.org/licenses/CDDL-1.0. 11 * See the License for the specific language governing permissions 12 * and limitations under the License. 13 * 14 * When distributing Covered Code, include this CDDL HEADER in each 15 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 16 * If applicable, add the following below this CDDL HEADER, with the 17 * fields enclosed by brackets "[]" replaced with your own identifying 18 * information: Portions Copyright [yyyy] [name of copyright owner] 19 * 20 * CDDL HEADER END 21 */ 22 23 /* 24 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. 25 * Copyright 2012 Nexenta Systems, Inc. All rights reserved. 26 * Copyright (c) 2018 by Delphix. All rights reserved. 27 */ 28 29 #include <stdio.h> 30 #include <stdlib.h> 31 #include <errno.h> 32 #include <string.h> 33 #include <unistd.h> 34 #include <uuid/uuid.h> 35 #include <zlib.h> 36 #include <libintl.h> 37 #include <sys/types.h> 38 #include <sys/dkio.h> 39 #include <sys/mhd.h> 40 #include <sys/param.h> 41 #include <sys/dktp/fdisk.h> 42 #include <sys/efi_partition.h> 43 #include <sys/byteorder.h> 44 #include <sys/vdev_disk.h> 45 #include <linux/fs.h> 46 #include <linux/blkpg.h> 47 48 static struct uuid_to_ptag { 49 struct uuid uuid; 50 } conversion_array[] = { 51 { EFI_UNUSED }, 52 { EFI_BOOT }, 53 { EFI_ROOT }, 54 { EFI_SWAP }, 55 { EFI_USR }, 56 { EFI_BACKUP }, 57 { EFI_UNUSED }, /* STAND is never used */ 58 { EFI_VAR }, 59 { EFI_HOME }, 60 { EFI_ALTSCTR }, 61 { EFI_UNUSED }, /* CACHE (cachefs) is never used */ 62 { EFI_RESERVED }, 63 { EFI_SYSTEM }, 64 { EFI_LEGACY_MBR }, 65 { EFI_SYMC_PUB }, 66 { EFI_SYMC_CDS }, 67 { EFI_MSFT_RESV }, 68 { EFI_DELL_BASIC }, 69 { EFI_DELL_RAID }, 70 { EFI_DELL_SWAP }, 71 { EFI_DELL_LVM }, 72 { EFI_DELL_RESV }, 73 { EFI_AAPL_HFS }, 74 { EFI_AAPL_UFS }, 75 { EFI_FREEBSD_BOOT }, 76 { EFI_FREEBSD_SWAP }, 77 { EFI_FREEBSD_UFS }, 78 { EFI_FREEBSD_VINUM }, 79 { EFI_FREEBSD_ZFS }, 80 { EFI_BIOS_BOOT }, 81 { EFI_INTC_RS }, 82 { EFI_SNE_BOOT }, 83 { EFI_LENOVO_BOOT }, 84 { EFI_MSFT_LDMM }, 85 { EFI_MSFT_LDMD }, 86 { EFI_MSFT_RE }, 87 { EFI_IBM_GPFS }, 88 { EFI_MSFT_STORAGESPACES }, 89 { EFI_HPQ_DATA }, 90 { EFI_HPQ_SVC }, 91 { EFI_RHT_DATA }, 92 { EFI_RHT_HOME }, 93 { EFI_RHT_SRV }, 94 { EFI_RHT_DMCRYPT }, 95 { EFI_RHT_LUKS }, 96 { EFI_FREEBSD_DISKLABEL }, 97 { EFI_AAPL_RAID }, 98 { EFI_AAPL_RAIDOFFLINE }, 99 { EFI_AAPL_BOOT }, 100 { EFI_AAPL_LABEL }, 101 { EFI_AAPL_TVRECOVERY }, 102 { EFI_AAPL_CORESTORAGE }, 103 { EFI_NETBSD_SWAP }, 104 { EFI_NETBSD_FFS }, 105 { EFI_NETBSD_LFS }, 106 { EFI_NETBSD_RAID }, 107 { EFI_NETBSD_CAT }, 108 { EFI_NETBSD_CRYPT }, 109 { EFI_GOOG_KERN }, 110 { EFI_GOOG_ROOT }, 111 { EFI_GOOG_RESV }, 112 { EFI_HAIKU_BFS }, 113 { EFI_MIDNIGHTBSD_BOOT }, 114 { EFI_MIDNIGHTBSD_DATA }, 115 { EFI_MIDNIGHTBSD_SWAP }, 116 { EFI_MIDNIGHTBSD_UFS }, 117 { EFI_MIDNIGHTBSD_VINUM }, 118 { EFI_MIDNIGHTBSD_ZFS }, 119 { EFI_CEPH_JOURNAL }, 120 { EFI_CEPH_DMCRYPTJOURNAL }, 121 { EFI_CEPH_OSD }, 122 { EFI_CEPH_DMCRYPTOSD }, 123 { EFI_CEPH_CREATE }, 124 { EFI_CEPH_DMCRYPTCREATE }, 125 { EFI_OPENBSD_DISKLABEL }, 126 { EFI_BBRY_QNX }, 127 { EFI_BELL_PLAN9 }, 128 { EFI_VMW_KCORE }, 129 { EFI_VMW_VMFS }, 130 { EFI_VMW_RESV }, 131 { EFI_RHT_ROOTX86 }, 132 { EFI_RHT_ROOTAMD64 }, 133 { EFI_RHT_ROOTARM }, 134 { EFI_RHT_ROOTARM64 }, 135 { EFI_ACRONIS_SECUREZONE }, 136 { EFI_ONIE_BOOT }, 137 { EFI_ONIE_CONFIG }, 138 { EFI_IBM_PPRPBOOT }, 139 { EFI_FREEDESKTOP_BOOT } 140 }; 141 142 int efi_debug = 0; 143 144 static int efi_read(int, struct dk_gpt *); 145 146 /* 147 * Return a 32-bit CRC of the contents of the buffer. Pre-and-post 148 * one's conditioning will be handled by crc32() internally. 149 */ 150 static uint32_t 151 efi_crc32(const unsigned char *buf, unsigned int size) 152 { 153 uint32_t crc = crc32(0, Z_NULL, 0); 154 155 crc = crc32(crc, buf, size); 156 157 return (crc); 158 } 159 160 static int 161 read_disk_info(int fd, diskaddr_t *capacity, uint_t *lbsize) 162 { 163 int sector_size; 164 unsigned long long capacity_size; 165 166 if (ioctl(fd, BLKSSZGET, §or_size) < 0) 167 return (-1); 168 169 if (ioctl(fd, BLKGETSIZE64, &capacity_size) < 0) 170 return (-1); 171 172 *lbsize = (uint_t)sector_size; 173 *capacity = (diskaddr_t)(capacity_size / sector_size); 174 175 return (0); 176 } 177 178 /* 179 * Return back the device name associated with the file descriptor. The 180 * caller is responsible for freeing the memory associated with the 181 * returned string. 182 */ 183 static char * 184 efi_get_devname(int fd) 185 { 186 char path[32]; 187 188 /* 189 * The libefi API only provides the open fd and not the file path. 190 * To handle this realpath(3) is used to resolve the block device 191 * name from /proc/self/fd/<fd>. 192 */ 193 (void) snprintf(path, sizeof (path), "/proc/self/fd/%d", fd); 194 return (realpath(path, NULL)); 195 } 196 197 static int 198 efi_get_info(int fd, struct dk_cinfo *dki_info) 199 { 200 char *dev_path; 201 int rval = 0; 202 203 memset(dki_info, 0, sizeof (*dki_info)); 204 205 /* 206 * The simplest way to get the partition number under linux is 207 * to parse it out of the /dev/<disk><partition> block device name. 208 * The kernel creates this using the partition number when it 209 * populates /dev/ so it may be trusted. The tricky bit here is 210 * that the naming convention is based on the block device type. 211 * So we need to take this in to account when parsing out the 212 * partition information. Aside from the partition number we collect 213 * some additional device info. 214 */ 215 dev_path = efi_get_devname(fd); 216 if (dev_path == NULL) 217 goto error; 218 219 if ((strncmp(dev_path, "/dev/sd", 7) == 0)) { 220 strcpy(dki_info->dki_cname, "sd"); 221 dki_info->dki_ctype = DKC_SCSI_CCS; 222 rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu", 223 dki_info->dki_dname, 224 &dki_info->dki_partition); 225 } else if ((strncmp(dev_path, "/dev/hd", 7) == 0)) { 226 strcpy(dki_info->dki_cname, "hd"); 227 dki_info->dki_ctype = DKC_DIRECT; 228 rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu", 229 dki_info->dki_dname, 230 &dki_info->dki_partition); 231 } else if ((strncmp(dev_path, "/dev/md", 7) == 0)) { 232 strcpy(dki_info->dki_cname, "pseudo"); 233 dki_info->dki_ctype = DKC_MD; 234 strcpy(dki_info->dki_dname, "md"); 235 rval = sscanf(dev_path, "/dev/md%[0-9]p%hu", 236 dki_info->dki_dname + 2, 237 &dki_info->dki_partition); 238 } else if ((strncmp(dev_path, "/dev/vd", 7) == 0)) { 239 strcpy(dki_info->dki_cname, "vd"); 240 dki_info->dki_ctype = DKC_MD; 241 rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu", 242 dki_info->dki_dname, 243 &dki_info->dki_partition); 244 } else if ((strncmp(dev_path, "/dev/xvd", 8) == 0)) { 245 strcpy(dki_info->dki_cname, "xvd"); 246 dki_info->dki_ctype = DKC_MD; 247 rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu", 248 dki_info->dki_dname, 249 &dki_info->dki_partition); 250 } else if ((strncmp(dev_path, "/dev/zd", 7) == 0)) { 251 strcpy(dki_info->dki_cname, "zd"); 252 dki_info->dki_ctype = DKC_MD; 253 strcpy(dki_info->dki_dname, "zd"); 254 rval = sscanf(dev_path, "/dev/zd%[0-9]p%hu", 255 dki_info->dki_dname + 2, 256 &dki_info->dki_partition); 257 } else if ((strncmp(dev_path, "/dev/dm-", 8) == 0)) { 258 strcpy(dki_info->dki_cname, "pseudo"); 259 dki_info->dki_ctype = DKC_VBD; 260 strcpy(dki_info->dki_dname, "dm-"); 261 rval = sscanf(dev_path, "/dev/dm-%[0-9]p%hu", 262 dki_info->dki_dname + 3, 263 &dki_info->dki_partition); 264 } else if ((strncmp(dev_path, "/dev/ram", 8) == 0)) { 265 strcpy(dki_info->dki_cname, "pseudo"); 266 dki_info->dki_ctype = DKC_PCMCIA_MEM; 267 strcpy(dki_info->dki_dname, "ram"); 268 rval = sscanf(dev_path, "/dev/ram%[0-9]p%hu", 269 dki_info->dki_dname + 3, 270 &dki_info->dki_partition); 271 } else if ((strncmp(dev_path, "/dev/loop", 9) == 0)) { 272 strcpy(dki_info->dki_cname, "pseudo"); 273 dki_info->dki_ctype = DKC_VBD; 274 strcpy(dki_info->dki_dname, "loop"); 275 rval = sscanf(dev_path, "/dev/loop%[0-9]p%hu", 276 dki_info->dki_dname + 4, 277 &dki_info->dki_partition); 278 } else if ((strncmp(dev_path, "/dev/nvme", 9) == 0)) { 279 strcpy(dki_info->dki_cname, "nvme"); 280 dki_info->dki_ctype = DKC_SCSI_CCS; 281 strcpy(dki_info->dki_dname, "nvme"); 282 (void) sscanf(dev_path, "/dev/nvme%[0-9]", 283 dki_info->dki_dname + 4); 284 size_t controller_length = strlen( 285 dki_info->dki_dname); 286 strcpy(dki_info->dki_dname + controller_length, 287 "n"); 288 rval = sscanf(dev_path, 289 "/dev/nvme%*[0-9]n%[0-9]p%hu", 290 dki_info->dki_dname + controller_length + 1, 291 &dki_info->dki_partition); 292 } else { 293 strcpy(dki_info->dki_dname, "unknown"); 294 strcpy(dki_info->dki_cname, "unknown"); 295 dki_info->dki_ctype = DKC_UNKNOWN; 296 } 297 298 switch (rval) { 299 case 0: 300 errno = EINVAL; 301 goto error; 302 case 1: 303 dki_info->dki_partition = 0; 304 } 305 306 free(dev_path); 307 308 return (0); 309 error: 310 if (efi_debug) 311 (void) fprintf(stderr, "DKIOCINFO errno 0x%x\n", errno); 312 313 switch (errno) { 314 case EIO: 315 return (VT_EIO); 316 case EINVAL: 317 return (VT_EINVAL); 318 default: 319 return (VT_ERROR); 320 } 321 } 322 323 /* 324 * the number of blocks the EFI label takes up (round up to nearest 325 * block) 326 */ 327 #define NBLOCKS(p, l) (1 + ((((p) * (int)sizeof (efi_gpe_t)) + \ 328 ((l) - 1)) / (l))) 329 /* number of partitions -- limited by what we can malloc */ 330 #define MAX_PARTS ((4294967295UL - sizeof (struct dk_gpt)) / \ 331 sizeof (struct dk_part)) 332 333 int 334 efi_alloc_and_init(int fd, uint32_t nparts, struct dk_gpt **vtoc) 335 { 336 diskaddr_t capacity = 0; 337 uint_t lbsize = 0; 338 uint_t nblocks; 339 size_t length; 340 struct dk_gpt *vptr; 341 struct uuid uuid; 342 struct dk_cinfo dki_info; 343 344 if (read_disk_info(fd, &capacity, &lbsize) != 0) 345 return (-1); 346 347 if (efi_get_info(fd, &dki_info) != 0) 348 return (-1); 349 350 if (dki_info.dki_partition != 0) 351 return (-1); 352 353 if ((dki_info.dki_ctype == DKC_PCMCIA_MEM) || 354 (dki_info.dki_ctype == DKC_VBD) || 355 (dki_info.dki_ctype == DKC_UNKNOWN)) 356 return (-1); 357 358 nblocks = NBLOCKS(nparts, lbsize); 359 if ((nblocks * lbsize) < EFI_MIN_ARRAY_SIZE + lbsize) { 360 /* 16K plus one block for the GPT */ 361 nblocks = EFI_MIN_ARRAY_SIZE / lbsize + 1; 362 } 363 364 if (nparts > MAX_PARTS) { 365 if (efi_debug) { 366 (void) fprintf(stderr, 367 "the maximum number of partitions supported is %lu\n", 368 MAX_PARTS); 369 } 370 return (-1); 371 } 372 373 length = sizeof (struct dk_gpt) + 374 sizeof (struct dk_part) * (nparts - 1); 375 376 vptr = calloc(1, length); 377 if (vptr == NULL) 378 return (-1); 379 380 *vtoc = vptr; 381 382 vptr->efi_version = EFI_VERSION_CURRENT; 383 vptr->efi_lbasize = lbsize; 384 vptr->efi_nparts = nparts; 385 /* 386 * add one block here for the PMBR; on disks with a 512 byte 387 * block size and 128 or fewer partitions, efi_first_u_lba 388 * should work out to "34" 389 */ 390 vptr->efi_first_u_lba = nblocks + 1; 391 vptr->efi_last_lba = capacity - 1; 392 vptr->efi_altern_lba = capacity -1; 393 vptr->efi_last_u_lba = vptr->efi_last_lba - nblocks; 394 395 (void) uuid_generate((uchar_t *)&uuid); 396 UUID_LE_CONVERT(vptr->efi_disk_uguid, uuid); 397 return (0); 398 } 399 400 /* 401 * Read EFI - return partition number upon success. 402 */ 403 int 404 efi_alloc_and_read(int fd, struct dk_gpt **vtoc) 405 { 406 int rval; 407 uint32_t nparts; 408 int length; 409 struct dk_gpt *vptr; 410 411 /* figure out the number of entries that would fit into 16K */ 412 nparts = EFI_MIN_ARRAY_SIZE / sizeof (efi_gpe_t); 413 length = (int) sizeof (struct dk_gpt) + 414 (int) sizeof (struct dk_part) * (nparts - 1); 415 vptr = calloc(1, length); 416 417 if (vptr == NULL) 418 return (VT_ERROR); 419 420 vptr->efi_nparts = nparts; 421 rval = efi_read(fd, vptr); 422 423 if ((rval == VT_EINVAL) && vptr->efi_nparts > nparts) { 424 void *tmp; 425 length = (int) sizeof (struct dk_gpt) + 426 (int) sizeof (struct dk_part) * (vptr->efi_nparts - 1); 427 if ((tmp = realloc(vptr, length)) == NULL) { 428 /* cppcheck-suppress doubleFree */ 429 free(vptr); 430 *vtoc = NULL; 431 return (VT_ERROR); 432 } else { 433 vptr = tmp; 434 rval = efi_read(fd, vptr); 435 } 436 } 437 438 if (rval < 0) { 439 if (efi_debug) { 440 (void) fprintf(stderr, 441 "read of EFI table failed, rval=%d\n", rval); 442 } 443 free(vptr); 444 *vtoc = NULL; 445 } else { 446 *vtoc = vptr; 447 } 448 449 return (rval); 450 } 451 452 static int 453 efi_ioctl(int fd, int cmd, dk_efi_t *dk_ioc) 454 { 455 void *data = dk_ioc->dki_data; 456 int error; 457 diskaddr_t capacity; 458 uint_t lbsize; 459 460 /* 461 * When the IO is not being performed in kernel as an ioctl we need 462 * to know the sector size so we can seek to the proper byte offset. 463 */ 464 if (read_disk_info(fd, &capacity, &lbsize) == -1) { 465 if (efi_debug) 466 fprintf(stderr, "unable to read disk info: %d", errno); 467 468 errno = EIO; 469 return (-1); 470 } 471 472 switch (cmd) { 473 case DKIOCGETEFI: 474 if (lbsize == 0) { 475 if (efi_debug) 476 (void) fprintf(stderr, "DKIOCGETEFI assuming " 477 "LBA %d bytes\n", DEV_BSIZE); 478 479 lbsize = DEV_BSIZE; 480 } 481 482 error = lseek(fd, dk_ioc->dki_lba * lbsize, SEEK_SET); 483 if (error == -1) { 484 if (efi_debug) 485 (void) fprintf(stderr, "DKIOCGETEFI lseek " 486 "error: %d\n", errno); 487 return (error); 488 } 489 490 error = read(fd, data, dk_ioc->dki_length); 491 if (error == -1) { 492 if (efi_debug) 493 (void) fprintf(stderr, "DKIOCGETEFI read " 494 "error: %d\n", errno); 495 return (error); 496 } 497 498 if (error != dk_ioc->dki_length) { 499 if (efi_debug) 500 (void) fprintf(stderr, "DKIOCGETEFI short " 501 "read of %d bytes\n", error); 502 errno = EIO; 503 return (-1); 504 } 505 error = 0; 506 break; 507 508 case DKIOCSETEFI: 509 if (lbsize == 0) { 510 if (efi_debug) 511 (void) fprintf(stderr, "DKIOCSETEFI unknown " 512 "LBA size\n"); 513 errno = EIO; 514 return (-1); 515 } 516 517 error = lseek(fd, dk_ioc->dki_lba * lbsize, SEEK_SET); 518 if (error == -1) { 519 if (efi_debug) 520 (void) fprintf(stderr, "DKIOCSETEFI lseek " 521 "error: %d\n", errno); 522 return (error); 523 } 524 525 error = write(fd, data, dk_ioc->dki_length); 526 if (error == -1) { 527 if (efi_debug) 528 (void) fprintf(stderr, "DKIOCSETEFI write " 529 "error: %d\n", errno); 530 return (error); 531 } 532 533 if (error != dk_ioc->dki_length) { 534 if (efi_debug) 535 (void) fprintf(stderr, "DKIOCSETEFI short " 536 "write of %d bytes\n", error); 537 errno = EIO; 538 return (-1); 539 } 540 541 /* Sync the new EFI table to disk */ 542 error = fsync(fd); 543 if (error == -1) 544 return (error); 545 546 /* Ensure any local disk cache is also flushed */ 547 if (ioctl(fd, BLKFLSBUF, 0) == -1) 548 return (error); 549 550 error = 0; 551 break; 552 553 default: 554 if (efi_debug) 555 (void) fprintf(stderr, "unsupported ioctl()\n"); 556 557 errno = EIO; 558 return (-1); 559 } 560 561 return (error); 562 } 563 564 int 565 efi_rescan(int fd) 566 { 567 int retry = 10; 568 569 /* Notify the kernel a devices partition table has been updated */ 570 while (ioctl(fd, BLKRRPART) != 0) { 571 if ((--retry == 0) || (errno != EBUSY)) { 572 (void) fprintf(stderr, "the kernel failed to rescan " 573 "the partition table: %d\n", errno); 574 return (-1); 575 } 576 usleep(50000); 577 } 578 579 return (0); 580 } 581 582 static int 583 check_label(int fd, dk_efi_t *dk_ioc) 584 { 585 efi_gpt_t *efi; 586 uint_t crc; 587 588 if (efi_ioctl(fd, DKIOCGETEFI, dk_ioc) == -1) { 589 switch (errno) { 590 case EIO: 591 return (VT_EIO); 592 default: 593 return (VT_ERROR); 594 } 595 } 596 efi = dk_ioc->dki_data; 597 if (efi->efi_gpt_Signature != LE_64(EFI_SIGNATURE)) { 598 if (efi_debug) 599 (void) fprintf(stderr, 600 "Bad EFI signature: 0x%llx != 0x%llx\n", 601 (long long)efi->efi_gpt_Signature, 602 (long long)LE_64(EFI_SIGNATURE)); 603 return (VT_EINVAL); 604 } 605 606 /* 607 * check CRC of the header; the size of the header should 608 * never be larger than one block 609 */ 610 crc = efi->efi_gpt_HeaderCRC32; 611 efi->efi_gpt_HeaderCRC32 = 0; 612 len_t headerSize = (len_t)LE_32(efi->efi_gpt_HeaderSize); 613 614 if (headerSize < EFI_MIN_LABEL_SIZE || headerSize > EFI_LABEL_SIZE) { 615 if (efi_debug) 616 (void) fprintf(stderr, 617 "Invalid EFI HeaderSize %llu. Assuming %d.\n", 618 headerSize, EFI_MIN_LABEL_SIZE); 619 } 620 621 if ((headerSize > dk_ioc->dki_length) || 622 crc != LE_32(efi_crc32((unsigned char *)efi, headerSize))) { 623 if (efi_debug) 624 (void) fprintf(stderr, 625 "Bad EFI CRC: 0x%x != 0x%x\n", 626 crc, LE_32(efi_crc32((unsigned char *)efi, 627 headerSize))); 628 return (VT_EINVAL); 629 } 630 631 return (0); 632 } 633 634 static int 635 efi_read(int fd, struct dk_gpt *vtoc) 636 { 637 int i, j; 638 int label_len; 639 int rval = 0; 640 int md_flag = 0; 641 int vdc_flag = 0; 642 diskaddr_t capacity = 0; 643 uint_t lbsize = 0; 644 struct dk_minfo disk_info; 645 dk_efi_t dk_ioc; 646 efi_gpt_t *efi; 647 efi_gpe_t *efi_parts; 648 struct dk_cinfo dki_info; 649 uint32_t user_length; 650 boolean_t legacy_label = B_FALSE; 651 652 /* 653 * get the partition number for this file descriptor. 654 */ 655 if ((rval = efi_get_info(fd, &dki_info)) != 0) 656 return (rval); 657 658 if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) && 659 (strncmp(dki_info.dki_dname, "md", 3) == 0)) { 660 md_flag++; 661 } else if ((strncmp(dki_info.dki_cname, "vdc", 4) == 0) && 662 (strncmp(dki_info.dki_dname, "vdc", 4) == 0)) { 663 /* 664 * The controller and drive name "vdc" (virtual disk client) 665 * indicates a LDoms virtual disk. 666 */ 667 vdc_flag++; 668 } 669 670 /* get the LBA size */ 671 if (read_disk_info(fd, &capacity, &lbsize) == -1) { 672 if (efi_debug) { 673 (void) fprintf(stderr, 674 "unable to read disk info: %d", 675 errno); 676 } 677 return (VT_EINVAL); 678 } 679 680 disk_info.dki_lbsize = lbsize; 681 disk_info.dki_capacity = capacity; 682 683 if (disk_info.dki_lbsize == 0) { 684 if (efi_debug) { 685 (void) fprintf(stderr, 686 "efi_read: assuming LBA 512 bytes\n"); 687 } 688 disk_info.dki_lbsize = DEV_BSIZE; 689 } 690 /* 691 * Read the EFI GPT to figure out how many partitions we need 692 * to deal with. 693 */ 694 dk_ioc.dki_lba = 1; 695 if (NBLOCKS(vtoc->efi_nparts, disk_info.dki_lbsize) < 34) { 696 label_len = EFI_MIN_ARRAY_SIZE + disk_info.dki_lbsize; 697 } else { 698 label_len = vtoc->efi_nparts * (int) sizeof (efi_gpe_t) + 699 disk_info.dki_lbsize; 700 if (label_len % disk_info.dki_lbsize) { 701 /* pad to physical sector size */ 702 label_len += disk_info.dki_lbsize; 703 label_len &= ~(disk_info.dki_lbsize - 1); 704 } 705 } 706 707 if (posix_memalign((void **)&dk_ioc.dki_data, 708 disk_info.dki_lbsize, label_len)) 709 return (VT_ERROR); 710 711 memset(dk_ioc.dki_data, 0, label_len); 712 dk_ioc.dki_length = disk_info.dki_lbsize; 713 user_length = vtoc->efi_nparts; 714 efi = dk_ioc.dki_data; 715 if (md_flag) { 716 dk_ioc.dki_length = label_len; 717 if (efi_ioctl(fd, DKIOCGETEFI, &dk_ioc) == -1) { 718 switch (errno) { 719 case EIO: 720 return (VT_EIO); 721 default: 722 return (VT_ERROR); 723 } 724 } 725 } else if ((rval = check_label(fd, &dk_ioc)) == VT_EINVAL) { 726 /* 727 * No valid label here; try the alternate. Note that here 728 * we just read GPT header and save it into dk_ioc.data, 729 * Later, we will read GUID partition entry array if we 730 * can get valid GPT header. 731 */ 732 733 /* 734 * This is a workaround for legacy systems. In the past, the 735 * last sector of SCSI disk was invisible on x86 platform. At 736 * that time, backup label was saved on the next to the last 737 * sector. It is possible for users to move a disk from previous 738 * solaris system to present system. Here, we attempt to search 739 * legacy backup EFI label first. 740 */ 741 dk_ioc.dki_lba = disk_info.dki_capacity - 2; 742 dk_ioc.dki_length = disk_info.dki_lbsize; 743 rval = check_label(fd, &dk_ioc); 744 if (rval == VT_EINVAL) { 745 /* 746 * we didn't find legacy backup EFI label, try to 747 * search backup EFI label in the last block. 748 */ 749 dk_ioc.dki_lba = disk_info.dki_capacity - 1; 750 dk_ioc.dki_length = disk_info.dki_lbsize; 751 rval = check_label(fd, &dk_ioc); 752 if (rval == 0) { 753 legacy_label = B_TRUE; 754 if (efi_debug) 755 (void) fprintf(stderr, 756 "efi_read: primary label corrupt; " 757 "using EFI backup label located on" 758 " the last block\n"); 759 } 760 } else { 761 if ((efi_debug) && (rval == 0)) 762 (void) fprintf(stderr, "efi_read: primary label" 763 " corrupt; using legacy EFI backup label " 764 " located on the next to last block\n"); 765 } 766 767 if (rval == 0) { 768 dk_ioc.dki_lba = LE_64(efi->efi_gpt_PartitionEntryLBA); 769 vtoc->efi_flags |= EFI_GPT_PRIMARY_CORRUPT; 770 vtoc->efi_nparts = 771 LE_32(efi->efi_gpt_NumberOfPartitionEntries); 772 /* 773 * Partition tables are between backup GPT header 774 * table and ParitionEntryLBA (the starting LBA of 775 * the GUID partition entries array). Now that we 776 * already got valid GPT header and saved it in 777 * dk_ioc.dki_data, we try to get GUID partition 778 * entry array here. 779 */ 780 /* LINTED */ 781 dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data 782 + disk_info.dki_lbsize); 783 if (legacy_label) 784 dk_ioc.dki_length = disk_info.dki_capacity - 1 - 785 dk_ioc.dki_lba; 786 else 787 dk_ioc.dki_length = disk_info.dki_capacity - 2 - 788 dk_ioc.dki_lba; 789 dk_ioc.dki_length *= disk_info.dki_lbsize; 790 if (dk_ioc.dki_length > 791 ((len_t)label_len - sizeof (*dk_ioc.dki_data))) { 792 rval = VT_EINVAL; 793 } else { 794 /* 795 * read GUID partition entry array 796 */ 797 rval = efi_ioctl(fd, DKIOCGETEFI, &dk_ioc); 798 } 799 } 800 801 } else if (rval == 0) { 802 803 dk_ioc.dki_lba = LE_64(efi->efi_gpt_PartitionEntryLBA); 804 /* LINTED */ 805 dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data 806 + disk_info.dki_lbsize); 807 dk_ioc.dki_length = label_len - disk_info.dki_lbsize; 808 rval = efi_ioctl(fd, DKIOCGETEFI, &dk_ioc); 809 810 } else if (vdc_flag && rval == VT_ERROR && errno == EINVAL) { 811 /* 812 * When the device is a LDoms virtual disk, the DKIOCGETEFI 813 * ioctl can fail with EINVAL if the virtual disk backend 814 * is a ZFS volume serviced by a domain running an old version 815 * of Solaris. This is because the DKIOCGETEFI ioctl was 816 * initially incorrectly implemented for a ZFS volume and it 817 * expected the GPT and GPE to be retrieved with a single ioctl. 818 * So we try to read the GPT and the GPE using that old style 819 * ioctl. 820 */ 821 dk_ioc.dki_lba = 1; 822 dk_ioc.dki_length = label_len; 823 rval = check_label(fd, &dk_ioc); 824 } 825 826 if (rval < 0) { 827 free(efi); 828 return (rval); 829 } 830 831 /* LINTED -- always longlong aligned */ 832 efi_parts = (efi_gpe_t *)(((char *)efi) + disk_info.dki_lbsize); 833 834 /* 835 * Assemble this into a "dk_gpt" struct for easier 836 * digestibility by applications. 837 */ 838 vtoc->efi_version = LE_32(efi->efi_gpt_Revision); 839 vtoc->efi_nparts = LE_32(efi->efi_gpt_NumberOfPartitionEntries); 840 vtoc->efi_part_size = LE_32(efi->efi_gpt_SizeOfPartitionEntry); 841 vtoc->efi_lbasize = disk_info.dki_lbsize; 842 vtoc->efi_last_lba = disk_info.dki_capacity - 1; 843 vtoc->efi_first_u_lba = LE_64(efi->efi_gpt_FirstUsableLBA); 844 vtoc->efi_last_u_lba = LE_64(efi->efi_gpt_LastUsableLBA); 845 vtoc->efi_altern_lba = LE_64(efi->efi_gpt_AlternateLBA); 846 UUID_LE_CONVERT(vtoc->efi_disk_uguid, efi->efi_gpt_DiskGUID); 847 848 /* 849 * If the array the user passed in is too small, set the length 850 * to what it needs to be and return 851 */ 852 if (user_length < vtoc->efi_nparts) { 853 return (VT_EINVAL); 854 } 855 856 for (i = 0; i < vtoc->efi_nparts; i++) { 857 UUID_LE_CONVERT(vtoc->efi_parts[i].p_guid, 858 efi_parts[i].efi_gpe_PartitionTypeGUID); 859 860 for (j = 0; 861 j < sizeof (conversion_array) 862 / sizeof (struct uuid_to_ptag); j++) { 863 864 if (memcmp(&vtoc->efi_parts[i].p_guid, 865 &conversion_array[j].uuid, 866 sizeof (struct uuid)) == 0) { 867 vtoc->efi_parts[i].p_tag = j; 868 break; 869 } 870 } 871 if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED) 872 continue; 873 vtoc->efi_parts[i].p_flag = 874 LE_16(efi_parts[i].efi_gpe_Attributes.PartitionAttrs); 875 vtoc->efi_parts[i].p_start = 876 LE_64(efi_parts[i].efi_gpe_StartingLBA); 877 vtoc->efi_parts[i].p_size = 878 LE_64(efi_parts[i].efi_gpe_EndingLBA) - 879 vtoc->efi_parts[i].p_start + 1; 880 for (j = 0; j < EFI_PART_NAME_LEN; j++) { 881 vtoc->efi_parts[i].p_name[j] = 882 (uchar_t)LE_16( 883 efi_parts[i].efi_gpe_PartitionName[j]); 884 } 885 886 UUID_LE_CONVERT(vtoc->efi_parts[i].p_uguid, 887 efi_parts[i].efi_gpe_UniquePartitionGUID); 888 } 889 free(efi); 890 891 return (dki_info.dki_partition); 892 } 893 894 /* writes a "protective" MBR */ 895 static int 896 write_pmbr(int fd, struct dk_gpt *vtoc) 897 { 898 dk_efi_t dk_ioc; 899 struct mboot mb; 900 uchar_t *cp; 901 diskaddr_t size_in_lba; 902 uchar_t *buf; 903 int len; 904 905 len = (vtoc->efi_lbasize == 0) ? sizeof (mb) : vtoc->efi_lbasize; 906 if (posix_memalign((void **)&buf, len, len)) 907 return (VT_ERROR); 908 909 /* 910 * Preserve any boot code and disk signature if the first block is 911 * already an MBR. 912 */ 913 memset(buf, 0, len); 914 dk_ioc.dki_lba = 0; 915 dk_ioc.dki_length = len; 916 /* LINTED -- always longlong aligned */ 917 dk_ioc.dki_data = (efi_gpt_t *)buf; 918 if (efi_ioctl(fd, DKIOCGETEFI, &dk_ioc) == -1) { 919 memset(&mb, 0, sizeof (mb)); 920 mb.signature = LE_16(MBB_MAGIC); 921 } else { 922 (void) memcpy(&mb, buf, sizeof (mb)); 923 if (mb.signature != LE_16(MBB_MAGIC)) { 924 memset(&mb, 0, sizeof (mb)); 925 mb.signature = LE_16(MBB_MAGIC); 926 } 927 } 928 929 memset(&mb.parts, 0, sizeof (mb.parts)); 930 cp = (uchar_t *)&mb.parts[0]; 931 /* bootable or not */ 932 *cp++ = 0; 933 /* beginning CHS; 0xffffff if not representable */ 934 *cp++ = 0xff; 935 *cp++ = 0xff; 936 *cp++ = 0xff; 937 /* OS type */ 938 *cp++ = EFI_PMBR; 939 /* ending CHS; 0xffffff if not representable */ 940 *cp++ = 0xff; 941 *cp++ = 0xff; 942 *cp++ = 0xff; 943 /* starting LBA: 1 (little endian format) by EFI definition */ 944 *cp++ = 0x01; 945 *cp++ = 0x00; 946 *cp++ = 0x00; 947 *cp++ = 0x00; 948 /* ending LBA: last block on the disk (little endian format) */ 949 size_in_lba = vtoc->efi_last_lba; 950 if (size_in_lba < 0xffffffff) { 951 *cp++ = (size_in_lba & 0x000000ff); 952 *cp++ = (size_in_lba & 0x0000ff00) >> 8; 953 *cp++ = (size_in_lba & 0x00ff0000) >> 16; 954 *cp++ = (size_in_lba & 0xff000000) >> 24; 955 } else { 956 *cp++ = 0xff; 957 *cp++ = 0xff; 958 *cp++ = 0xff; 959 *cp++ = 0xff; 960 } 961 962 (void) memcpy(buf, &mb, sizeof (mb)); 963 /* LINTED -- always longlong aligned */ 964 dk_ioc.dki_data = (efi_gpt_t *)buf; 965 dk_ioc.dki_lba = 0; 966 dk_ioc.dki_length = len; 967 if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) { 968 free(buf); 969 switch (errno) { 970 case EIO: 971 return (VT_EIO); 972 case EINVAL: 973 return (VT_EINVAL); 974 default: 975 return (VT_ERROR); 976 } 977 } 978 free(buf); 979 return (0); 980 } 981 982 /* make sure the user specified something reasonable */ 983 static int 984 check_input(struct dk_gpt *vtoc) 985 { 986 int resv_part = -1; 987 int i, j; 988 diskaddr_t istart, jstart, isize, jsize, endsect; 989 990 /* 991 * Sanity-check the input (make sure no partitions overlap) 992 */ 993 for (i = 0; i < vtoc->efi_nparts; i++) { 994 /* It can't be unassigned and have an actual size */ 995 if ((vtoc->efi_parts[i].p_tag == V_UNASSIGNED) && 996 (vtoc->efi_parts[i].p_size != 0)) { 997 if (efi_debug) { 998 (void) fprintf(stderr, "partition %d is " 999 "\"unassigned\" but has a size of %llu", 1000 i, vtoc->efi_parts[i].p_size); 1001 } 1002 return (VT_EINVAL); 1003 } 1004 if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED) { 1005 if (uuid_is_null((uchar_t *)&vtoc->efi_parts[i].p_guid)) 1006 continue; 1007 /* we have encountered an unknown uuid */ 1008 vtoc->efi_parts[i].p_tag = 0xff; 1009 } 1010 if (vtoc->efi_parts[i].p_tag == V_RESERVED) { 1011 if (resv_part != -1) { 1012 if (efi_debug) { 1013 (void) fprintf(stderr, "found " 1014 "duplicate reserved partition " 1015 "at %d\n", i); 1016 } 1017 return (VT_EINVAL); 1018 } 1019 resv_part = i; 1020 } 1021 if ((vtoc->efi_parts[i].p_start < vtoc->efi_first_u_lba) || 1022 (vtoc->efi_parts[i].p_start > vtoc->efi_last_u_lba)) { 1023 if (efi_debug) { 1024 (void) fprintf(stderr, 1025 "Partition %d starts at %llu. ", 1026 i, 1027 vtoc->efi_parts[i].p_start); 1028 (void) fprintf(stderr, 1029 "It must be between %llu and %llu.\n", 1030 vtoc->efi_first_u_lba, 1031 vtoc->efi_last_u_lba); 1032 } 1033 return (VT_EINVAL); 1034 } 1035 if ((vtoc->efi_parts[i].p_start + 1036 vtoc->efi_parts[i].p_size < 1037 vtoc->efi_first_u_lba) || 1038 (vtoc->efi_parts[i].p_start + 1039 vtoc->efi_parts[i].p_size > 1040 vtoc->efi_last_u_lba + 1)) { 1041 if (efi_debug) { 1042 (void) fprintf(stderr, 1043 "Partition %d ends at %llu. ", 1044 i, 1045 vtoc->efi_parts[i].p_start + 1046 vtoc->efi_parts[i].p_size); 1047 (void) fprintf(stderr, 1048 "It must be between %llu and %llu.\n", 1049 vtoc->efi_first_u_lba, 1050 vtoc->efi_last_u_lba); 1051 } 1052 return (VT_EINVAL); 1053 } 1054 1055 for (j = 0; j < vtoc->efi_nparts; j++) { 1056 isize = vtoc->efi_parts[i].p_size; 1057 jsize = vtoc->efi_parts[j].p_size; 1058 istart = vtoc->efi_parts[i].p_start; 1059 jstart = vtoc->efi_parts[j].p_start; 1060 if ((i != j) && (isize != 0) && (jsize != 0)) { 1061 endsect = jstart + jsize -1; 1062 if ((jstart <= istart) && 1063 (istart <= endsect)) { 1064 if (efi_debug) { 1065 (void) fprintf(stderr, 1066 "Partition %d overlaps " 1067 "partition %d.", i, j); 1068 } 1069 return (VT_EINVAL); 1070 } 1071 } 1072 } 1073 } 1074 /* just a warning for now */ 1075 if ((resv_part == -1) && efi_debug) { 1076 (void) fprintf(stderr, 1077 "no reserved partition found\n"); 1078 } 1079 return (0); 1080 } 1081 1082 static int 1083 call_blkpg_ioctl(int fd, int command, diskaddr_t start, 1084 diskaddr_t size, uint_t pno) 1085 { 1086 struct blkpg_ioctl_arg ioctl_arg; 1087 struct blkpg_partition linux_part; 1088 memset(&linux_part, 0, sizeof (linux_part)); 1089 1090 char *path = efi_get_devname(fd); 1091 if (path == NULL) { 1092 (void) fprintf(stderr, "failed to retrieve device name\n"); 1093 return (VT_EINVAL); 1094 } 1095 1096 linux_part.start = start; 1097 linux_part.length = size; 1098 linux_part.pno = pno; 1099 snprintf(linux_part.devname, BLKPG_DEVNAMELTH - 1, "%s%u", path, pno); 1100 linux_part.devname[BLKPG_DEVNAMELTH - 1] = '\0'; 1101 free(path); 1102 1103 ioctl_arg.op = command; 1104 ioctl_arg.flags = 0; 1105 ioctl_arg.datalen = sizeof (struct blkpg_partition); 1106 ioctl_arg.data = &linux_part; 1107 1108 return (ioctl(fd, BLKPG, &ioctl_arg)); 1109 } 1110 1111 /* 1112 * add all the unallocated space to the current label 1113 */ 1114 int 1115 efi_use_whole_disk(int fd) 1116 { 1117 struct dk_gpt *efi_label = NULL; 1118 int rval; 1119 int i; 1120 uint_t resv_index = 0, data_index = 0; 1121 diskaddr_t resv_start = 0, data_start = 0; 1122 diskaddr_t data_size, limit, difference; 1123 boolean_t sync_needed = B_FALSE; 1124 uint_t nblocks; 1125 1126 rval = efi_alloc_and_read(fd, &efi_label); 1127 if (rval < 0) { 1128 if (efi_label != NULL) 1129 efi_free(efi_label); 1130 return (rval); 1131 } 1132 1133 /* 1134 * Find the last physically non-zero partition. 1135 * This should be the reserved partition. 1136 */ 1137 for (i = 0; i < efi_label->efi_nparts; i ++) { 1138 if (resv_start < efi_label->efi_parts[i].p_start) { 1139 resv_start = efi_label->efi_parts[i].p_start; 1140 resv_index = i; 1141 } 1142 } 1143 1144 /* 1145 * Find the last physically non-zero partition before that. 1146 * This is the data partition. 1147 */ 1148 for (i = 0; i < resv_index; i ++) { 1149 if (data_start < efi_label->efi_parts[i].p_start) { 1150 data_start = efi_label->efi_parts[i].p_start; 1151 data_index = i; 1152 } 1153 } 1154 data_size = efi_label->efi_parts[data_index].p_size; 1155 1156 /* 1157 * See the "efi_alloc_and_init" function for more information 1158 * about where this "nblocks" value comes from. 1159 */ 1160 nblocks = efi_label->efi_first_u_lba - 1; 1161 1162 /* 1163 * Determine if the EFI label is out of sync. We check that: 1164 * 1165 * 1. the data partition ends at the limit we set, and 1166 * 2. the reserved partition starts at the limit we set. 1167 * 1168 * If either of these conditions is not met, then we need to 1169 * resync the EFI label. 1170 * 1171 * The limit is the last usable LBA, determined by the last LBA 1172 * and the first usable LBA fields on the EFI label of the disk 1173 * (see the lines directly above). Additionally, we factor in 1174 * EFI_MIN_RESV_SIZE (per its use in "zpool_label_disk") and 1175 * P2ALIGN it to ensure the partition boundaries are aligned 1176 * (for performance reasons). The alignment should match the 1177 * alignment used by the "zpool_label_disk" function. 1178 */ 1179 limit = P2ALIGN_TYPED(efi_label->efi_last_lba - nblocks - 1180 EFI_MIN_RESV_SIZE, PARTITION_END_ALIGNMENT, diskaddr_t); 1181 if (data_start + data_size != limit || resv_start != limit) 1182 sync_needed = B_TRUE; 1183 1184 if (efi_debug && sync_needed) 1185 (void) fprintf(stderr, "efi_use_whole_disk: sync needed\n"); 1186 1187 /* 1188 * If alter_lba is 1, we are using the backup label. 1189 * Since we can locate the backup label by disk capacity, 1190 * there must be no unallocated space. 1191 */ 1192 if ((efi_label->efi_altern_lba == 1) || (efi_label->efi_altern_lba 1193 >= efi_label->efi_last_lba && !sync_needed)) { 1194 if (efi_debug) { 1195 (void) fprintf(stderr, 1196 "efi_use_whole_disk: requested space not found\n"); 1197 } 1198 efi_free(efi_label); 1199 return (VT_ENOSPC); 1200 } 1201 1202 /* 1203 * Verify that we've found the reserved partition by checking 1204 * that it looks the way it did when we created it in zpool_label_disk. 1205 * If we've found the incorrect partition, then we know that this 1206 * device was reformatted and no longer is solely used by ZFS. 1207 */ 1208 if ((efi_label->efi_parts[resv_index].p_size != EFI_MIN_RESV_SIZE) || 1209 (efi_label->efi_parts[resv_index].p_tag != V_RESERVED) || 1210 (resv_index != 8)) { 1211 if (efi_debug) { 1212 (void) fprintf(stderr, 1213 "efi_use_whole_disk: wholedisk not available\n"); 1214 } 1215 efi_free(efi_label); 1216 return (VT_ENOSPC); 1217 } 1218 1219 if (data_start + data_size != resv_start) { 1220 if (efi_debug) { 1221 (void) fprintf(stderr, 1222 "efi_use_whole_disk: " 1223 "data_start (%lli) + " 1224 "data_size (%lli) != " 1225 "resv_start (%lli)\n", 1226 data_start, data_size, resv_start); 1227 } 1228 1229 return (VT_EINVAL); 1230 } 1231 1232 if (limit < resv_start) { 1233 if (efi_debug) { 1234 (void) fprintf(stderr, 1235 "efi_use_whole_disk: " 1236 "limit (%lli) < resv_start (%lli)\n", 1237 limit, resv_start); 1238 } 1239 1240 return (VT_EINVAL); 1241 } 1242 1243 difference = limit - resv_start; 1244 1245 if (efi_debug) 1246 (void) fprintf(stderr, 1247 "efi_use_whole_disk: difference is %lli\n", difference); 1248 1249 /* 1250 * Move the reserved partition. There is currently no data in 1251 * here except fabricated devids (which get generated via 1252 * efi_write()). So there is no need to copy data. 1253 */ 1254 efi_label->efi_parts[data_index].p_size += difference; 1255 efi_label->efi_parts[resv_index].p_start += difference; 1256 efi_label->efi_last_u_lba = efi_label->efi_last_lba - nblocks; 1257 1258 /* 1259 * Rescanning the partition table in the kernel can result 1260 * in the device links to be removed (see comment in vdev_disk_open). 1261 * If BLKPG_RESIZE_PARTITION is available, then we can resize 1262 * the partition table online and avoid having to remove the device 1263 * links used by the pool. This provides a very deterministic 1264 * approach to resizing devices and does not require any 1265 * loops waiting for devices to reappear. 1266 */ 1267 #ifdef BLKPG_RESIZE_PARTITION 1268 /* 1269 * Delete the reserved partition since we're about to expand 1270 * the data partition and it would overlap with the reserved 1271 * partition. 1272 * NOTE: The starting index for the ioctl is 1 while for the 1273 * EFI partitions it's 0. For that reason we have to add one 1274 * whenever we make an ioctl call. 1275 */ 1276 rval = call_blkpg_ioctl(fd, BLKPG_DEL_PARTITION, 0, 0, resv_index + 1); 1277 if (rval != 0) 1278 goto out; 1279 1280 /* 1281 * Expand the data partition 1282 */ 1283 rval = call_blkpg_ioctl(fd, BLKPG_RESIZE_PARTITION, 1284 efi_label->efi_parts[data_index].p_start * efi_label->efi_lbasize, 1285 efi_label->efi_parts[data_index].p_size * efi_label->efi_lbasize, 1286 data_index + 1); 1287 if (rval != 0) { 1288 (void) fprintf(stderr, "Unable to resize data " 1289 "partition: %d\n", rval); 1290 /* 1291 * Since we failed to resize, we need to reset the start 1292 * of the reserve partition and re-create it. 1293 */ 1294 efi_label->efi_parts[resv_index].p_start -= difference; 1295 } 1296 1297 /* 1298 * Re-add the reserved partition. If we've expanded the data partition 1299 * then we'll move the reserve partition to the end of the data 1300 * partition. Otherwise, we'll recreate the partition in its original 1301 * location. Note that we do this as best-effort and ignore any 1302 * errors that may arise here. This will ensure that we finish writing 1303 * the EFI label. 1304 */ 1305 (void) call_blkpg_ioctl(fd, BLKPG_ADD_PARTITION, 1306 efi_label->efi_parts[resv_index].p_start * efi_label->efi_lbasize, 1307 efi_label->efi_parts[resv_index].p_size * efi_label->efi_lbasize, 1308 resv_index + 1); 1309 #endif 1310 1311 /* 1312 * We're now ready to write the EFI label. 1313 */ 1314 if (rval == 0) { 1315 rval = efi_write(fd, efi_label); 1316 if (rval < 0 && efi_debug) { 1317 (void) fprintf(stderr, "efi_use_whole_disk:fail " 1318 "to write label, rval=%d\n", rval); 1319 } 1320 } 1321 1322 out: 1323 efi_free(efi_label); 1324 return (rval); 1325 } 1326 1327 /* 1328 * write EFI label and backup label 1329 */ 1330 int 1331 efi_write(int fd, struct dk_gpt *vtoc) 1332 { 1333 dk_efi_t dk_ioc; 1334 efi_gpt_t *efi; 1335 efi_gpe_t *efi_parts; 1336 int i, j; 1337 struct dk_cinfo dki_info; 1338 int rval; 1339 int md_flag = 0; 1340 int nblocks; 1341 diskaddr_t lba_backup_gpt_hdr; 1342 1343 if ((rval = efi_get_info(fd, &dki_info)) != 0) 1344 return (rval); 1345 1346 /* check if we are dealing with a metadevice */ 1347 if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) && 1348 (strncmp(dki_info.dki_dname, "md", 3) == 0)) { 1349 md_flag = 1; 1350 } 1351 1352 if (check_input(vtoc)) { 1353 /* 1354 * not valid; if it's a metadevice just pass it down 1355 * because SVM will do its own checking 1356 */ 1357 if (md_flag == 0) { 1358 return (VT_EINVAL); 1359 } 1360 } 1361 1362 dk_ioc.dki_lba = 1; 1363 if (NBLOCKS(vtoc->efi_nparts, vtoc->efi_lbasize) < 34) { 1364 dk_ioc.dki_length = EFI_MIN_ARRAY_SIZE + vtoc->efi_lbasize; 1365 } else { 1366 dk_ioc.dki_length = (len_t)NBLOCKS(vtoc->efi_nparts, 1367 vtoc->efi_lbasize) * 1368 vtoc->efi_lbasize; 1369 } 1370 1371 /* 1372 * the number of blocks occupied by GUID partition entry array 1373 */ 1374 nblocks = dk_ioc.dki_length / vtoc->efi_lbasize - 1; 1375 1376 /* 1377 * Backup GPT header is located on the block after GUID 1378 * partition entry array. Here, we calculate the address 1379 * for backup GPT header. 1380 */ 1381 lba_backup_gpt_hdr = vtoc->efi_last_u_lba + 1 + nblocks; 1382 if (posix_memalign((void **)&dk_ioc.dki_data, 1383 vtoc->efi_lbasize, dk_ioc.dki_length)) 1384 return (VT_ERROR); 1385 1386 memset(dk_ioc.dki_data, 0, dk_ioc.dki_length); 1387 efi = dk_ioc.dki_data; 1388 1389 /* stuff user's input into EFI struct */ 1390 efi->efi_gpt_Signature = LE_64(EFI_SIGNATURE); 1391 efi->efi_gpt_Revision = LE_32(vtoc->efi_version); /* 0x02000100 */ 1392 efi->efi_gpt_HeaderSize = LE_32(sizeof (struct efi_gpt) - LEN_EFI_PAD); 1393 efi->efi_gpt_Reserved1 = 0; 1394 efi->efi_gpt_MyLBA = LE_64(1ULL); 1395 efi->efi_gpt_AlternateLBA = LE_64(lba_backup_gpt_hdr); 1396 efi->efi_gpt_FirstUsableLBA = LE_64(vtoc->efi_first_u_lba); 1397 efi->efi_gpt_LastUsableLBA = LE_64(vtoc->efi_last_u_lba); 1398 efi->efi_gpt_PartitionEntryLBA = LE_64(2ULL); 1399 efi->efi_gpt_NumberOfPartitionEntries = LE_32(vtoc->efi_nparts); 1400 efi->efi_gpt_SizeOfPartitionEntry = LE_32(sizeof (struct efi_gpe)); 1401 UUID_LE_CONVERT(efi->efi_gpt_DiskGUID, vtoc->efi_disk_uguid); 1402 1403 /* LINTED -- always longlong aligned */ 1404 efi_parts = (efi_gpe_t *)((char *)dk_ioc.dki_data + vtoc->efi_lbasize); 1405 1406 for (i = 0; i < vtoc->efi_nparts; i++) { 1407 for (j = 0; 1408 j < sizeof (conversion_array) / 1409 sizeof (struct uuid_to_ptag); j++) { 1410 1411 if (vtoc->efi_parts[i].p_tag == j) { 1412 UUID_LE_CONVERT( 1413 efi_parts[i].efi_gpe_PartitionTypeGUID, 1414 conversion_array[j].uuid); 1415 break; 1416 } 1417 } 1418 1419 if (j == sizeof (conversion_array) / 1420 sizeof (struct uuid_to_ptag)) { 1421 /* 1422 * If we didn't have a matching uuid match, bail here. 1423 * Don't write a label with unknown uuid. 1424 */ 1425 if (efi_debug) { 1426 (void) fprintf(stderr, 1427 "Unknown uuid for p_tag %d\n", 1428 vtoc->efi_parts[i].p_tag); 1429 } 1430 return (VT_EINVAL); 1431 } 1432 1433 /* Zero's should be written for empty partitions */ 1434 if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED) 1435 continue; 1436 1437 efi_parts[i].efi_gpe_StartingLBA = 1438 LE_64(vtoc->efi_parts[i].p_start); 1439 efi_parts[i].efi_gpe_EndingLBA = 1440 LE_64(vtoc->efi_parts[i].p_start + 1441 vtoc->efi_parts[i].p_size - 1); 1442 efi_parts[i].efi_gpe_Attributes.PartitionAttrs = 1443 LE_16(vtoc->efi_parts[i].p_flag); 1444 for (j = 0; j < EFI_PART_NAME_LEN; j++) { 1445 efi_parts[i].efi_gpe_PartitionName[j] = 1446 LE_16((ushort_t)vtoc->efi_parts[i].p_name[j]); 1447 } 1448 if ((vtoc->efi_parts[i].p_tag != V_UNASSIGNED) && 1449 uuid_is_null((uchar_t *)&vtoc->efi_parts[i].p_uguid)) { 1450 (void) uuid_generate((uchar_t *) 1451 &vtoc->efi_parts[i].p_uguid); 1452 } 1453 memcpy(&efi_parts[i].efi_gpe_UniquePartitionGUID, 1454 &vtoc->efi_parts[i].p_uguid, 1455 sizeof (uuid_t)); 1456 } 1457 efi->efi_gpt_PartitionEntryArrayCRC32 = 1458 LE_32(efi_crc32((unsigned char *)efi_parts, 1459 vtoc->efi_nparts * (int)sizeof (struct efi_gpe))); 1460 efi->efi_gpt_HeaderCRC32 = 1461 LE_32(efi_crc32((unsigned char *)efi, 1462 LE_32(efi->efi_gpt_HeaderSize))); 1463 1464 if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) { 1465 free(dk_ioc.dki_data); 1466 switch (errno) { 1467 case EIO: 1468 return (VT_EIO); 1469 case EINVAL: 1470 return (VT_EINVAL); 1471 default: 1472 return (VT_ERROR); 1473 } 1474 } 1475 /* if it's a metadevice we're done */ 1476 if (md_flag) { 1477 free(dk_ioc.dki_data); 1478 return (0); 1479 } 1480 1481 /* write backup partition array */ 1482 dk_ioc.dki_lba = vtoc->efi_last_u_lba + 1; 1483 dk_ioc.dki_length -= vtoc->efi_lbasize; 1484 /* LINTED */ 1485 dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data + 1486 vtoc->efi_lbasize); 1487 1488 if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) { 1489 /* 1490 * we wrote the primary label okay, so don't fail 1491 */ 1492 if (efi_debug) { 1493 (void) fprintf(stderr, 1494 "write of backup partitions to block %llu " 1495 "failed, errno %d\n", 1496 vtoc->efi_last_u_lba + 1, 1497 errno); 1498 } 1499 } 1500 /* 1501 * now swap MyLBA and AlternateLBA fields and write backup 1502 * partition table header 1503 */ 1504 dk_ioc.dki_lba = lba_backup_gpt_hdr; 1505 dk_ioc.dki_length = vtoc->efi_lbasize; 1506 /* LINTED */ 1507 dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data - 1508 vtoc->efi_lbasize); 1509 efi->efi_gpt_AlternateLBA = LE_64(1ULL); 1510 efi->efi_gpt_MyLBA = LE_64(lba_backup_gpt_hdr); 1511 efi->efi_gpt_PartitionEntryLBA = LE_64(vtoc->efi_last_u_lba + 1); 1512 efi->efi_gpt_HeaderCRC32 = 0; 1513 efi->efi_gpt_HeaderCRC32 = 1514 LE_32(efi_crc32((unsigned char *)dk_ioc.dki_data, 1515 LE_32(efi->efi_gpt_HeaderSize))); 1516 1517 if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) { 1518 if (efi_debug) { 1519 (void) fprintf(stderr, 1520 "write of backup header to block %llu failed, " 1521 "errno %d\n", 1522 lba_backup_gpt_hdr, 1523 errno); 1524 } 1525 } 1526 /* write the PMBR */ 1527 (void) write_pmbr(fd, vtoc); 1528 free(dk_ioc.dki_data); 1529 1530 return (0); 1531 } 1532 1533 void 1534 efi_free(struct dk_gpt *ptr) 1535 { 1536 free(ptr); 1537 } 1538 1539 void 1540 efi_err_check(struct dk_gpt *vtoc) 1541 { 1542 int resv_part = -1; 1543 int i, j; 1544 diskaddr_t istart, jstart, isize, jsize, endsect; 1545 int overlap = 0; 1546 1547 /* 1548 * make sure no partitions overlap 1549 */ 1550 for (i = 0; i < vtoc->efi_nparts; i++) { 1551 /* It can't be unassigned and have an actual size */ 1552 if ((vtoc->efi_parts[i].p_tag == V_UNASSIGNED) && 1553 (vtoc->efi_parts[i].p_size != 0)) { 1554 (void) fprintf(stderr, 1555 "partition %d is \"unassigned\" but has a size " 1556 "of %llu\n", i, vtoc->efi_parts[i].p_size); 1557 } 1558 if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED) { 1559 continue; 1560 } 1561 if (vtoc->efi_parts[i].p_tag == V_RESERVED) { 1562 if (resv_part != -1) { 1563 (void) fprintf(stderr, 1564 "found duplicate reserved partition at " 1565 "%d\n", i); 1566 } 1567 resv_part = i; 1568 if (vtoc->efi_parts[i].p_size != EFI_MIN_RESV_SIZE) 1569 (void) fprintf(stderr, 1570 "Warning: reserved partition size must " 1571 "be %d sectors\n", EFI_MIN_RESV_SIZE); 1572 } 1573 if ((vtoc->efi_parts[i].p_start < vtoc->efi_first_u_lba) || 1574 (vtoc->efi_parts[i].p_start > vtoc->efi_last_u_lba)) { 1575 (void) fprintf(stderr, 1576 "Partition %d starts at %llu\n", 1577 i, 1578 vtoc->efi_parts[i].p_start); 1579 (void) fprintf(stderr, 1580 "It must be between %llu and %llu.\n", 1581 vtoc->efi_first_u_lba, 1582 vtoc->efi_last_u_lba); 1583 } 1584 if ((vtoc->efi_parts[i].p_start + 1585 vtoc->efi_parts[i].p_size < 1586 vtoc->efi_first_u_lba) || 1587 (vtoc->efi_parts[i].p_start + 1588 vtoc->efi_parts[i].p_size > 1589 vtoc->efi_last_u_lba + 1)) { 1590 (void) fprintf(stderr, 1591 "Partition %d ends at %llu\n", 1592 i, 1593 vtoc->efi_parts[i].p_start + 1594 vtoc->efi_parts[i].p_size); 1595 (void) fprintf(stderr, 1596 "It must be between %llu and %llu.\n", 1597 vtoc->efi_first_u_lba, 1598 vtoc->efi_last_u_lba); 1599 } 1600 1601 for (j = 0; j < vtoc->efi_nparts; j++) { 1602 isize = vtoc->efi_parts[i].p_size; 1603 jsize = vtoc->efi_parts[j].p_size; 1604 istart = vtoc->efi_parts[i].p_start; 1605 jstart = vtoc->efi_parts[j].p_start; 1606 if ((i != j) && (isize != 0) && (jsize != 0)) { 1607 endsect = jstart + jsize -1; 1608 if ((jstart <= istart) && 1609 (istart <= endsect)) { 1610 if (!overlap) { 1611 (void) fprintf(stderr, 1612 "label error: EFI Labels do not " 1613 "support overlapping partitions\n"); 1614 } 1615 (void) fprintf(stderr, 1616 "Partition %d overlaps partition " 1617 "%d.\n", i, j); 1618 overlap = 1; 1619 } 1620 } 1621 } 1622 } 1623 /* make sure there is a reserved partition */ 1624 if (resv_part == -1) { 1625 (void) fprintf(stderr, 1626 "no reserved partition found\n"); 1627 } 1628 } 1629