1 /* 2 * CDDL HEADER START 3 * 4 * The contents of this file are subject to the terms of the 5 * Common Development and Distribution License (the "License"). 6 * You may not use this file except in compliance with the License. 7 * 8 * You can obtain a copy of the license at usr/src/OPENSOLARIS.LICENSE 9 * or http://www.opensolaris.org/os/licensing. 10 * See the License for the specific language governing permissions 11 * and limitations under the License. 12 * 13 * When distributing Covered Code, include this CDDL HEADER in each 14 * file and include the License file at usr/src/OPENSOLARIS.LICENSE. 15 * If applicable, add the following below this CDDL HEADER, with the 16 * fields enclosed by brackets "[]" replaced with your own identifying 17 * information: Portions Copyright [yyyy] [name of copyright owner] 18 * 19 * CDDL HEADER END 20 */ 21 22 /* 23 * Copyright (c) 2002, 2010, Oracle and/or its affiliates. All rights reserved. 24 * Copyright 2012 Nexenta Systems, Inc. All rights reserved. 25 * Copyright (c) 2018 by Delphix. All rights reserved. 26 */ 27 28 #include <stdio.h> 29 #include <stdlib.h> 30 #include <errno.h> 31 #include <string.h> 32 #include <unistd.h> 33 #include <uuid/uuid.h> 34 #include <zlib.h> 35 #include <libintl.h> 36 #include <sys/types.h> 37 #include <sys/dkio.h> 38 #include <sys/mhd.h> 39 #include <sys/param.h> 40 #include <sys/dktp/fdisk.h> 41 #include <sys/efi_partition.h> 42 #include <sys/byteorder.h> 43 #include <sys/vdev_disk.h> 44 #include <linux/fs.h> 45 #include <linux/blkpg.h> 46 47 static struct uuid_to_ptag { 48 struct uuid uuid; 49 } conversion_array[] = { 50 { EFI_UNUSED }, 51 { EFI_BOOT }, 52 { EFI_ROOT }, 53 { EFI_SWAP }, 54 { EFI_USR }, 55 { EFI_BACKUP }, 56 { EFI_UNUSED }, /* STAND is never used */ 57 { EFI_VAR }, 58 { EFI_HOME }, 59 { EFI_ALTSCTR }, 60 { EFI_UNUSED }, /* CACHE (cachefs) is never used */ 61 { EFI_RESERVED }, 62 { EFI_SYSTEM }, 63 { EFI_LEGACY_MBR }, 64 { EFI_SYMC_PUB }, 65 { EFI_SYMC_CDS }, 66 { EFI_MSFT_RESV }, 67 { EFI_DELL_BASIC }, 68 { EFI_DELL_RAID }, 69 { EFI_DELL_SWAP }, 70 { EFI_DELL_LVM }, 71 { EFI_DELL_RESV }, 72 { EFI_AAPL_HFS }, 73 { EFI_AAPL_UFS }, 74 { EFI_FREEBSD_BOOT }, 75 { EFI_FREEBSD_SWAP }, 76 { EFI_FREEBSD_UFS }, 77 { EFI_FREEBSD_VINUM }, 78 { EFI_FREEBSD_ZFS }, 79 { EFI_BIOS_BOOT }, 80 { EFI_INTC_RS }, 81 { EFI_SNE_BOOT }, 82 { EFI_LENOVO_BOOT }, 83 { EFI_MSFT_LDMM }, 84 { EFI_MSFT_LDMD }, 85 { EFI_MSFT_RE }, 86 { EFI_IBM_GPFS }, 87 { EFI_MSFT_STORAGESPACES }, 88 { EFI_HPQ_DATA }, 89 { EFI_HPQ_SVC }, 90 { EFI_RHT_DATA }, 91 { EFI_RHT_HOME }, 92 { EFI_RHT_SRV }, 93 { EFI_RHT_DMCRYPT }, 94 { EFI_RHT_LUKS }, 95 { EFI_FREEBSD_DISKLABEL }, 96 { EFI_AAPL_RAID }, 97 { EFI_AAPL_RAIDOFFLINE }, 98 { EFI_AAPL_BOOT }, 99 { EFI_AAPL_LABEL }, 100 { EFI_AAPL_TVRECOVERY }, 101 { EFI_AAPL_CORESTORAGE }, 102 { EFI_NETBSD_SWAP }, 103 { EFI_NETBSD_FFS }, 104 { EFI_NETBSD_LFS }, 105 { EFI_NETBSD_RAID }, 106 { EFI_NETBSD_CAT }, 107 { EFI_NETBSD_CRYPT }, 108 { EFI_GOOG_KERN }, 109 { EFI_GOOG_ROOT }, 110 { EFI_GOOG_RESV }, 111 { EFI_HAIKU_BFS }, 112 { EFI_MIDNIGHTBSD_BOOT }, 113 { EFI_MIDNIGHTBSD_DATA }, 114 { EFI_MIDNIGHTBSD_SWAP }, 115 { EFI_MIDNIGHTBSD_UFS }, 116 { EFI_MIDNIGHTBSD_VINUM }, 117 { EFI_MIDNIGHTBSD_ZFS }, 118 { EFI_CEPH_JOURNAL }, 119 { EFI_CEPH_DMCRYPTJOURNAL }, 120 { EFI_CEPH_OSD }, 121 { EFI_CEPH_DMCRYPTOSD }, 122 { EFI_CEPH_CREATE }, 123 { EFI_CEPH_DMCRYPTCREATE }, 124 { EFI_OPENBSD_DISKLABEL }, 125 { EFI_BBRY_QNX }, 126 { EFI_BELL_PLAN9 }, 127 { EFI_VMW_KCORE }, 128 { EFI_VMW_VMFS }, 129 { EFI_VMW_RESV }, 130 { EFI_RHT_ROOTX86 }, 131 { EFI_RHT_ROOTAMD64 }, 132 { EFI_RHT_ROOTARM }, 133 { EFI_RHT_ROOTARM64 }, 134 { EFI_ACRONIS_SECUREZONE }, 135 { EFI_ONIE_BOOT }, 136 { EFI_ONIE_CONFIG }, 137 { EFI_IBM_PPRPBOOT }, 138 { EFI_FREEDESKTOP_BOOT } 139 }; 140 141 int efi_debug = 0; 142 143 static int efi_read(int, struct dk_gpt *); 144 145 /* 146 * Return a 32-bit CRC of the contents of the buffer. Pre-and-post 147 * one's conditioning will be handled by crc32() internally. 148 */ 149 static uint32_t 150 efi_crc32(const unsigned char *buf, unsigned int size) 151 { 152 uint32_t crc = crc32(0, Z_NULL, 0); 153 154 crc = crc32(crc, buf, size); 155 156 return (crc); 157 } 158 159 static int 160 read_disk_info(int fd, diskaddr_t *capacity, uint_t *lbsize) 161 { 162 int sector_size; 163 unsigned long long capacity_size; 164 165 if (ioctl(fd, BLKSSZGET, §or_size) < 0) 166 return (-1); 167 168 if (ioctl(fd, BLKGETSIZE64, &capacity_size) < 0) 169 return (-1); 170 171 *lbsize = (uint_t)sector_size; 172 *capacity = (diskaddr_t)(capacity_size / sector_size); 173 174 return (0); 175 } 176 177 /* 178 * Return back the device name associated with the file descriptor. The 179 * caller is responsible for freeing the memory associated with the 180 * returned string. 181 */ 182 static char * 183 efi_get_devname(int fd) 184 { 185 char path[32]; 186 187 /* 188 * The libefi API only provides the open fd and not the file path. 189 * To handle this realpath(3) is used to resolve the block device 190 * name from /proc/self/fd/<fd>. 191 */ 192 (void) snprintf(path, sizeof (path), "/proc/self/fd/%d", fd); 193 return (realpath(path, NULL)); 194 } 195 196 static int 197 efi_get_info(int fd, struct dk_cinfo *dki_info) 198 { 199 char *dev_path; 200 int rval = 0; 201 202 memset(dki_info, 0, sizeof (*dki_info)); 203 204 /* 205 * The simplest way to get the partition number under linux is 206 * to parse it out of the /dev/<disk><partition> block device name. 207 * The kernel creates this using the partition number when it 208 * populates /dev/ so it may be trusted. The tricky bit here is 209 * that the naming convention is based on the block device type. 210 * So we need to take this in to account when parsing out the 211 * partition information. Aside from the partition number we collect 212 * some additional device info. 213 */ 214 dev_path = efi_get_devname(fd); 215 if (dev_path == NULL) 216 goto error; 217 218 if ((strncmp(dev_path, "/dev/sd", 7) == 0)) { 219 strcpy(dki_info->dki_cname, "sd"); 220 dki_info->dki_ctype = DKC_SCSI_CCS; 221 rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu", 222 dki_info->dki_dname, 223 &dki_info->dki_partition); 224 } else if ((strncmp(dev_path, "/dev/hd", 7) == 0)) { 225 strcpy(dki_info->dki_cname, "hd"); 226 dki_info->dki_ctype = DKC_DIRECT; 227 rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu", 228 dki_info->dki_dname, 229 &dki_info->dki_partition); 230 } else if ((strncmp(dev_path, "/dev/md", 7) == 0)) { 231 strcpy(dki_info->dki_cname, "pseudo"); 232 dki_info->dki_ctype = DKC_MD; 233 strcpy(dki_info->dki_dname, "md"); 234 rval = sscanf(dev_path, "/dev/md%[0-9]p%hu", 235 dki_info->dki_dname + 2, 236 &dki_info->dki_partition); 237 } else if ((strncmp(dev_path, "/dev/vd", 7) == 0)) { 238 strcpy(dki_info->dki_cname, "vd"); 239 dki_info->dki_ctype = DKC_MD; 240 rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu", 241 dki_info->dki_dname, 242 &dki_info->dki_partition); 243 } else if ((strncmp(dev_path, "/dev/xvd", 8) == 0)) { 244 strcpy(dki_info->dki_cname, "xvd"); 245 dki_info->dki_ctype = DKC_MD; 246 rval = sscanf(dev_path, "/dev/%[a-zA-Z]%hu", 247 dki_info->dki_dname, 248 &dki_info->dki_partition); 249 } else if ((strncmp(dev_path, "/dev/zd", 7) == 0)) { 250 strcpy(dki_info->dki_cname, "zd"); 251 dki_info->dki_ctype = DKC_MD; 252 strcpy(dki_info->dki_dname, "zd"); 253 rval = sscanf(dev_path, "/dev/zd%[0-9]p%hu", 254 dki_info->dki_dname + 2, 255 &dki_info->dki_partition); 256 } else if ((strncmp(dev_path, "/dev/dm-", 8) == 0)) { 257 strcpy(dki_info->dki_cname, "pseudo"); 258 dki_info->dki_ctype = DKC_VBD; 259 strcpy(dki_info->dki_dname, "dm-"); 260 rval = sscanf(dev_path, "/dev/dm-%[0-9]p%hu", 261 dki_info->dki_dname + 3, 262 &dki_info->dki_partition); 263 } else if ((strncmp(dev_path, "/dev/ram", 8) == 0)) { 264 strcpy(dki_info->dki_cname, "pseudo"); 265 dki_info->dki_ctype = DKC_PCMCIA_MEM; 266 strcpy(dki_info->dki_dname, "ram"); 267 rval = sscanf(dev_path, "/dev/ram%[0-9]p%hu", 268 dki_info->dki_dname + 3, 269 &dki_info->dki_partition); 270 } else if ((strncmp(dev_path, "/dev/loop", 9) == 0)) { 271 strcpy(dki_info->dki_cname, "pseudo"); 272 dki_info->dki_ctype = DKC_VBD; 273 strcpy(dki_info->dki_dname, "loop"); 274 rval = sscanf(dev_path, "/dev/loop%[0-9]p%hu", 275 dki_info->dki_dname + 4, 276 &dki_info->dki_partition); 277 } else if ((strncmp(dev_path, "/dev/nvme", 9) == 0)) { 278 strcpy(dki_info->dki_cname, "nvme"); 279 dki_info->dki_ctype = DKC_SCSI_CCS; 280 strcpy(dki_info->dki_dname, "nvme"); 281 (void) sscanf(dev_path, "/dev/nvme%[0-9]", 282 dki_info->dki_dname + 4); 283 size_t controller_length = strlen( 284 dki_info->dki_dname); 285 strcpy(dki_info->dki_dname + controller_length, 286 "n"); 287 rval = sscanf(dev_path, 288 "/dev/nvme%*[0-9]n%[0-9]p%hu", 289 dki_info->dki_dname + controller_length + 1, 290 &dki_info->dki_partition); 291 } else { 292 strcpy(dki_info->dki_dname, "unknown"); 293 strcpy(dki_info->dki_cname, "unknown"); 294 dki_info->dki_ctype = DKC_UNKNOWN; 295 } 296 297 switch (rval) { 298 case 0: 299 errno = EINVAL; 300 goto error; 301 case 1: 302 dki_info->dki_partition = 0; 303 } 304 305 free(dev_path); 306 307 return (0); 308 error: 309 if (efi_debug) 310 (void) fprintf(stderr, "DKIOCINFO errno 0x%x\n", errno); 311 312 switch (errno) { 313 case EIO: 314 return (VT_EIO); 315 case EINVAL: 316 return (VT_EINVAL); 317 default: 318 return (VT_ERROR); 319 } 320 } 321 322 /* 323 * the number of blocks the EFI label takes up (round up to nearest 324 * block) 325 */ 326 #define NBLOCKS(p, l) (1 + ((((p) * (int)sizeof (efi_gpe_t)) + \ 327 ((l) - 1)) / (l))) 328 /* number of partitions -- limited by what we can malloc */ 329 #define MAX_PARTS ((4294967295UL - sizeof (struct dk_gpt)) / \ 330 sizeof (struct dk_part)) 331 332 int 333 efi_alloc_and_init(int fd, uint32_t nparts, struct dk_gpt **vtoc) 334 { 335 diskaddr_t capacity = 0; 336 uint_t lbsize = 0; 337 uint_t nblocks; 338 size_t length; 339 struct dk_gpt *vptr; 340 struct uuid uuid; 341 struct dk_cinfo dki_info; 342 343 if (read_disk_info(fd, &capacity, &lbsize) != 0) 344 return (-1); 345 346 if (efi_get_info(fd, &dki_info) != 0) 347 return (-1); 348 349 if (dki_info.dki_partition != 0) 350 return (-1); 351 352 if ((dki_info.dki_ctype == DKC_PCMCIA_MEM) || 353 (dki_info.dki_ctype == DKC_VBD) || 354 (dki_info.dki_ctype == DKC_UNKNOWN)) 355 return (-1); 356 357 nblocks = NBLOCKS(nparts, lbsize); 358 if ((nblocks * lbsize) < EFI_MIN_ARRAY_SIZE + lbsize) { 359 /* 16K plus one block for the GPT */ 360 nblocks = EFI_MIN_ARRAY_SIZE / lbsize + 1; 361 } 362 363 if (nparts > MAX_PARTS) { 364 if (efi_debug) { 365 (void) fprintf(stderr, 366 "the maximum number of partitions supported is %lu\n", 367 MAX_PARTS); 368 } 369 return (-1); 370 } 371 372 length = sizeof (struct dk_gpt) + 373 sizeof (struct dk_part) * (nparts - 1); 374 375 vptr = calloc(1, length); 376 if (vptr == NULL) 377 return (-1); 378 379 *vtoc = vptr; 380 381 vptr->efi_version = EFI_VERSION_CURRENT; 382 vptr->efi_lbasize = lbsize; 383 vptr->efi_nparts = nparts; 384 /* 385 * add one block here for the PMBR; on disks with a 512 byte 386 * block size and 128 or fewer partitions, efi_first_u_lba 387 * should work out to "34" 388 */ 389 vptr->efi_first_u_lba = nblocks + 1; 390 vptr->efi_last_lba = capacity - 1; 391 vptr->efi_altern_lba = capacity -1; 392 vptr->efi_last_u_lba = vptr->efi_last_lba - nblocks; 393 394 (void) uuid_generate((uchar_t *)&uuid); 395 UUID_LE_CONVERT(vptr->efi_disk_uguid, uuid); 396 return (0); 397 } 398 399 /* 400 * Read EFI - return partition number upon success. 401 */ 402 int 403 efi_alloc_and_read(int fd, struct dk_gpt **vtoc) 404 { 405 int rval; 406 uint32_t nparts; 407 int length; 408 struct dk_gpt *vptr; 409 410 /* figure out the number of entries that would fit into 16K */ 411 nparts = EFI_MIN_ARRAY_SIZE / sizeof (efi_gpe_t); 412 length = (int) sizeof (struct dk_gpt) + 413 (int) sizeof (struct dk_part) * (nparts - 1); 414 vptr = calloc(1, length); 415 416 if (vptr == NULL) 417 return (VT_ERROR); 418 419 vptr->efi_nparts = nparts; 420 rval = efi_read(fd, vptr); 421 422 if ((rval == VT_EINVAL) && vptr->efi_nparts > nparts) { 423 void *tmp; 424 length = (int) sizeof (struct dk_gpt) + 425 (int) sizeof (struct dk_part) * (vptr->efi_nparts - 1); 426 nparts = vptr->efi_nparts; 427 if ((tmp = realloc(vptr, length)) == NULL) { 428 /* cppcheck-suppress doubleFree */ 429 free(vptr); 430 *vtoc = NULL; 431 return (VT_ERROR); 432 } else { 433 vptr = tmp; 434 rval = efi_read(fd, vptr); 435 } 436 } 437 438 if (rval < 0) { 439 if (efi_debug) { 440 (void) fprintf(stderr, 441 "read of EFI table failed, rval=%d\n", rval); 442 } 443 free(vptr); 444 *vtoc = NULL; 445 } else { 446 *vtoc = vptr; 447 } 448 449 return (rval); 450 } 451 452 static int 453 efi_ioctl(int fd, int cmd, dk_efi_t *dk_ioc) 454 { 455 void *data = dk_ioc->dki_data; 456 int error; 457 diskaddr_t capacity; 458 uint_t lbsize; 459 460 /* 461 * When the IO is not being performed in kernel as an ioctl we need 462 * to know the sector size so we can seek to the proper byte offset. 463 */ 464 if (read_disk_info(fd, &capacity, &lbsize) == -1) { 465 if (efi_debug) 466 fprintf(stderr, "unable to read disk info: %d", errno); 467 468 errno = EIO; 469 return (-1); 470 } 471 472 switch (cmd) { 473 case DKIOCGETEFI: 474 if (lbsize == 0) { 475 if (efi_debug) 476 (void) fprintf(stderr, "DKIOCGETEFI assuming " 477 "LBA %d bytes\n", DEV_BSIZE); 478 479 lbsize = DEV_BSIZE; 480 } 481 482 error = lseek(fd, dk_ioc->dki_lba * lbsize, SEEK_SET); 483 if (error == -1) { 484 if (efi_debug) 485 (void) fprintf(stderr, "DKIOCGETEFI lseek " 486 "error: %d\n", errno); 487 return (error); 488 } 489 490 error = read(fd, data, dk_ioc->dki_length); 491 if (error == -1) { 492 if (efi_debug) 493 (void) fprintf(stderr, "DKIOCGETEFI read " 494 "error: %d\n", errno); 495 return (error); 496 } 497 498 if (error != dk_ioc->dki_length) { 499 if (efi_debug) 500 (void) fprintf(stderr, "DKIOCGETEFI short " 501 "read of %d bytes\n", error); 502 errno = EIO; 503 return (-1); 504 } 505 error = 0; 506 break; 507 508 case DKIOCSETEFI: 509 if (lbsize == 0) { 510 if (efi_debug) 511 (void) fprintf(stderr, "DKIOCSETEFI unknown " 512 "LBA size\n"); 513 errno = EIO; 514 return (-1); 515 } 516 517 error = lseek(fd, dk_ioc->dki_lba * lbsize, SEEK_SET); 518 if (error == -1) { 519 if (efi_debug) 520 (void) fprintf(stderr, "DKIOCSETEFI lseek " 521 "error: %d\n", errno); 522 return (error); 523 } 524 525 error = write(fd, data, dk_ioc->dki_length); 526 if (error == -1) { 527 if (efi_debug) 528 (void) fprintf(stderr, "DKIOCSETEFI write " 529 "error: %d\n", errno); 530 return (error); 531 } 532 533 if (error != dk_ioc->dki_length) { 534 if (efi_debug) 535 (void) fprintf(stderr, "DKIOCSETEFI short " 536 "write of %d bytes\n", error); 537 errno = EIO; 538 return (-1); 539 } 540 541 /* Sync the new EFI table to disk */ 542 error = fsync(fd); 543 if (error == -1) 544 return (error); 545 546 /* Ensure any local disk cache is also flushed */ 547 if (ioctl(fd, BLKFLSBUF, 0) == -1) 548 return (error); 549 550 error = 0; 551 break; 552 553 default: 554 if (efi_debug) 555 (void) fprintf(stderr, "unsupported ioctl()\n"); 556 557 errno = EIO; 558 return (-1); 559 } 560 561 return (error); 562 } 563 564 int 565 efi_rescan(int fd) 566 { 567 int retry = 10; 568 int error; 569 570 /* Notify the kernel a devices partition table has been updated */ 571 while ((error = ioctl(fd, BLKRRPART)) != 0) { 572 if ((--retry == 0) || (errno != EBUSY)) { 573 (void) fprintf(stderr, "the kernel failed to rescan " 574 "the partition table: %d\n", errno); 575 return (-1); 576 } 577 usleep(50000); 578 } 579 580 return (0); 581 } 582 583 static int 584 check_label(int fd, dk_efi_t *dk_ioc) 585 { 586 efi_gpt_t *efi; 587 uint_t crc; 588 589 if (efi_ioctl(fd, DKIOCGETEFI, dk_ioc) == -1) { 590 switch (errno) { 591 case EIO: 592 return (VT_EIO); 593 default: 594 return (VT_ERROR); 595 } 596 } 597 efi = dk_ioc->dki_data; 598 if (efi->efi_gpt_Signature != LE_64(EFI_SIGNATURE)) { 599 if (efi_debug) 600 (void) fprintf(stderr, 601 "Bad EFI signature: 0x%llx != 0x%llx\n", 602 (long long)efi->efi_gpt_Signature, 603 (long long)LE_64(EFI_SIGNATURE)); 604 return (VT_EINVAL); 605 } 606 607 /* 608 * check CRC of the header; the size of the header should 609 * never be larger than one block 610 */ 611 crc = efi->efi_gpt_HeaderCRC32; 612 efi->efi_gpt_HeaderCRC32 = 0; 613 len_t headerSize = (len_t)LE_32(efi->efi_gpt_HeaderSize); 614 615 if (headerSize < EFI_MIN_LABEL_SIZE || headerSize > EFI_LABEL_SIZE) { 616 if (efi_debug) 617 (void) fprintf(stderr, 618 "Invalid EFI HeaderSize %llu. Assuming %d.\n", 619 headerSize, EFI_MIN_LABEL_SIZE); 620 } 621 622 if ((headerSize > dk_ioc->dki_length) || 623 crc != LE_32(efi_crc32((unsigned char *)efi, headerSize))) { 624 if (efi_debug) 625 (void) fprintf(stderr, 626 "Bad EFI CRC: 0x%x != 0x%x\n", 627 crc, LE_32(efi_crc32((unsigned char *)efi, 628 headerSize))); 629 return (VT_EINVAL); 630 } 631 632 return (0); 633 } 634 635 static int 636 efi_read(int fd, struct dk_gpt *vtoc) 637 { 638 int i, j; 639 int label_len; 640 int rval = 0; 641 int md_flag = 0; 642 int vdc_flag = 0; 643 diskaddr_t capacity = 0; 644 uint_t lbsize = 0; 645 struct dk_minfo disk_info; 646 dk_efi_t dk_ioc; 647 efi_gpt_t *efi; 648 efi_gpe_t *efi_parts; 649 struct dk_cinfo dki_info; 650 uint32_t user_length; 651 boolean_t legacy_label = B_FALSE; 652 653 /* 654 * get the partition number for this file descriptor. 655 */ 656 if ((rval = efi_get_info(fd, &dki_info)) != 0) 657 return (rval); 658 659 if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) && 660 (strncmp(dki_info.dki_dname, "md", 3) == 0)) { 661 md_flag++; 662 } else if ((strncmp(dki_info.dki_cname, "vdc", 4) == 0) && 663 (strncmp(dki_info.dki_dname, "vdc", 4) == 0)) { 664 /* 665 * The controller and drive name "vdc" (virtual disk client) 666 * indicates a LDoms virtual disk. 667 */ 668 vdc_flag++; 669 } 670 671 /* get the LBA size */ 672 if (read_disk_info(fd, &capacity, &lbsize) == -1) { 673 if (efi_debug) { 674 (void) fprintf(stderr, 675 "unable to read disk info: %d", 676 errno); 677 } 678 return (VT_EINVAL); 679 } 680 681 disk_info.dki_lbsize = lbsize; 682 disk_info.dki_capacity = capacity; 683 684 if (disk_info.dki_lbsize == 0) { 685 if (efi_debug) { 686 (void) fprintf(stderr, 687 "efi_read: assuming LBA 512 bytes\n"); 688 } 689 disk_info.dki_lbsize = DEV_BSIZE; 690 } 691 /* 692 * Read the EFI GPT to figure out how many partitions we need 693 * to deal with. 694 */ 695 dk_ioc.dki_lba = 1; 696 if (NBLOCKS(vtoc->efi_nparts, disk_info.dki_lbsize) < 34) { 697 label_len = EFI_MIN_ARRAY_SIZE + disk_info.dki_lbsize; 698 } else { 699 label_len = vtoc->efi_nparts * (int) sizeof (efi_gpe_t) + 700 disk_info.dki_lbsize; 701 if (label_len % disk_info.dki_lbsize) { 702 /* pad to physical sector size */ 703 label_len += disk_info.dki_lbsize; 704 label_len &= ~(disk_info.dki_lbsize - 1); 705 } 706 } 707 708 if (posix_memalign((void **)&dk_ioc.dki_data, 709 disk_info.dki_lbsize, label_len)) 710 return (VT_ERROR); 711 712 memset(dk_ioc.dki_data, 0, label_len); 713 dk_ioc.dki_length = disk_info.dki_lbsize; 714 user_length = vtoc->efi_nparts; 715 efi = dk_ioc.dki_data; 716 if (md_flag) { 717 dk_ioc.dki_length = label_len; 718 if (efi_ioctl(fd, DKIOCGETEFI, &dk_ioc) == -1) { 719 switch (errno) { 720 case EIO: 721 return (VT_EIO); 722 default: 723 return (VT_ERROR); 724 } 725 } 726 } else if ((rval = check_label(fd, &dk_ioc)) == VT_EINVAL) { 727 /* 728 * No valid label here; try the alternate. Note that here 729 * we just read GPT header and save it into dk_ioc.data, 730 * Later, we will read GUID partition entry array if we 731 * can get valid GPT header. 732 */ 733 734 /* 735 * This is a workaround for legacy systems. In the past, the 736 * last sector of SCSI disk was invisible on x86 platform. At 737 * that time, backup label was saved on the next to the last 738 * sector. It is possible for users to move a disk from previous 739 * solaris system to present system. Here, we attempt to search 740 * legacy backup EFI label first. 741 */ 742 dk_ioc.dki_lba = disk_info.dki_capacity - 2; 743 dk_ioc.dki_length = disk_info.dki_lbsize; 744 rval = check_label(fd, &dk_ioc); 745 if (rval == VT_EINVAL) { 746 /* 747 * we didn't find legacy backup EFI label, try to 748 * search backup EFI label in the last block. 749 */ 750 dk_ioc.dki_lba = disk_info.dki_capacity - 1; 751 dk_ioc.dki_length = disk_info.dki_lbsize; 752 rval = check_label(fd, &dk_ioc); 753 if (rval == 0) { 754 legacy_label = B_TRUE; 755 if (efi_debug) 756 (void) fprintf(stderr, 757 "efi_read: primary label corrupt; " 758 "using EFI backup label located on" 759 " the last block\n"); 760 } 761 } else { 762 if ((efi_debug) && (rval == 0)) 763 (void) fprintf(stderr, "efi_read: primary label" 764 " corrupt; using legacy EFI backup label " 765 " located on the next to last block\n"); 766 } 767 768 if (rval == 0) { 769 dk_ioc.dki_lba = LE_64(efi->efi_gpt_PartitionEntryLBA); 770 vtoc->efi_flags |= EFI_GPT_PRIMARY_CORRUPT; 771 vtoc->efi_nparts = 772 LE_32(efi->efi_gpt_NumberOfPartitionEntries); 773 /* 774 * Partition tables are between backup GPT header 775 * table and ParitionEntryLBA (the starting LBA of 776 * the GUID partition entries array). Now that we 777 * already got valid GPT header and saved it in 778 * dk_ioc.dki_data, we try to get GUID partition 779 * entry array here. 780 */ 781 /* LINTED */ 782 dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data 783 + disk_info.dki_lbsize); 784 if (legacy_label) 785 dk_ioc.dki_length = disk_info.dki_capacity - 1 - 786 dk_ioc.dki_lba; 787 else 788 dk_ioc.dki_length = disk_info.dki_capacity - 2 - 789 dk_ioc.dki_lba; 790 dk_ioc.dki_length *= disk_info.dki_lbsize; 791 if (dk_ioc.dki_length > 792 ((len_t)label_len - sizeof (*dk_ioc.dki_data))) { 793 rval = VT_EINVAL; 794 } else { 795 /* 796 * read GUID partition entry array 797 */ 798 rval = efi_ioctl(fd, DKIOCGETEFI, &dk_ioc); 799 } 800 } 801 802 } else if (rval == 0) { 803 804 dk_ioc.dki_lba = LE_64(efi->efi_gpt_PartitionEntryLBA); 805 /* LINTED */ 806 dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data 807 + disk_info.dki_lbsize); 808 dk_ioc.dki_length = label_len - disk_info.dki_lbsize; 809 rval = efi_ioctl(fd, DKIOCGETEFI, &dk_ioc); 810 811 } else if (vdc_flag && rval == VT_ERROR && errno == EINVAL) { 812 /* 813 * When the device is a LDoms virtual disk, the DKIOCGETEFI 814 * ioctl can fail with EINVAL if the virtual disk backend 815 * is a ZFS volume serviced by a domain running an old version 816 * of Solaris. This is because the DKIOCGETEFI ioctl was 817 * initially incorrectly implemented for a ZFS volume and it 818 * expected the GPT and GPE to be retrieved with a single ioctl. 819 * So we try to read the GPT and the GPE using that old style 820 * ioctl. 821 */ 822 dk_ioc.dki_lba = 1; 823 dk_ioc.dki_length = label_len; 824 rval = check_label(fd, &dk_ioc); 825 } 826 827 if (rval < 0) { 828 free(efi); 829 return (rval); 830 } 831 832 /* LINTED -- always longlong aligned */ 833 efi_parts = (efi_gpe_t *)(((char *)efi) + disk_info.dki_lbsize); 834 835 /* 836 * Assemble this into a "dk_gpt" struct for easier 837 * digestibility by applications. 838 */ 839 vtoc->efi_version = LE_32(efi->efi_gpt_Revision); 840 vtoc->efi_nparts = LE_32(efi->efi_gpt_NumberOfPartitionEntries); 841 vtoc->efi_part_size = LE_32(efi->efi_gpt_SizeOfPartitionEntry); 842 vtoc->efi_lbasize = disk_info.dki_lbsize; 843 vtoc->efi_last_lba = disk_info.dki_capacity - 1; 844 vtoc->efi_first_u_lba = LE_64(efi->efi_gpt_FirstUsableLBA); 845 vtoc->efi_last_u_lba = LE_64(efi->efi_gpt_LastUsableLBA); 846 vtoc->efi_altern_lba = LE_64(efi->efi_gpt_AlternateLBA); 847 UUID_LE_CONVERT(vtoc->efi_disk_uguid, efi->efi_gpt_DiskGUID); 848 849 /* 850 * If the array the user passed in is too small, set the length 851 * to what it needs to be and return 852 */ 853 if (user_length < vtoc->efi_nparts) { 854 return (VT_EINVAL); 855 } 856 857 for (i = 0; i < vtoc->efi_nparts; i++) { 858 UUID_LE_CONVERT(vtoc->efi_parts[i].p_guid, 859 efi_parts[i].efi_gpe_PartitionTypeGUID); 860 861 for (j = 0; 862 j < sizeof (conversion_array) 863 / sizeof (struct uuid_to_ptag); j++) { 864 865 if (memcmp(&vtoc->efi_parts[i].p_guid, 866 &conversion_array[j].uuid, 867 sizeof (struct uuid)) == 0) { 868 vtoc->efi_parts[i].p_tag = j; 869 break; 870 } 871 } 872 if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED) 873 continue; 874 vtoc->efi_parts[i].p_flag = 875 LE_16(efi_parts[i].efi_gpe_Attributes.PartitionAttrs); 876 vtoc->efi_parts[i].p_start = 877 LE_64(efi_parts[i].efi_gpe_StartingLBA); 878 vtoc->efi_parts[i].p_size = 879 LE_64(efi_parts[i].efi_gpe_EndingLBA) - 880 vtoc->efi_parts[i].p_start + 1; 881 for (j = 0; j < EFI_PART_NAME_LEN; j++) { 882 vtoc->efi_parts[i].p_name[j] = 883 (uchar_t)LE_16( 884 efi_parts[i].efi_gpe_PartitionName[j]); 885 } 886 887 UUID_LE_CONVERT(vtoc->efi_parts[i].p_uguid, 888 efi_parts[i].efi_gpe_UniquePartitionGUID); 889 } 890 free(efi); 891 892 return (dki_info.dki_partition); 893 } 894 895 /* writes a "protective" MBR */ 896 static int 897 write_pmbr(int fd, struct dk_gpt *vtoc) 898 { 899 dk_efi_t dk_ioc; 900 struct mboot mb; 901 uchar_t *cp; 902 diskaddr_t size_in_lba; 903 uchar_t *buf; 904 int len; 905 906 len = (vtoc->efi_lbasize == 0) ? sizeof (mb) : vtoc->efi_lbasize; 907 if (posix_memalign((void **)&buf, len, len)) 908 return (VT_ERROR); 909 910 /* 911 * Preserve any boot code and disk signature if the first block is 912 * already an MBR. 913 */ 914 memset(buf, 0, len); 915 dk_ioc.dki_lba = 0; 916 dk_ioc.dki_length = len; 917 /* LINTED -- always longlong aligned */ 918 dk_ioc.dki_data = (efi_gpt_t *)buf; 919 if (efi_ioctl(fd, DKIOCGETEFI, &dk_ioc) == -1) { 920 memset(&mb, 0, sizeof (mb)); 921 mb.signature = LE_16(MBB_MAGIC); 922 } else { 923 (void) memcpy(&mb, buf, sizeof (mb)); 924 if (mb.signature != LE_16(MBB_MAGIC)) { 925 memset(&mb, 0, sizeof (mb)); 926 mb.signature = LE_16(MBB_MAGIC); 927 } 928 } 929 930 memset(&mb.parts, 0, sizeof (mb.parts)); 931 cp = (uchar_t *)&mb.parts[0]; 932 /* bootable or not */ 933 *cp++ = 0; 934 /* beginning CHS; 0xffffff if not representable */ 935 *cp++ = 0xff; 936 *cp++ = 0xff; 937 *cp++ = 0xff; 938 /* OS type */ 939 *cp++ = EFI_PMBR; 940 /* ending CHS; 0xffffff if not representable */ 941 *cp++ = 0xff; 942 *cp++ = 0xff; 943 *cp++ = 0xff; 944 /* starting LBA: 1 (little endian format) by EFI definition */ 945 *cp++ = 0x01; 946 *cp++ = 0x00; 947 *cp++ = 0x00; 948 *cp++ = 0x00; 949 /* ending LBA: last block on the disk (little endian format) */ 950 size_in_lba = vtoc->efi_last_lba; 951 if (size_in_lba < 0xffffffff) { 952 *cp++ = (size_in_lba & 0x000000ff); 953 *cp++ = (size_in_lba & 0x0000ff00) >> 8; 954 *cp++ = (size_in_lba & 0x00ff0000) >> 16; 955 *cp++ = (size_in_lba & 0xff000000) >> 24; 956 } else { 957 *cp++ = 0xff; 958 *cp++ = 0xff; 959 *cp++ = 0xff; 960 *cp++ = 0xff; 961 } 962 963 (void) memcpy(buf, &mb, sizeof (mb)); 964 /* LINTED -- always longlong aligned */ 965 dk_ioc.dki_data = (efi_gpt_t *)buf; 966 dk_ioc.dki_lba = 0; 967 dk_ioc.dki_length = len; 968 if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) { 969 free(buf); 970 switch (errno) { 971 case EIO: 972 return (VT_EIO); 973 case EINVAL: 974 return (VT_EINVAL); 975 default: 976 return (VT_ERROR); 977 } 978 } 979 free(buf); 980 return (0); 981 } 982 983 /* make sure the user specified something reasonable */ 984 static int 985 check_input(struct dk_gpt *vtoc) 986 { 987 int resv_part = -1; 988 int i, j; 989 diskaddr_t istart, jstart, isize, jsize, endsect; 990 991 /* 992 * Sanity-check the input (make sure no partitions overlap) 993 */ 994 for (i = 0; i < vtoc->efi_nparts; i++) { 995 /* It can't be unassigned and have an actual size */ 996 if ((vtoc->efi_parts[i].p_tag == V_UNASSIGNED) && 997 (vtoc->efi_parts[i].p_size != 0)) { 998 if (efi_debug) { 999 (void) fprintf(stderr, "partition %d is " 1000 "\"unassigned\" but has a size of %llu", 1001 i, vtoc->efi_parts[i].p_size); 1002 } 1003 return (VT_EINVAL); 1004 } 1005 if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED) { 1006 if (uuid_is_null((uchar_t *)&vtoc->efi_parts[i].p_guid)) 1007 continue; 1008 /* we have encountered an unknown uuid */ 1009 vtoc->efi_parts[i].p_tag = 0xff; 1010 } 1011 if (vtoc->efi_parts[i].p_tag == V_RESERVED) { 1012 if (resv_part != -1) { 1013 if (efi_debug) { 1014 (void) fprintf(stderr, "found " 1015 "duplicate reserved partition " 1016 "at %d\n", i); 1017 } 1018 return (VT_EINVAL); 1019 } 1020 resv_part = i; 1021 } 1022 if ((vtoc->efi_parts[i].p_start < vtoc->efi_first_u_lba) || 1023 (vtoc->efi_parts[i].p_start > vtoc->efi_last_u_lba)) { 1024 if (efi_debug) { 1025 (void) fprintf(stderr, 1026 "Partition %d starts at %llu. ", 1027 i, 1028 vtoc->efi_parts[i].p_start); 1029 (void) fprintf(stderr, 1030 "It must be between %llu and %llu.\n", 1031 vtoc->efi_first_u_lba, 1032 vtoc->efi_last_u_lba); 1033 } 1034 return (VT_EINVAL); 1035 } 1036 if ((vtoc->efi_parts[i].p_start + 1037 vtoc->efi_parts[i].p_size < 1038 vtoc->efi_first_u_lba) || 1039 (vtoc->efi_parts[i].p_start + 1040 vtoc->efi_parts[i].p_size > 1041 vtoc->efi_last_u_lba + 1)) { 1042 if (efi_debug) { 1043 (void) fprintf(stderr, 1044 "Partition %d ends at %llu. ", 1045 i, 1046 vtoc->efi_parts[i].p_start + 1047 vtoc->efi_parts[i].p_size); 1048 (void) fprintf(stderr, 1049 "It must be between %llu and %llu.\n", 1050 vtoc->efi_first_u_lba, 1051 vtoc->efi_last_u_lba); 1052 } 1053 return (VT_EINVAL); 1054 } 1055 1056 for (j = 0; j < vtoc->efi_nparts; j++) { 1057 isize = vtoc->efi_parts[i].p_size; 1058 jsize = vtoc->efi_parts[j].p_size; 1059 istart = vtoc->efi_parts[i].p_start; 1060 jstart = vtoc->efi_parts[j].p_start; 1061 if ((i != j) && (isize != 0) && (jsize != 0)) { 1062 endsect = jstart + jsize -1; 1063 if ((jstart <= istart) && 1064 (istart <= endsect)) { 1065 if (efi_debug) { 1066 (void) fprintf(stderr, 1067 "Partition %d overlaps " 1068 "partition %d.", i, j); 1069 } 1070 return (VT_EINVAL); 1071 } 1072 } 1073 } 1074 } 1075 /* just a warning for now */ 1076 if ((resv_part == -1) && efi_debug) { 1077 (void) fprintf(stderr, 1078 "no reserved partition found\n"); 1079 } 1080 return (0); 1081 } 1082 1083 static int 1084 call_blkpg_ioctl(int fd, int command, diskaddr_t start, 1085 diskaddr_t size, uint_t pno) 1086 { 1087 struct blkpg_ioctl_arg ioctl_arg; 1088 struct blkpg_partition linux_part; 1089 memset(&linux_part, 0, sizeof (linux_part)); 1090 1091 char *path = efi_get_devname(fd); 1092 if (path == NULL) { 1093 (void) fprintf(stderr, "failed to retrieve device name\n"); 1094 return (VT_EINVAL); 1095 } 1096 1097 linux_part.start = start; 1098 linux_part.length = size; 1099 linux_part.pno = pno; 1100 snprintf(linux_part.devname, BLKPG_DEVNAMELTH - 1, "%s%u", path, pno); 1101 linux_part.devname[BLKPG_DEVNAMELTH - 1] = '\0'; 1102 free(path); 1103 1104 ioctl_arg.op = command; 1105 ioctl_arg.flags = 0; 1106 ioctl_arg.datalen = sizeof (struct blkpg_partition); 1107 ioctl_arg.data = &linux_part; 1108 1109 return (ioctl(fd, BLKPG, &ioctl_arg)); 1110 } 1111 1112 /* 1113 * add all the unallocated space to the current label 1114 */ 1115 int 1116 efi_use_whole_disk(int fd) 1117 { 1118 struct dk_gpt *efi_label = NULL; 1119 int rval; 1120 int i; 1121 uint_t resv_index = 0, data_index = 0; 1122 diskaddr_t resv_start = 0, data_start = 0; 1123 diskaddr_t data_size, limit, difference; 1124 boolean_t sync_needed = B_FALSE; 1125 uint_t nblocks; 1126 1127 rval = efi_alloc_and_read(fd, &efi_label); 1128 if (rval < 0) { 1129 if (efi_label != NULL) 1130 efi_free(efi_label); 1131 return (rval); 1132 } 1133 1134 /* 1135 * Find the last physically non-zero partition. 1136 * This should be the reserved partition. 1137 */ 1138 for (i = 0; i < efi_label->efi_nparts; i ++) { 1139 if (resv_start < efi_label->efi_parts[i].p_start) { 1140 resv_start = efi_label->efi_parts[i].p_start; 1141 resv_index = i; 1142 } 1143 } 1144 1145 /* 1146 * Find the last physically non-zero partition before that. 1147 * This is the data partition. 1148 */ 1149 for (i = 0; i < resv_index; i ++) { 1150 if (data_start < efi_label->efi_parts[i].p_start) { 1151 data_start = efi_label->efi_parts[i].p_start; 1152 data_index = i; 1153 } 1154 } 1155 data_size = efi_label->efi_parts[data_index].p_size; 1156 1157 /* 1158 * See the "efi_alloc_and_init" function for more information 1159 * about where this "nblocks" value comes from. 1160 */ 1161 nblocks = efi_label->efi_first_u_lba - 1; 1162 1163 /* 1164 * Determine if the EFI label is out of sync. We check that: 1165 * 1166 * 1. the data partition ends at the limit we set, and 1167 * 2. the reserved partition starts at the limit we set. 1168 * 1169 * If either of these conditions is not met, then we need to 1170 * resync the EFI label. 1171 * 1172 * The limit is the last usable LBA, determined by the last LBA 1173 * and the first usable LBA fields on the EFI label of the disk 1174 * (see the lines directly above). Additionally, we factor in 1175 * EFI_MIN_RESV_SIZE (per its use in "zpool_label_disk") and 1176 * P2ALIGN it to ensure the partition boundaries are aligned 1177 * (for performance reasons). The alignment should match the 1178 * alignment used by the "zpool_label_disk" function. 1179 */ 1180 limit = P2ALIGN(efi_label->efi_last_lba - nblocks - EFI_MIN_RESV_SIZE, 1181 PARTITION_END_ALIGNMENT); 1182 if (data_start + data_size != limit || resv_start != limit) 1183 sync_needed = B_TRUE; 1184 1185 if (efi_debug && sync_needed) 1186 (void) fprintf(stderr, "efi_use_whole_disk: sync needed\n"); 1187 1188 /* 1189 * If alter_lba is 1, we are using the backup label. 1190 * Since we can locate the backup label by disk capacity, 1191 * there must be no unallocated space. 1192 */ 1193 if ((efi_label->efi_altern_lba == 1) || (efi_label->efi_altern_lba 1194 >= efi_label->efi_last_lba && !sync_needed)) { 1195 if (efi_debug) { 1196 (void) fprintf(stderr, 1197 "efi_use_whole_disk: requested space not found\n"); 1198 } 1199 efi_free(efi_label); 1200 return (VT_ENOSPC); 1201 } 1202 1203 /* 1204 * Verify that we've found the reserved partition by checking 1205 * that it looks the way it did when we created it in zpool_label_disk. 1206 * If we've found the incorrect partition, then we know that this 1207 * device was reformatted and no longer is solely used by ZFS. 1208 */ 1209 if ((efi_label->efi_parts[resv_index].p_size != EFI_MIN_RESV_SIZE) || 1210 (efi_label->efi_parts[resv_index].p_tag != V_RESERVED) || 1211 (resv_index != 8)) { 1212 if (efi_debug) { 1213 (void) fprintf(stderr, 1214 "efi_use_whole_disk: wholedisk not available\n"); 1215 } 1216 efi_free(efi_label); 1217 return (VT_ENOSPC); 1218 } 1219 1220 if (data_start + data_size != resv_start) { 1221 if (efi_debug) { 1222 (void) fprintf(stderr, 1223 "efi_use_whole_disk: " 1224 "data_start (%lli) + " 1225 "data_size (%lli) != " 1226 "resv_start (%lli)\n", 1227 data_start, data_size, resv_start); 1228 } 1229 1230 return (VT_EINVAL); 1231 } 1232 1233 if (limit < resv_start) { 1234 if (efi_debug) { 1235 (void) fprintf(stderr, 1236 "efi_use_whole_disk: " 1237 "limit (%lli) < resv_start (%lli)\n", 1238 limit, resv_start); 1239 } 1240 1241 return (VT_EINVAL); 1242 } 1243 1244 difference = limit - resv_start; 1245 1246 if (efi_debug) 1247 (void) fprintf(stderr, 1248 "efi_use_whole_disk: difference is %lli\n", difference); 1249 1250 /* 1251 * Move the reserved partition. There is currently no data in 1252 * here except fabricated devids (which get generated via 1253 * efi_write()). So there is no need to copy data. 1254 */ 1255 efi_label->efi_parts[data_index].p_size += difference; 1256 efi_label->efi_parts[resv_index].p_start += difference; 1257 efi_label->efi_last_u_lba = efi_label->efi_last_lba - nblocks; 1258 1259 /* 1260 * Rescanning the partition table in the kernel can result 1261 * in the device links to be removed (see comment in vdev_disk_open). 1262 * If BLKPG_RESIZE_PARTITION is available, then we can resize 1263 * the partition table online and avoid having to remove the device 1264 * links used by the pool. This provides a very deterministic 1265 * approach to resizing devices and does not require any 1266 * loops waiting for devices to reappear. 1267 */ 1268 #ifdef BLKPG_RESIZE_PARTITION 1269 /* 1270 * Delete the reserved partition since we're about to expand 1271 * the data partition and it would overlap with the reserved 1272 * partition. 1273 * NOTE: The starting index for the ioctl is 1 while for the 1274 * EFI partitions it's 0. For that reason we have to add one 1275 * whenever we make an ioctl call. 1276 */ 1277 rval = call_blkpg_ioctl(fd, BLKPG_DEL_PARTITION, 0, 0, resv_index + 1); 1278 if (rval != 0) 1279 goto out; 1280 1281 /* 1282 * Expand the data partition 1283 */ 1284 rval = call_blkpg_ioctl(fd, BLKPG_RESIZE_PARTITION, 1285 efi_label->efi_parts[data_index].p_start * efi_label->efi_lbasize, 1286 efi_label->efi_parts[data_index].p_size * efi_label->efi_lbasize, 1287 data_index + 1); 1288 if (rval != 0) { 1289 (void) fprintf(stderr, "Unable to resize data " 1290 "partition: %d\n", rval); 1291 /* 1292 * Since we failed to resize, we need to reset the start 1293 * of the reserve partition and re-create it. 1294 */ 1295 efi_label->efi_parts[resv_index].p_start -= difference; 1296 } 1297 1298 /* 1299 * Re-add the reserved partition. If we've expanded the data partition 1300 * then we'll move the reserve partition to the end of the data 1301 * partition. Otherwise, we'll recreate the partition in its original 1302 * location. Note that we do this as best-effort and ignore any 1303 * errors that may arise here. This will ensure that we finish writing 1304 * the EFI label. 1305 */ 1306 (void) call_blkpg_ioctl(fd, BLKPG_ADD_PARTITION, 1307 efi_label->efi_parts[resv_index].p_start * efi_label->efi_lbasize, 1308 efi_label->efi_parts[resv_index].p_size * efi_label->efi_lbasize, 1309 resv_index + 1); 1310 #endif 1311 1312 /* 1313 * We're now ready to write the EFI label. 1314 */ 1315 if (rval == 0) { 1316 rval = efi_write(fd, efi_label); 1317 if (rval < 0 && efi_debug) { 1318 (void) fprintf(stderr, "efi_use_whole_disk:fail " 1319 "to write label, rval=%d\n", rval); 1320 } 1321 } 1322 1323 out: 1324 efi_free(efi_label); 1325 return (rval); 1326 } 1327 1328 /* 1329 * write EFI label and backup label 1330 */ 1331 int 1332 efi_write(int fd, struct dk_gpt *vtoc) 1333 { 1334 dk_efi_t dk_ioc; 1335 efi_gpt_t *efi; 1336 efi_gpe_t *efi_parts; 1337 int i, j; 1338 struct dk_cinfo dki_info; 1339 int rval; 1340 int md_flag = 0; 1341 int nblocks; 1342 diskaddr_t lba_backup_gpt_hdr; 1343 1344 if ((rval = efi_get_info(fd, &dki_info)) != 0) 1345 return (rval); 1346 1347 /* check if we are dealing with a metadevice */ 1348 if ((strncmp(dki_info.dki_cname, "pseudo", 7) == 0) && 1349 (strncmp(dki_info.dki_dname, "md", 3) == 0)) { 1350 md_flag = 1; 1351 } 1352 1353 if (check_input(vtoc)) { 1354 /* 1355 * not valid; if it's a metadevice just pass it down 1356 * because SVM will do its own checking 1357 */ 1358 if (md_flag == 0) { 1359 return (VT_EINVAL); 1360 } 1361 } 1362 1363 dk_ioc.dki_lba = 1; 1364 if (NBLOCKS(vtoc->efi_nparts, vtoc->efi_lbasize) < 34) { 1365 dk_ioc.dki_length = EFI_MIN_ARRAY_SIZE + vtoc->efi_lbasize; 1366 } else { 1367 dk_ioc.dki_length = NBLOCKS(vtoc->efi_nparts, 1368 vtoc->efi_lbasize) * 1369 vtoc->efi_lbasize; 1370 } 1371 1372 /* 1373 * the number of blocks occupied by GUID partition entry array 1374 */ 1375 nblocks = dk_ioc.dki_length / vtoc->efi_lbasize - 1; 1376 1377 /* 1378 * Backup GPT header is located on the block after GUID 1379 * partition entry array. Here, we calculate the address 1380 * for backup GPT header. 1381 */ 1382 lba_backup_gpt_hdr = vtoc->efi_last_u_lba + 1 + nblocks; 1383 if (posix_memalign((void **)&dk_ioc.dki_data, 1384 vtoc->efi_lbasize, dk_ioc.dki_length)) 1385 return (VT_ERROR); 1386 1387 memset(dk_ioc.dki_data, 0, dk_ioc.dki_length); 1388 efi = dk_ioc.dki_data; 1389 1390 /* stuff user's input into EFI struct */ 1391 efi->efi_gpt_Signature = LE_64(EFI_SIGNATURE); 1392 efi->efi_gpt_Revision = LE_32(vtoc->efi_version); /* 0x02000100 */ 1393 efi->efi_gpt_HeaderSize = LE_32(sizeof (struct efi_gpt) - LEN_EFI_PAD); 1394 efi->efi_gpt_Reserved1 = 0; 1395 efi->efi_gpt_MyLBA = LE_64(1ULL); 1396 efi->efi_gpt_AlternateLBA = LE_64(lba_backup_gpt_hdr); 1397 efi->efi_gpt_FirstUsableLBA = LE_64(vtoc->efi_first_u_lba); 1398 efi->efi_gpt_LastUsableLBA = LE_64(vtoc->efi_last_u_lba); 1399 efi->efi_gpt_PartitionEntryLBA = LE_64(2ULL); 1400 efi->efi_gpt_NumberOfPartitionEntries = LE_32(vtoc->efi_nparts); 1401 efi->efi_gpt_SizeOfPartitionEntry = LE_32(sizeof (struct efi_gpe)); 1402 UUID_LE_CONVERT(efi->efi_gpt_DiskGUID, vtoc->efi_disk_uguid); 1403 1404 /* LINTED -- always longlong aligned */ 1405 efi_parts = (efi_gpe_t *)((char *)dk_ioc.dki_data + vtoc->efi_lbasize); 1406 1407 for (i = 0; i < vtoc->efi_nparts; i++) { 1408 for (j = 0; 1409 j < sizeof (conversion_array) / 1410 sizeof (struct uuid_to_ptag); j++) { 1411 1412 if (vtoc->efi_parts[i].p_tag == j) { 1413 UUID_LE_CONVERT( 1414 efi_parts[i].efi_gpe_PartitionTypeGUID, 1415 conversion_array[j].uuid); 1416 break; 1417 } 1418 } 1419 1420 if (j == sizeof (conversion_array) / 1421 sizeof (struct uuid_to_ptag)) { 1422 /* 1423 * If we didn't have a matching uuid match, bail here. 1424 * Don't write a label with unknown uuid. 1425 */ 1426 if (efi_debug) { 1427 (void) fprintf(stderr, 1428 "Unknown uuid for p_tag %d\n", 1429 vtoc->efi_parts[i].p_tag); 1430 } 1431 return (VT_EINVAL); 1432 } 1433 1434 /* Zero's should be written for empty partitions */ 1435 if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED) 1436 continue; 1437 1438 efi_parts[i].efi_gpe_StartingLBA = 1439 LE_64(vtoc->efi_parts[i].p_start); 1440 efi_parts[i].efi_gpe_EndingLBA = 1441 LE_64(vtoc->efi_parts[i].p_start + 1442 vtoc->efi_parts[i].p_size - 1); 1443 efi_parts[i].efi_gpe_Attributes.PartitionAttrs = 1444 LE_16(vtoc->efi_parts[i].p_flag); 1445 for (j = 0; j < EFI_PART_NAME_LEN; j++) { 1446 efi_parts[i].efi_gpe_PartitionName[j] = 1447 LE_16((ushort_t)vtoc->efi_parts[i].p_name[j]); 1448 } 1449 if ((vtoc->efi_parts[i].p_tag != V_UNASSIGNED) && 1450 uuid_is_null((uchar_t *)&vtoc->efi_parts[i].p_uguid)) { 1451 (void) uuid_generate((uchar_t *) 1452 &vtoc->efi_parts[i].p_uguid); 1453 } 1454 memcpy(&efi_parts[i].efi_gpe_UniquePartitionGUID, 1455 &vtoc->efi_parts[i].p_uguid, 1456 sizeof (uuid_t)); 1457 } 1458 efi->efi_gpt_PartitionEntryArrayCRC32 = 1459 LE_32(efi_crc32((unsigned char *)efi_parts, 1460 vtoc->efi_nparts * (int)sizeof (struct efi_gpe))); 1461 efi->efi_gpt_HeaderCRC32 = 1462 LE_32(efi_crc32((unsigned char *)efi, 1463 LE_32(efi->efi_gpt_HeaderSize))); 1464 1465 if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) { 1466 free(dk_ioc.dki_data); 1467 switch (errno) { 1468 case EIO: 1469 return (VT_EIO); 1470 case EINVAL: 1471 return (VT_EINVAL); 1472 default: 1473 return (VT_ERROR); 1474 } 1475 } 1476 /* if it's a metadevice we're done */ 1477 if (md_flag) { 1478 free(dk_ioc.dki_data); 1479 return (0); 1480 } 1481 1482 /* write backup partition array */ 1483 dk_ioc.dki_lba = vtoc->efi_last_u_lba + 1; 1484 dk_ioc.dki_length -= vtoc->efi_lbasize; 1485 /* LINTED */ 1486 dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data + 1487 vtoc->efi_lbasize); 1488 1489 if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) { 1490 /* 1491 * we wrote the primary label okay, so don't fail 1492 */ 1493 if (efi_debug) { 1494 (void) fprintf(stderr, 1495 "write of backup partitions to block %llu " 1496 "failed, errno %d\n", 1497 vtoc->efi_last_u_lba + 1, 1498 errno); 1499 } 1500 } 1501 /* 1502 * now swap MyLBA and AlternateLBA fields and write backup 1503 * partition table header 1504 */ 1505 dk_ioc.dki_lba = lba_backup_gpt_hdr; 1506 dk_ioc.dki_length = vtoc->efi_lbasize; 1507 /* LINTED */ 1508 dk_ioc.dki_data = (efi_gpt_t *)((char *)dk_ioc.dki_data - 1509 vtoc->efi_lbasize); 1510 efi->efi_gpt_AlternateLBA = LE_64(1ULL); 1511 efi->efi_gpt_MyLBA = LE_64(lba_backup_gpt_hdr); 1512 efi->efi_gpt_PartitionEntryLBA = LE_64(vtoc->efi_last_u_lba + 1); 1513 efi->efi_gpt_HeaderCRC32 = 0; 1514 efi->efi_gpt_HeaderCRC32 = 1515 LE_32(efi_crc32((unsigned char *)dk_ioc.dki_data, 1516 LE_32(efi->efi_gpt_HeaderSize))); 1517 1518 if (efi_ioctl(fd, DKIOCSETEFI, &dk_ioc) == -1) { 1519 if (efi_debug) { 1520 (void) fprintf(stderr, 1521 "write of backup header to block %llu failed, " 1522 "errno %d\n", 1523 lba_backup_gpt_hdr, 1524 errno); 1525 } 1526 } 1527 /* write the PMBR */ 1528 (void) write_pmbr(fd, vtoc); 1529 free(dk_ioc.dki_data); 1530 1531 return (0); 1532 } 1533 1534 void 1535 efi_free(struct dk_gpt *ptr) 1536 { 1537 free(ptr); 1538 } 1539 1540 void 1541 efi_err_check(struct dk_gpt *vtoc) 1542 { 1543 int resv_part = -1; 1544 int i, j; 1545 diskaddr_t istart, jstart, isize, jsize, endsect; 1546 int overlap = 0; 1547 1548 /* 1549 * make sure no partitions overlap 1550 */ 1551 for (i = 0; i < vtoc->efi_nparts; i++) { 1552 /* It can't be unassigned and have an actual size */ 1553 if ((vtoc->efi_parts[i].p_tag == V_UNASSIGNED) && 1554 (vtoc->efi_parts[i].p_size != 0)) { 1555 (void) fprintf(stderr, 1556 "partition %d is \"unassigned\" but has a size " 1557 "of %llu\n", i, vtoc->efi_parts[i].p_size); 1558 } 1559 if (vtoc->efi_parts[i].p_tag == V_UNASSIGNED) { 1560 continue; 1561 } 1562 if (vtoc->efi_parts[i].p_tag == V_RESERVED) { 1563 if (resv_part != -1) { 1564 (void) fprintf(stderr, 1565 "found duplicate reserved partition at " 1566 "%d\n", i); 1567 } 1568 resv_part = i; 1569 if (vtoc->efi_parts[i].p_size != EFI_MIN_RESV_SIZE) 1570 (void) fprintf(stderr, 1571 "Warning: reserved partition size must " 1572 "be %d sectors\n", EFI_MIN_RESV_SIZE); 1573 } 1574 if ((vtoc->efi_parts[i].p_start < vtoc->efi_first_u_lba) || 1575 (vtoc->efi_parts[i].p_start > vtoc->efi_last_u_lba)) { 1576 (void) fprintf(stderr, 1577 "Partition %d starts at %llu\n", 1578 i, 1579 vtoc->efi_parts[i].p_start); 1580 (void) fprintf(stderr, 1581 "It must be between %llu and %llu.\n", 1582 vtoc->efi_first_u_lba, 1583 vtoc->efi_last_u_lba); 1584 } 1585 if ((vtoc->efi_parts[i].p_start + 1586 vtoc->efi_parts[i].p_size < 1587 vtoc->efi_first_u_lba) || 1588 (vtoc->efi_parts[i].p_start + 1589 vtoc->efi_parts[i].p_size > 1590 vtoc->efi_last_u_lba + 1)) { 1591 (void) fprintf(stderr, 1592 "Partition %d ends at %llu\n", 1593 i, 1594 vtoc->efi_parts[i].p_start + 1595 vtoc->efi_parts[i].p_size); 1596 (void) fprintf(stderr, 1597 "It must be between %llu and %llu.\n", 1598 vtoc->efi_first_u_lba, 1599 vtoc->efi_last_u_lba); 1600 } 1601 1602 for (j = 0; j < vtoc->efi_nparts; j++) { 1603 isize = vtoc->efi_parts[i].p_size; 1604 jsize = vtoc->efi_parts[j].p_size; 1605 istart = vtoc->efi_parts[i].p_start; 1606 jstart = vtoc->efi_parts[j].p_start; 1607 if ((i != j) && (isize != 0) && (jsize != 0)) { 1608 endsect = jstart + jsize -1; 1609 if ((jstart <= istart) && 1610 (istart <= endsect)) { 1611 if (!overlap) { 1612 (void) fprintf(stderr, 1613 "label error: EFI Labels do not " 1614 "support overlapping partitions\n"); 1615 } 1616 (void) fprintf(stderr, 1617 "Partition %d overlaps partition " 1618 "%d.\n", i, j); 1619 overlap = 1; 1620 } 1621 } 1622 } 1623 } 1624 /* make sure there is a reserved partition */ 1625 if (resv_part == -1) { 1626 (void) fprintf(stderr, 1627 "no reserved partition found\n"); 1628 } 1629 } 1630