1 /*- 2 * Copyright (c) 2014 Juniper Networks, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #include <sys/mman.h> 31 #include <sys/stat.h> 32 #include <assert.h> 33 #include <err.h> 34 #include <errno.h> 35 #include <limits.h> 36 #include <paths.h> 37 #include <stdint.h> 38 #include <stdio.h> 39 #include <stdlib.h> 40 #include <string.h> 41 #include <unistd.h> 42 43 #include "image.h" 44 #include "mkimg.h" 45 46 #ifndef MAP_NOCORE 47 #define MAP_NOCORE 0 48 #endif 49 #ifndef MAP_NOSYNC 50 #define MAP_NOSYNC 0 51 #endif 52 53 #ifndef SEEK_DATA 54 #define SEEK_DATA -1 55 #endif 56 #ifndef SEEK_HOLE 57 #define SEEK_HOLE -1 58 #endif 59 60 struct chunk { 61 TAILQ_ENTRY(chunk) ch_list; 62 size_t ch_size; /* Size of chunk in bytes. */ 63 lba_t ch_block; /* Block address in image. */ 64 union { 65 struct { 66 off_t ofs; /* Offset in backing file. */ 67 int fd; /* FD of backing file. */ 68 } file; 69 struct { 70 void *ptr; /* Pointer to data in memory */ 71 } mem; 72 } ch_u; 73 u_int ch_type; 74 #define CH_TYPE_ZEROES 0 /* Chunk is a gap (no data). */ 75 #define CH_TYPE_FILE 1 /* File-backed chunk. */ 76 #define CH_TYPE_MEMORY 2 /* Memory-backed chunk */ 77 }; 78 79 static TAILQ_HEAD(chunk_head, chunk) image_chunks; 80 static u_int image_nchunks; 81 82 static char image_swap_file[PATH_MAX]; 83 static int image_swap_fd = -1; 84 static u_int image_swap_pgsz; 85 static off_t image_swap_size; 86 87 static lba_t image_size; 88 89 static int 90 is_empty_sector(void *buf) 91 { 92 uint64_t *p = buf; 93 size_t n, max; 94 95 assert(((uintptr_t)p & 3) == 0); 96 97 max = secsz / sizeof(uint64_t); 98 for (n = 0; n < max; n++) { 99 if (p[n] != 0UL) 100 return (0); 101 } 102 return (1); 103 } 104 105 /* 106 * Swap file handlng. 107 */ 108 109 static off_t 110 image_swap_alloc(size_t size) 111 { 112 off_t ofs; 113 size_t unit; 114 115 unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz; 116 assert((unit & (unit - 1)) == 0); 117 118 size = (size + unit - 1) & ~(unit - 1); 119 120 ofs = image_swap_size; 121 image_swap_size += size; 122 if (ftruncate(image_swap_fd, image_swap_size) == -1) { 123 image_swap_size = ofs; 124 ofs = -1LL; 125 } 126 return (ofs); 127 } 128 129 /* 130 * Image chunk handling. 131 */ 132 133 static struct chunk * 134 image_chunk_find(lba_t blk) 135 { 136 static struct chunk *last = NULL; 137 struct chunk *ch; 138 139 ch = (last != NULL && last->ch_block <= blk) 140 ? last : TAILQ_FIRST(&image_chunks); 141 while (ch != NULL) { 142 if (ch->ch_block <= blk && 143 (lba_t)(ch->ch_block + (ch->ch_size / secsz)) > blk) { 144 last = ch; 145 break; 146 } 147 ch = TAILQ_NEXT(ch, ch_list); 148 } 149 return (ch); 150 } 151 152 static size_t 153 image_chunk_grow(struct chunk *ch, size_t sz) 154 { 155 size_t dsz, newsz; 156 157 newsz = ch->ch_size + sz; 158 if (newsz > ch->ch_size) { 159 ch->ch_size = newsz; 160 return (0); 161 } 162 /* We would overflow -- create new chunk for remainder. */ 163 dsz = SIZE_MAX - ch->ch_size; 164 assert(dsz < sz); 165 ch->ch_size = SIZE_MAX; 166 return (sz - dsz); 167 } 168 169 static struct chunk * 170 image_chunk_memory(struct chunk *ch, lba_t blk) 171 { 172 struct chunk *new; 173 void *ptr; 174 175 ptr = calloc(1, secsz); 176 if (ptr == NULL) 177 return (NULL); 178 179 if (ch->ch_block < blk) { 180 new = malloc(sizeof(*new)); 181 if (new == NULL) { 182 free(ptr); 183 return (NULL); 184 } 185 memcpy(new, ch, sizeof(*new)); 186 ch->ch_size = (blk - ch->ch_block) * secsz; 187 new->ch_block = blk; 188 new->ch_size -= ch->ch_size; 189 TAILQ_INSERT_AFTER(&image_chunks, ch, new, ch_list); 190 image_nchunks++; 191 ch = new; 192 } 193 194 if (ch->ch_size > secsz) { 195 new = malloc(sizeof(*new)); 196 if (new == NULL) { 197 free(ptr); 198 return (NULL); 199 } 200 memcpy(new, ch, sizeof(*new)); 201 ch->ch_size = secsz; 202 new->ch_block++; 203 new->ch_size -= secsz; 204 TAILQ_INSERT_AFTER(&image_chunks, ch, new, ch_list); 205 image_nchunks++; 206 } 207 208 ch->ch_type = CH_TYPE_MEMORY; 209 ch->ch_u.mem.ptr = ptr; 210 return (ch); 211 } 212 213 static int 214 image_chunk_skipto(lba_t to) 215 { 216 struct chunk *ch; 217 lba_t from; 218 size_t sz; 219 220 ch = TAILQ_LAST(&image_chunks, chunk_head); 221 from = (ch != NULL) ? ch->ch_block + (ch->ch_size / secsz) : 0LL; 222 223 assert(from <= to); 224 225 /* Nothing to do? */ 226 if (from == to) 227 return (0); 228 /* Avoid bugs due to overflows. */ 229 if ((uintmax_t)(to - from) > (uintmax_t)(SIZE_MAX / secsz)) 230 return (EFBIG); 231 sz = (to - from) * secsz; 232 if (ch != NULL && ch->ch_type == CH_TYPE_ZEROES) { 233 sz = image_chunk_grow(ch, sz); 234 if (sz == 0) 235 return (0); 236 from = ch->ch_block + (ch->ch_size / secsz); 237 } 238 ch = malloc(sizeof(*ch)); 239 if (ch == NULL) 240 return (ENOMEM); 241 memset(ch, 0, sizeof(*ch)); 242 ch->ch_block = from; 243 ch->ch_size = sz; 244 ch->ch_type = CH_TYPE_ZEROES; 245 TAILQ_INSERT_TAIL(&image_chunks, ch, ch_list); 246 image_nchunks++; 247 return (0); 248 } 249 250 static int 251 image_chunk_append(lba_t blk, size_t sz, off_t ofs, int fd) 252 { 253 struct chunk *ch; 254 255 ch = TAILQ_LAST(&image_chunks, chunk_head); 256 if (ch != NULL && ch->ch_type == CH_TYPE_FILE) { 257 if (fd == ch->ch_u.file.fd && 258 blk == (lba_t)(ch->ch_block + (ch->ch_size / secsz)) && 259 ofs == (off_t)(ch->ch_u.file.ofs + ch->ch_size)) { 260 sz = image_chunk_grow(ch, sz); 261 if (sz == 0) 262 return (0); 263 blk = ch->ch_block + (ch->ch_size / secsz); 264 ofs = ch->ch_u.file.ofs + ch->ch_size; 265 } 266 } 267 ch = malloc(sizeof(*ch)); 268 if (ch == NULL) 269 return (ENOMEM); 270 memset(ch, 0, sizeof(*ch)); 271 ch->ch_block = blk; 272 ch->ch_size = sz; 273 ch->ch_type = CH_TYPE_FILE; 274 ch->ch_u.file.ofs = ofs; 275 ch->ch_u.file.fd = fd; 276 TAILQ_INSERT_TAIL(&image_chunks, ch, ch_list); 277 image_nchunks++; 278 return (0); 279 } 280 281 static int 282 image_chunk_copyin(lba_t blk, void *buf, size_t sz, off_t ofs, int fd) 283 { 284 uint8_t *p = buf; 285 int error; 286 287 error = 0; 288 sz = (sz + secsz - 1) & ~(secsz - 1); 289 while (!error && sz > 0) { 290 if (is_empty_sector(p)) 291 error = image_chunk_skipto(blk + 1); 292 else 293 error = image_chunk_append(blk, secsz, ofs, fd); 294 blk++; 295 p += secsz; 296 sz -= secsz; 297 ofs += secsz; 298 } 299 return (error); 300 } 301 302 /* 303 * File mapping support. 304 */ 305 306 static void * 307 image_file_map(int fd, off_t ofs, size_t sz) 308 { 309 void *ptr; 310 size_t unit; 311 int flags, prot; 312 313 unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz; 314 assert((unit & (unit - 1)) == 0); 315 316 flags = MAP_NOCORE | MAP_NOSYNC | MAP_SHARED; 317 /* Allow writing to our swap file only. */ 318 prot = PROT_READ | ((fd == image_swap_fd) ? PROT_WRITE : 0); 319 sz = (sz + unit - 1) & ~(unit - 1); 320 ptr = mmap(NULL, sz, prot, flags, fd, ofs); 321 return ((ptr == MAP_FAILED) ? NULL : ptr); 322 } 323 324 static int 325 image_file_unmap(void *buffer, size_t sz) 326 { 327 size_t unit; 328 329 unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz; 330 sz = (sz + unit - 1) & ~(unit - 1); 331 if (madvise(buffer, sz, MADV_DONTNEED) != 0) 332 warn("madvise"); 333 munmap(buffer, sz); 334 return (0); 335 } 336 337 /* 338 * Input/source file handling. 339 */ 340 341 static int 342 image_copyin_stream(lba_t blk, int fd, uint64_t *sizep) 343 { 344 char *buffer; 345 uint64_t bytesize; 346 off_t swofs; 347 size_t iosz; 348 ssize_t rdsz; 349 int error; 350 351 /* 352 * This makes sure we're doing I/O in multiples of the page 353 * size as well as of the sector size. 2MB is the minimum 354 * by virtue of secsz at least 512 bytes and the page size 355 * at least 4K bytes. 356 */ 357 iosz = secsz * image_swap_pgsz; 358 359 bytesize = 0; 360 do { 361 swofs = image_swap_alloc(iosz); 362 if (swofs == -1LL) 363 return (errno); 364 buffer = image_file_map(image_swap_fd, swofs, iosz); 365 if (buffer == NULL) 366 return (errno); 367 rdsz = read(fd, buffer, iosz); 368 if (rdsz > 0) 369 error = image_chunk_copyin(blk, buffer, rdsz, swofs, 370 image_swap_fd); 371 else if (rdsz < 0) 372 error = errno; 373 else 374 error = 0; 375 image_file_unmap(buffer, iosz); 376 /* XXX should we relinguish unused swap space? */ 377 if (error) 378 return (error); 379 380 bytesize += rdsz; 381 blk += (rdsz + secsz - 1) / secsz; 382 } while (rdsz > 0); 383 384 if (sizep != NULL) 385 *sizep = bytesize; 386 return (0); 387 } 388 389 static int 390 image_copyin_mapped(lba_t blk, int fd, uint64_t *sizep) 391 { 392 off_t cur, data, end, hole, pos; 393 void *buf; 394 uint64_t bytesize; 395 size_t iosz, sz; 396 int error; 397 398 /* 399 * We'd like to know the size of the file and we must 400 * be able to seek in order to mmap(2). If this isn't 401 * possible, then treat the file as a stream/pipe. 402 */ 403 end = lseek(fd, 0L, SEEK_END); 404 if (end == -1L) 405 return (image_copyin_stream(blk, fd, sizep)); 406 407 /* 408 * We need the file opened for the duration and our 409 * caller is going to close the file. Make a dup(2) 410 * so that control the faith of the descriptor. 411 */ 412 fd = dup(fd); 413 if (fd == -1) 414 return (errno); 415 416 iosz = secsz * image_swap_pgsz; 417 418 bytesize = 0; 419 cur = pos = 0; 420 error = 0; 421 while (!error && cur < end) { 422 hole = lseek(fd, cur, SEEK_HOLE); 423 if (hole == -1) 424 hole = end; 425 data = lseek(fd, cur, SEEK_DATA); 426 if (data == -1) 427 data = end; 428 429 /* 430 * Treat the entire file as data if sparse files 431 * are not supported by the underlying file system. 432 */ 433 if (hole == end && data == end) 434 data = cur; 435 436 if (cur == hole && data > hole) { 437 hole = pos; 438 pos = data & ~((uint64_t)secsz - 1); 439 440 blk += (pos - hole) / secsz; 441 error = image_chunk_skipto(blk); 442 443 bytesize += pos - hole; 444 cur = data; 445 } else if (cur == data && hole > data) { 446 data = pos; 447 pos = (hole + secsz - 1) & ~((uint64_t)secsz - 1); 448 449 while (data < pos) { 450 sz = (pos - data > (off_t)iosz) 451 ? iosz : (size_t)(pos - data); 452 453 buf = image_file_map(fd, data, sz); 454 if (buf != NULL) { 455 error = image_chunk_copyin(blk, buf, 456 sz, data, fd); 457 image_file_unmap(buf, sz); 458 } else 459 error = errno; 460 461 blk += sz / secsz; 462 bytesize += sz; 463 data += sz; 464 } 465 cur = hole; 466 } else { 467 /* 468 * I don't know what this means or whether it 469 * can happen at all... 470 */ 471 assert(0); 472 } 473 } 474 if (error) 475 close(fd); 476 if (!error && sizep != NULL) 477 *sizep = bytesize; 478 return (error); 479 } 480 481 int 482 image_copyin(lba_t blk, int fd, uint64_t *sizep) 483 { 484 struct stat sb; 485 int error; 486 487 error = image_chunk_skipto(blk); 488 if (!error) { 489 if (fstat(fd, &sb) == -1 || !S_ISREG(sb.st_mode)) 490 error = image_copyin_stream(blk, fd, sizep); 491 else 492 error = image_copyin_mapped(blk, fd, sizep); 493 } 494 return (error); 495 } 496 497 /* 498 * Output/sink file handling. 499 */ 500 501 int 502 image_copyout(int fd) 503 { 504 int error; 505 506 error = image_copyout_region(fd, 0, image_size); 507 if (!error) 508 error = image_copyout_done(fd); 509 return (error); 510 } 511 512 int 513 image_copyout_done(int fd) 514 { 515 off_t ofs; 516 int error; 517 518 ofs = lseek(fd, 0L, SEEK_CUR); 519 if (ofs == -1) 520 return (0); 521 error = (ftruncate(fd, ofs) == -1) ? errno : 0; 522 return (error); 523 } 524 525 static int 526 image_copyout_memory(int fd, size_t size, void *ptr) 527 { 528 529 if (write(fd, ptr, size) == -1) 530 return (errno); 531 return (0); 532 } 533 534 int 535 image_copyout_zeroes(int fd, size_t count) 536 { 537 static uint8_t *zeroes = NULL; 538 size_t sz; 539 int error; 540 541 if (lseek(fd, (off_t)count, SEEK_CUR) != -1) 542 return (0); 543 544 /* 545 * If we can't seek, we must write. 546 */ 547 548 if (zeroes == NULL) { 549 zeroes = calloc(1, secsz); 550 if (zeroes == NULL) 551 return (ENOMEM); 552 } 553 554 while (count > 0) { 555 sz = (count > secsz) ? secsz : count; 556 error = image_copyout_memory(fd, sz, zeroes); 557 if (error) 558 return (error); 559 count -= sz; 560 } 561 return (0); 562 } 563 564 static int 565 image_copyout_file(int fd, size_t size, int ifd, off_t iofs) 566 { 567 void *buf; 568 size_t iosz, sz; 569 int error; 570 571 iosz = secsz * image_swap_pgsz; 572 573 while (size > 0) { 574 sz = (size > iosz) ? iosz : size; 575 buf = image_file_map(ifd, iofs, sz); 576 if (buf == NULL) 577 return (errno); 578 error = image_copyout_memory(fd, sz, buf); 579 image_file_unmap(buf, sz); 580 if (error) 581 return (error); 582 size -= sz; 583 iofs += sz; 584 } 585 return (0); 586 } 587 588 int 589 image_copyout_region(int fd, lba_t blk, lba_t size) 590 { 591 struct chunk *ch; 592 size_t ofs, sz; 593 int error; 594 595 size *= secsz; 596 597 error = 0; 598 while (!error && size > 0) { 599 ch = image_chunk_find(blk); 600 if (ch == NULL) { 601 error = EINVAL; 602 break; 603 } 604 ofs = (blk - ch->ch_block) * secsz; 605 sz = ch->ch_size - ofs; 606 sz = ((lba_t)sz < size) ? sz : (size_t)size; 607 switch (ch->ch_type) { 608 case CH_TYPE_ZEROES: 609 error = image_copyout_zeroes(fd, sz); 610 break; 611 case CH_TYPE_FILE: 612 error = image_copyout_file(fd, sz, ch->ch_u.file.fd, 613 ch->ch_u.file.ofs + ofs); 614 break; 615 case CH_TYPE_MEMORY: 616 error = image_copyout_memory(fd, sz, ch->ch_u.mem.ptr); 617 break; 618 default: 619 assert(0); 620 } 621 size -= sz; 622 blk += sz / secsz; 623 } 624 return (error); 625 } 626 627 int 628 image_data(lba_t blk, lba_t size) 629 { 630 struct chunk *ch; 631 lba_t lim; 632 633 while (1) { 634 ch = image_chunk_find(blk); 635 if (ch == NULL) 636 return (0); 637 if (ch->ch_type != CH_TYPE_ZEROES) 638 return (1); 639 lim = ch->ch_block + (ch->ch_size / secsz); 640 if (lim >= blk + size) 641 return (0); 642 size -= lim - blk; 643 blk = lim; 644 } 645 /*NOTREACHED*/ 646 } 647 648 lba_t 649 image_get_size(void) 650 { 651 652 return (image_size); 653 } 654 655 int 656 image_set_size(lba_t blk) 657 { 658 int error; 659 660 error = image_chunk_skipto(blk); 661 if (!error) 662 image_size = blk; 663 return (error); 664 } 665 666 int 667 image_write(lba_t blk, void *buf, ssize_t len) 668 { 669 struct chunk *ch; 670 671 while (len > 0) { 672 if (!is_empty_sector(buf)) { 673 ch = image_chunk_find(blk); 674 if (ch == NULL) 675 return (ENXIO); 676 /* We may not be able to write to files. */ 677 if (ch->ch_type == CH_TYPE_FILE) 678 return (EINVAL); 679 if (ch->ch_type == CH_TYPE_ZEROES) { 680 ch = image_chunk_memory(ch, blk); 681 if (ch == NULL) 682 return (ENOMEM); 683 } 684 assert(ch->ch_type == CH_TYPE_MEMORY); 685 memcpy(ch->ch_u.mem.ptr, buf, secsz); 686 } 687 blk++; 688 buf = (char *)buf + secsz; 689 len--; 690 } 691 return (0); 692 } 693 694 static void 695 image_cleanup(void) 696 { 697 struct chunk *ch; 698 699 while ((ch = TAILQ_FIRST(&image_chunks)) != NULL) { 700 switch (ch->ch_type) { 701 case CH_TYPE_FILE: 702 /* We may be closing the same file multiple times. */ 703 if (ch->ch_u.file.fd != -1) 704 close(ch->ch_u.file.fd); 705 break; 706 case CH_TYPE_MEMORY: 707 free(ch->ch_u.mem.ptr); 708 break; 709 default: 710 break; 711 } 712 TAILQ_REMOVE(&image_chunks, ch, ch_list); 713 free(ch); 714 } 715 if (image_swap_fd != -1) 716 close(image_swap_fd); 717 unlink(image_swap_file); 718 } 719 720 int 721 image_init(void) 722 { 723 const char *tmpdir; 724 725 TAILQ_INIT(&image_chunks); 726 image_nchunks = 0; 727 728 image_swap_size = 0; 729 image_swap_pgsz = getpagesize(); 730 731 if (atexit(image_cleanup) == -1) 732 return (errno); 733 if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0') 734 tmpdir = _PATH_TMP; 735 snprintf(image_swap_file, sizeof(image_swap_file), "%s/mkimg-XXXXXX", 736 tmpdir); 737 image_swap_fd = mkstemp(image_swap_file); 738 if (image_swap_fd == -1) 739 return (errno); 740 return (0); 741 } 742