1 /*- 2 * Copyright (c) 2014 Juniper Networks, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #include <sys/mman.h> 31 #include <sys/stat.h> 32 #include <assert.h> 33 #include <err.h> 34 #include <errno.h> 35 #include <limits.h> 36 #include <paths.h> 37 #include <stdint.h> 38 #include <stdio.h> 39 #include <stdlib.h> 40 #include <string.h> 41 #include <unistd.h> 42 43 #include "image.h" 44 #include "mkimg.h" 45 46 #ifndef MAP_NOCORE 47 #define MAP_NOCORE 0 48 #endif 49 #ifndef MAP_NOSYNC 50 #define MAP_NOSYNC 0 51 #endif 52 53 #ifndef SEEK_DATA 54 #define SEEK_DATA -1 55 #endif 56 #ifndef SEEK_HOLE 57 #define SEEK_HOLE -1 58 #endif 59 60 struct chunk { 61 TAILQ_ENTRY(chunk) ch_list; 62 size_t ch_size; /* Size of chunk in bytes. */ 63 lba_t ch_block; /* Block address in image. */ 64 union { 65 struct { 66 off_t ofs; /* Offset in backing file. */ 67 int fd; /* FD of backing file. */ 68 } file; 69 struct { 70 void *ptr; /* Pointer to data in memory */ 71 } mem; 72 } ch_u; 73 u_int ch_type; 74 #define CH_TYPE_ZEROES 0 /* Chunk is a gap (no data). */ 75 #define CH_TYPE_FILE 1 /* File-backed chunk. */ 76 #define CH_TYPE_MEMORY 2 /* Memory-backed chunk */ 77 }; 78 79 static TAILQ_HEAD(chunk_head, chunk) image_chunks; 80 static u_int image_nchunks; 81 82 static char image_swap_file[PATH_MAX]; 83 static int image_swap_fd = -1; 84 static u_int image_swap_pgsz; 85 static off_t image_swap_size; 86 87 static lba_t image_size; 88 89 static int 90 is_empty_sector(void *buf) 91 { 92 uint64_t *p = buf; 93 size_t n, max; 94 95 assert(((uintptr_t)p & 3) == 0); 96 97 max = secsz / sizeof(uint64_t); 98 for (n = 0; n < max; n++) { 99 if (p[n] != 0UL) 100 return (0); 101 } 102 return (1); 103 } 104 105 /* 106 * Swap file handlng. 107 */ 108 109 static off_t 110 image_swap_alloc(size_t size) 111 { 112 off_t ofs; 113 size_t unit; 114 115 unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz; 116 assert((unit & (unit - 1)) == 0); 117 118 size = (size + unit - 1) & ~(unit - 1); 119 120 ofs = image_swap_size; 121 image_swap_size += size; 122 if (ftruncate(image_swap_fd, image_swap_size) == -1) { 123 image_swap_size = ofs; 124 ofs = -1LL; 125 } 126 return (ofs); 127 } 128 129 /* 130 * Image chunk handling. 131 */ 132 133 static struct chunk * 134 image_chunk_find(lba_t blk) 135 { 136 static struct chunk *last = NULL; 137 struct chunk *ch; 138 139 ch = (last != NULL && last->ch_block <= blk) 140 ? last : TAILQ_FIRST(&image_chunks); 141 while (ch != NULL) { 142 if (ch->ch_block <= blk && 143 (lba_t)(ch->ch_block + (ch->ch_size / secsz)) > blk) { 144 last = ch; 145 break; 146 } 147 ch = TAILQ_NEXT(ch, ch_list); 148 } 149 return (ch); 150 } 151 152 static size_t 153 image_chunk_grow(struct chunk *ch, size_t sz) 154 { 155 size_t dsz, newsz; 156 157 newsz = ch->ch_size + sz; 158 if (newsz > ch->ch_size) { 159 ch->ch_size = newsz; 160 return (0); 161 } 162 /* We would overflow -- create new chunk for remainder. */ 163 dsz = SIZE_MAX - ch->ch_size; 164 assert(dsz < sz); 165 ch->ch_size = SIZE_MAX; 166 return (sz - dsz); 167 } 168 169 static struct chunk * 170 image_chunk_memory(struct chunk *ch, lba_t blk) 171 { 172 struct chunk *new; 173 void *ptr; 174 175 ptr = calloc(1, secsz); 176 if (ptr == NULL) 177 return (NULL); 178 179 if (ch->ch_block < blk) { 180 new = malloc(sizeof(*new)); 181 if (new == NULL) { 182 free(ptr); 183 return (NULL); 184 } 185 memcpy(new, ch, sizeof(*new)); 186 ch->ch_size = (blk - ch->ch_block) * secsz; 187 new->ch_block = blk; 188 new->ch_size -= ch->ch_size; 189 TAILQ_INSERT_AFTER(&image_chunks, ch, new, ch_list); 190 image_nchunks++; 191 ch = new; 192 } 193 194 if (ch->ch_size > secsz) { 195 new = malloc(sizeof(*new)); 196 if (new == NULL) { 197 free(ptr); 198 return (NULL); 199 } 200 memcpy(new, ch, sizeof(*new)); 201 ch->ch_size = secsz; 202 new->ch_block++; 203 new->ch_size -= secsz; 204 TAILQ_INSERT_AFTER(&image_chunks, ch, new, ch_list); 205 image_nchunks++; 206 } 207 208 ch->ch_type = CH_TYPE_MEMORY; 209 ch->ch_u.mem.ptr = ptr; 210 return (ch); 211 } 212 213 static int 214 image_chunk_skipto(lba_t to) 215 { 216 struct chunk *ch; 217 lba_t from; 218 size_t sz; 219 220 ch = TAILQ_LAST(&image_chunks, chunk_head); 221 from = (ch != NULL) ? ch->ch_block + (ch->ch_size / secsz) : 0LL; 222 223 assert(from <= to); 224 225 /* Nothing to do? */ 226 if (from == to) 227 return (0); 228 /* Avoid bugs due to overflows. */ 229 if ((uintmax_t)(to - from) > (uintmax_t)(SIZE_MAX / secsz)) 230 return (EFBIG); 231 sz = (to - from) * secsz; 232 if (ch != NULL && ch->ch_type == CH_TYPE_ZEROES) { 233 sz = image_chunk_grow(ch, sz); 234 if (sz == 0) 235 return (0); 236 from = ch->ch_block + (ch->ch_size / secsz); 237 } 238 ch = malloc(sizeof(*ch)); 239 if (ch == NULL) 240 return (ENOMEM); 241 memset(ch, 0, sizeof(*ch)); 242 ch->ch_block = from; 243 ch->ch_size = sz; 244 ch->ch_type = CH_TYPE_ZEROES; 245 TAILQ_INSERT_TAIL(&image_chunks, ch, ch_list); 246 image_nchunks++; 247 return (0); 248 } 249 250 static int 251 image_chunk_append(lba_t blk, size_t sz, off_t ofs, int fd) 252 { 253 struct chunk *ch; 254 255 ch = TAILQ_LAST(&image_chunks, chunk_head); 256 if (ch != NULL && ch->ch_type == CH_TYPE_FILE) { 257 if (fd == ch->ch_u.file.fd && 258 blk == (lba_t)(ch->ch_block + (ch->ch_size / secsz)) && 259 ofs == (off_t)(ch->ch_u.file.ofs + ch->ch_size)) { 260 sz = image_chunk_grow(ch, sz); 261 if (sz == 0) 262 return (0); 263 blk = ch->ch_block + (ch->ch_size / secsz); 264 ofs = ch->ch_u.file.ofs + ch->ch_size; 265 } 266 } 267 ch = malloc(sizeof(*ch)); 268 if (ch == NULL) 269 return (ENOMEM); 270 memset(ch, 0, sizeof(*ch)); 271 ch->ch_block = blk; 272 ch->ch_size = sz; 273 ch->ch_type = CH_TYPE_FILE; 274 ch->ch_u.file.ofs = ofs; 275 ch->ch_u.file.fd = fd; 276 TAILQ_INSERT_TAIL(&image_chunks, ch, ch_list); 277 image_nchunks++; 278 return (0); 279 } 280 281 static int 282 image_chunk_copyin(lba_t blk, void *buf, size_t sz, off_t ofs, int fd) 283 { 284 uint8_t *p = buf; 285 int error; 286 287 error = 0; 288 sz = (sz + secsz - 1) & ~(secsz - 1); 289 while (!error && sz > 0) { 290 if (is_empty_sector(p)) 291 error = image_chunk_skipto(blk + 1); 292 else 293 error = image_chunk_append(blk, secsz, ofs, fd); 294 blk++; 295 p += secsz; 296 sz -= secsz; 297 ofs += secsz; 298 } 299 return (error); 300 } 301 302 /* 303 * File mapping support. 304 */ 305 306 static void * 307 image_file_map(int fd, off_t ofs, size_t sz, off_t *iofp) 308 { 309 void *ptr; 310 size_t unit; 311 int flags, prot; 312 off_t x; 313 314 /* On Linux anyway ofs must also be page aligned */ 315 if ((x = (ofs % image_swap_pgsz)) != 0) { 316 ofs -= x; 317 sz += x; 318 *iofp = x; 319 } else 320 *iofp = 0; 321 unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz; 322 assert((unit & (unit - 1)) == 0); 323 324 flags = MAP_NOCORE | MAP_NOSYNC | MAP_SHARED; 325 /* Allow writing to our swap file only. */ 326 prot = PROT_READ | ((fd == image_swap_fd) ? PROT_WRITE : 0); 327 sz = (sz + unit - 1) & ~(unit - 1); 328 ptr = mmap(NULL, sz, prot, flags, fd, ofs); 329 return ((ptr == MAP_FAILED) ? NULL : ptr); 330 } 331 332 static int 333 image_file_unmap(void *buffer, size_t sz) 334 { 335 size_t unit; 336 337 unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz; 338 sz = (sz + unit - 1) & ~(unit - 1); 339 if (madvise(buffer, sz, MADV_DONTNEED) != 0) 340 warn("madvise"); 341 munmap(buffer, sz); 342 return (0); 343 } 344 345 /* 346 * Input/source file handling. 347 */ 348 349 static int 350 image_copyin_stream(lba_t blk, int fd, uint64_t *sizep) 351 { 352 char *buffer; 353 uint64_t bytesize; 354 off_t swofs; 355 size_t iosz; 356 ssize_t rdsz; 357 int error; 358 off_t iof; 359 360 /* 361 * This makes sure we're doing I/O in multiples of the page 362 * size as well as of the sector size. 2MB is the minimum 363 * by virtue of secsz at least 512 bytes and the page size 364 * at least 4K bytes. 365 */ 366 iosz = secsz * image_swap_pgsz; 367 368 bytesize = 0; 369 do { 370 swofs = image_swap_alloc(iosz); 371 if (swofs == -1LL) 372 return (errno); 373 buffer = image_file_map(image_swap_fd, swofs, iosz, &iof); 374 if (buffer == NULL) 375 return (errno); 376 rdsz = read(fd, &buffer[iof], iosz); 377 if (rdsz > 0) 378 error = image_chunk_copyin(blk, &buffer[iof], rdsz, swofs, 379 image_swap_fd); 380 else if (rdsz < 0) 381 error = errno; 382 else 383 error = 0; 384 image_file_unmap(buffer, iosz); 385 /* XXX should we relinguish unused swap space? */ 386 if (error) 387 return (error); 388 389 bytesize += rdsz; 390 blk += (rdsz + secsz - 1) / secsz; 391 } while (rdsz > 0); 392 393 if (sizep != NULL) 394 *sizep = bytesize; 395 return (0); 396 } 397 398 static int 399 image_copyin_mapped(lba_t blk, int fd, uint64_t *sizep) 400 { 401 off_t cur, data, end, hole, pos, iof; 402 void *mp; 403 char *buf; 404 uint64_t bytesize; 405 size_t iosz, sz; 406 int error; 407 408 /* 409 * We'd like to know the size of the file and we must 410 * be able to seek in order to mmap(2). If this isn't 411 * possible, then treat the file as a stream/pipe. 412 */ 413 end = lseek(fd, 0L, SEEK_END); 414 if (end == -1L) 415 return (image_copyin_stream(blk, fd, sizep)); 416 417 /* 418 * We need the file opened for the duration and our 419 * caller is going to close the file. Make a dup(2) 420 * so that control the faith of the descriptor. 421 */ 422 fd = dup(fd); 423 if (fd == -1) 424 return (errno); 425 426 iosz = secsz * image_swap_pgsz; 427 428 bytesize = 0; 429 cur = pos = 0; 430 error = 0; 431 while (!error && cur < end) { 432 hole = lseek(fd, cur, SEEK_HOLE); 433 if (hole == -1) 434 hole = end; 435 data = lseek(fd, cur, SEEK_DATA); 436 if (data == -1) 437 data = end; 438 439 /* 440 * Treat the entire file as data if sparse files 441 * are not supported by the underlying file system. 442 */ 443 if (hole == end && data == end) 444 data = cur; 445 446 if (cur == hole && data > hole) { 447 hole = pos; 448 pos = data & ~((uint64_t)secsz - 1); 449 450 blk += (pos - hole) / secsz; 451 error = image_chunk_skipto(blk); 452 453 bytesize += pos - hole; 454 cur = data; 455 } else if (cur == data && hole > data) { 456 data = pos; 457 pos = (hole + secsz - 1) & ~((uint64_t)secsz - 1); 458 459 while (data < pos) { 460 sz = (pos - data > (off_t)iosz) 461 ? iosz : (size_t)(pos - data); 462 463 buf = mp = image_file_map(fd, data, sz, &iof); 464 if (mp != NULL) { 465 buf += iof; 466 error = image_chunk_copyin(blk, buf, 467 sz, data, fd); 468 image_file_unmap(mp, sz); 469 } else 470 error = errno; 471 472 blk += sz / secsz; 473 bytesize += sz; 474 data += sz; 475 } 476 cur = hole; 477 } else { 478 /* 479 * I don't know what this means or whether it 480 * can happen at all... 481 */ 482 assert(0); 483 } 484 } 485 if (error) 486 close(fd); 487 if (!error && sizep != NULL) 488 *sizep = bytesize; 489 return (error); 490 } 491 492 int 493 image_copyin(lba_t blk, int fd, uint64_t *sizep) 494 { 495 struct stat sb; 496 int error; 497 498 error = image_chunk_skipto(blk); 499 if (!error) { 500 if (fstat(fd, &sb) == -1 || !S_ISREG(sb.st_mode)) 501 error = image_copyin_stream(blk, fd, sizep); 502 else 503 error = image_copyin_mapped(blk, fd, sizep); 504 } 505 return (error); 506 } 507 508 /* 509 * Output/sink file handling. 510 */ 511 512 int 513 image_copyout(int fd) 514 { 515 int error; 516 517 error = image_copyout_region(fd, 0, image_size); 518 if (!error) 519 error = image_copyout_done(fd); 520 return (error); 521 } 522 523 int 524 image_copyout_done(int fd) 525 { 526 off_t ofs; 527 int error; 528 529 ofs = lseek(fd, 0L, SEEK_CUR); 530 if (ofs == -1) 531 return (0); 532 error = (ftruncate(fd, ofs) == -1) ? errno : 0; 533 return (error); 534 } 535 536 static int 537 image_copyout_memory(int fd, size_t size, void *ptr) 538 { 539 540 if (write(fd, ptr, size) == -1) 541 return (errno); 542 return (0); 543 } 544 545 int 546 image_copyout_zeroes(int fd, size_t count) 547 { 548 static uint8_t *zeroes = NULL; 549 size_t sz; 550 int error; 551 552 if (lseek(fd, (off_t)count, SEEK_CUR) != -1) 553 return (0); 554 555 /* 556 * If we can't seek, we must write. 557 */ 558 559 if (zeroes == NULL) { 560 zeroes = calloc(1, secsz); 561 if (zeroes == NULL) 562 return (ENOMEM); 563 } 564 565 while (count > 0) { 566 sz = (count > secsz) ? secsz : count; 567 error = image_copyout_memory(fd, sz, zeroes); 568 if (error) 569 return (error); 570 count -= sz; 571 } 572 return (0); 573 } 574 575 static int 576 image_copyout_file(int fd, size_t size, int ifd, off_t iofs) 577 { 578 void *mp; 579 char *buf; 580 size_t iosz, sz; 581 int error; 582 off_t iof; 583 584 iosz = secsz * image_swap_pgsz; 585 586 while (size > 0) { 587 sz = (size > iosz) ? iosz : size; 588 buf = mp = image_file_map(ifd, iofs, sz, &iof); 589 if (buf == NULL) 590 return (errno); 591 buf += iof; 592 error = image_copyout_memory(fd, sz, buf); 593 image_file_unmap(mp, sz); 594 if (error) 595 return (error); 596 size -= sz; 597 iofs += sz; 598 } 599 return (0); 600 } 601 602 int 603 image_copyout_region(int fd, lba_t blk, lba_t size) 604 { 605 struct chunk *ch; 606 size_t ofs, sz; 607 int error; 608 609 size *= secsz; 610 611 error = 0; 612 while (!error && size > 0) { 613 ch = image_chunk_find(blk); 614 if (ch == NULL) { 615 error = EINVAL; 616 break; 617 } 618 ofs = (blk - ch->ch_block) * secsz; 619 sz = ch->ch_size - ofs; 620 sz = ((lba_t)sz < size) ? sz : (size_t)size; 621 switch (ch->ch_type) { 622 case CH_TYPE_ZEROES: 623 error = image_copyout_zeroes(fd, sz); 624 break; 625 case CH_TYPE_FILE: 626 error = image_copyout_file(fd, sz, ch->ch_u.file.fd, 627 ch->ch_u.file.ofs + ofs); 628 break; 629 case CH_TYPE_MEMORY: 630 error = image_copyout_memory(fd, sz, ch->ch_u.mem.ptr); 631 break; 632 default: 633 assert(0); 634 } 635 size -= sz; 636 blk += sz / secsz; 637 } 638 return (error); 639 } 640 641 int 642 image_data(lba_t blk, lba_t size) 643 { 644 struct chunk *ch; 645 lba_t lim; 646 647 while (1) { 648 ch = image_chunk_find(blk); 649 if (ch == NULL) 650 return (0); 651 if (ch->ch_type != CH_TYPE_ZEROES) 652 return (1); 653 lim = ch->ch_block + (ch->ch_size / secsz); 654 if (lim >= blk + size) 655 return (0); 656 size -= lim - blk; 657 blk = lim; 658 } 659 /*NOTREACHED*/ 660 } 661 662 lba_t 663 image_get_size(void) 664 { 665 666 return (image_size); 667 } 668 669 int 670 image_set_size(lba_t blk) 671 { 672 int error; 673 674 error = image_chunk_skipto(blk); 675 if (!error) 676 image_size = blk; 677 return (error); 678 } 679 680 int 681 image_write(lba_t blk, void *buf, ssize_t len) 682 { 683 struct chunk *ch; 684 685 while (len > 0) { 686 if (!is_empty_sector(buf)) { 687 ch = image_chunk_find(blk); 688 if (ch == NULL) 689 return (ENXIO); 690 /* We may not be able to write to files. */ 691 if (ch->ch_type == CH_TYPE_FILE) 692 return (EINVAL); 693 if (ch->ch_type == CH_TYPE_ZEROES) { 694 ch = image_chunk_memory(ch, blk); 695 if (ch == NULL) 696 return (ENOMEM); 697 } 698 assert(ch->ch_type == CH_TYPE_MEMORY); 699 memcpy(ch->ch_u.mem.ptr, buf, secsz); 700 } 701 blk++; 702 buf = (char *)buf + secsz; 703 len--; 704 } 705 return (0); 706 } 707 708 static void 709 image_cleanup(void) 710 { 711 struct chunk *ch; 712 713 while ((ch = TAILQ_FIRST(&image_chunks)) != NULL) { 714 switch (ch->ch_type) { 715 case CH_TYPE_FILE: 716 /* We may be closing the same file multiple times. */ 717 if (ch->ch_u.file.fd != -1) 718 close(ch->ch_u.file.fd); 719 break; 720 case CH_TYPE_MEMORY: 721 free(ch->ch_u.mem.ptr); 722 break; 723 default: 724 break; 725 } 726 TAILQ_REMOVE(&image_chunks, ch, ch_list); 727 free(ch); 728 } 729 if (image_swap_fd != -1) 730 close(image_swap_fd); 731 unlink(image_swap_file); 732 } 733 734 int 735 image_init(void) 736 { 737 const char *tmpdir; 738 739 TAILQ_INIT(&image_chunks); 740 image_nchunks = 0; 741 742 image_swap_size = 0; 743 image_swap_pgsz = getpagesize(); 744 745 if (atexit(image_cleanup) == -1) 746 return (errno); 747 if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0') 748 tmpdir = _PATH_TMP; 749 snprintf(image_swap_file, sizeof(image_swap_file), "%s/mkimg-XXXXXX", 750 tmpdir); 751 image_swap_fd = mkstemp(image_swap_file); 752 if (image_swap_fd == -1) 753 return (errno); 754 return (0); 755 } 756