1 /*- 2 * Copyright (c) 2014 Juniper Networks, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 #include <sys/mman.h> 29 #include <sys/stat.h> 30 #include <assert.h> 31 #include <err.h> 32 #include <errno.h> 33 #include <limits.h> 34 #include <paths.h> 35 #include <stdint.h> 36 #include <stdio.h> 37 #include <stdlib.h> 38 #include <string.h> 39 #include <unistd.h> 40 41 #include "image.h" 42 #include "mkimg.h" 43 44 #ifndef MAP_NOCORE 45 #define MAP_NOCORE 0 46 #endif 47 #ifndef MAP_NOSYNC 48 #define MAP_NOSYNC 0 49 #endif 50 51 #ifndef SEEK_DATA 52 #define SEEK_DATA -1 53 #endif 54 #ifndef SEEK_HOLE 55 #define SEEK_HOLE -1 56 #endif 57 58 struct chunk { 59 TAILQ_ENTRY(chunk) ch_list; 60 size_t ch_size; /* Size of chunk in bytes. */ 61 lba_t ch_block; /* Block address in image. */ 62 union { 63 struct { 64 off_t ofs; /* Offset in backing file. */ 65 int fd; /* FD of backing file. */ 66 } file; 67 struct { 68 void *ptr; /* Pointer to data in memory */ 69 } mem; 70 } ch_u; 71 u_int ch_type; 72 #define CH_TYPE_ZEROES 0 /* Chunk is a gap (no data). */ 73 #define CH_TYPE_FILE 1 /* File-backed chunk. */ 74 #define CH_TYPE_MEMORY 2 /* Memory-backed chunk */ 75 }; 76 77 static TAILQ_HEAD(chunk_head, chunk) image_chunks; 78 static u_int image_nchunks; 79 80 static char image_swap_file[PATH_MAX]; 81 static int image_swap_fd = -1; 82 static u_int image_swap_pgsz; 83 static off_t image_swap_size; 84 85 static lba_t image_size; 86 87 static int 88 is_empty_sector(void *buf) 89 { 90 uint64_t *p = buf; 91 size_t n, max; 92 93 assert(((uintptr_t)p & 3) == 0); 94 95 max = secsz / sizeof(uint64_t); 96 for (n = 0; n < max; n++) { 97 if (p[n] != 0UL) 98 return (0); 99 } 100 return (1); 101 } 102 103 /* 104 * Swap file handlng. 105 */ 106 107 static off_t 108 image_swap_alloc(size_t size) 109 { 110 off_t ofs; 111 size_t unit; 112 113 unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz; 114 assert((unit & (unit - 1)) == 0); 115 116 size = (size + unit - 1) & ~(unit - 1); 117 118 ofs = image_swap_size; 119 image_swap_size += size; 120 if (ftruncate(image_swap_fd, image_swap_size) == -1) { 121 image_swap_size = ofs; 122 ofs = -1LL; 123 } 124 return (ofs); 125 } 126 127 /* 128 * Image chunk handling. 129 */ 130 131 static struct chunk * 132 image_chunk_find(lba_t blk) 133 { 134 static struct chunk *last = NULL; 135 struct chunk *ch; 136 137 ch = (last != NULL && last->ch_block <= blk) 138 ? last : TAILQ_FIRST(&image_chunks); 139 while (ch != NULL) { 140 if (ch->ch_block <= blk && 141 (lba_t)(ch->ch_block + (ch->ch_size / secsz)) > blk) { 142 last = ch; 143 break; 144 } 145 ch = TAILQ_NEXT(ch, ch_list); 146 } 147 return (ch); 148 } 149 150 static size_t 151 image_chunk_grow(struct chunk *ch, size_t sz) 152 { 153 size_t dsz, newsz; 154 155 newsz = ch->ch_size + sz; 156 if (newsz > ch->ch_size) { 157 ch->ch_size = newsz; 158 return (0); 159 } 160 /* We would overflow -- create new chunk for remainder. */ 161 dsz = SIZE_MAX - ch->ch_size; 162 assert(dsz < sz); 163 ch->ch_size = SIZE_MAX; 164 return (sz - dsz); 165 } 166 167 static struct chunk * 168 image_chunk_memory(struct chunk *ch, lba_t blk) 169 { 170 struct chunk *new; 171 void *ptr; 172 173 ptr = calloc(1, secsz); 174 if (ptr == NULL) 175 return (NULL); 176 177 if (ch->ch_block < blk) { 178 new = malloc(sizeof(*new)); 179 if (new == NULL) { 180 free(ptr); 181 return (NULL); 182 } 183 memcpy(new, ch, sizeof(*new)); 184 ch->ch_size = (blk - ch->ch_block) * secsz; 185 new->ch_block = blk; 186 new->ch_size -= ch->ch_size; 187 TAILQ_INSERT_AFTER(&image_chunks, ch, new, ch_list); 188 image_nchunks++; 189 ch = new; 190 } 191 192 if (ch->ch_size > secsz) { 193 new = malloc(sizeof(*new)); 194 if (new == NULL) { 195 free(ptr); 196 return (NULL); 197 } 198 memcpy(new, ch, sizeof(*new)); 199 ch->ch_size = secsz; 200 new->ch_block++; 201 new->ch_size -= secsz; 202 TAILQ_INSERT_AFTER(&image_chunks, ch, new, ch_list); 203 image_nchunks++; 204 } 205 206 ch->ch_type = CH_TYPE_MEMORY; 207 ch->ch_u.mem.ptr = ptr; 208 return (ch); 209 } 210 211 static int 212 image_chunk_skipto(lba_t to) 213 { 214 struct chunk *ch; 215 lba_t from; 216 size_t sz; 217 218 ch = TAILQ_LAST(&image_chunks, chunk_head); 219 from = (ch != NULL) ? ch->ch_block + (ch->ch_size / secsz) : 0LL; 220 221 assert(from <= to); 222 223 /* Nothing to do? */ 224 if (from == to) 225 return (0); 226 /* Avoid bugs due to overflows. */ 227 if ((uintmax_t)(to - from) > (uintmax_t)(SIZE_MAX / secsz)) 228 return (EFBIG); 229 sz = (to - from) * secsz; 230 if (ch != NULL && ch->ch_type == CH_TYPE_ZEROES) { 231 sz = image_chunk_grow(ch, sz); 232 if (sz == 0) 233 return (0); 234 from = ch->ch_block + (ch->ch_size / secsz); 235 } 236 ch = malloc(sizeof(*ch)); 237 if (ch == NULL) 238 return (ENOMEM); 239 memset(ch, 0, sizeof(*ch)); 240 ch->ch_block = from; 241 ch->ch_size = sz; 242 ch->ch_type = CH_TYPE_ZEROES; 243 TAILQ_INSERT_TAIL(&image_chunks, ch, ch_list); 244 image_nchunks++; 245 return (0); 246 } 247 248 static int 249 image_chunk_append(lba_t blk, size_t sz, off_t ofs, int fd) 250 { 251 struct chunk *ch; 252 253 ch = TAILQ_LAST(&image_chunks, chunk_head); 254 if (ch != NULL && ch->ch_type == CH_TYPE_FILE) { 255 if (fd == ch->ch_u.file.fd && 256 blk == (lba_t)(ch->ch_block + (ch->ch_size / secsz)) && 257 ofs == (off_t)(ch->ch_u.file.ofs + ch->ch_size)) { 258 sz = image_chunk_grow(ch, sz); 259 if (sz == 0) 260 return (0); 261 blk = ch->ch_block + (ch->ch_size / secsz); 262 ofs = ch->ch_u.file.ofs + ch->ch_size; 263 } 264 } 265 ch = malloc(sizeof(*ch)); 266 if (ch == NULL) 267 return (ENOMEM); 268 memset(ch, 0, sizeof(*ch)); 269 ch->ch_block = blk; 270 ch->ch_size = sz; 271 ch->ch_type = CH_TYPE_FILE; 272 ch->ch_u.file.ofs = ofs; 273 ch->ch_u.file.fd = fd; 274 TAILQ_INSERT_TAIL(&image_chunks, ch, ch_list); 275 image_nchunks++; 276 return (0); 277 } 278 279 static int 280 image_chunk_copyin(lba_t blk, void *buf, size_t sz, off_t ofs, int fd) 281 { 282 uint8_t *p = buf; 283 int error; 284 285 error = 0; 286 sz = (sz + secsz - 1) & ~(secsz - 1); 287 while (!error && sz > 0) { 288 if (is_empty_sector(p)) 289 error = image_chunk_skipto(blk + 1); 290 else 291 error = image_chunk_append(blk, secsz, ofs, fd); 292 blk++; 293 p += secsz; 294 sz -= secsz; 295 ofs += secsz; 296 } 297 return (error); 298 } 299 300 /* 301 * File mapping support. 302 */ 303 304 static void * 305 image_file_map(int fd, off_t ofs, size_t sz, off_t *iofp) 306 { 307 void *ptr; 308 size_t unit; 309 int flags, prot; 310 off_t x; 311 312 /* On Linux anyway ofs must also be page aligned */ 313 if ((x = (ofs % image_swap_pgsz)) != 0) { 314 ofs -= x; 315 sz += x; 316 *iofp = x; 317 } else 318 *iofp = 0; 319 unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz; 320 assert((unit & (unit - 1)) == 0); 321 322 flags = MAP_NOCORE | MAP_NOSYNC | MAP_SHARED; 323 /* Allow writing to our swap file only. */ 324 prot = PROT_READ | ((fd == image_swap_fd) ? PROT_WRITE : 0); 325 sz = (sz + unit - 1) & ~(unit - 1); 326 ptr = mmap(NULL, sz, prot, flags, fd, ofs); 327 return ((ptr == MAP_FAILED) ? NULL : ptr); 328 } 329 330 static int 331 image_file_unmap(void *buffer, size_t sz) 332 { 333 size_t unit; 334 335 unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz; 336 sz = (sz + unit - 1) & ~(unit - 1); 337 if (madvise(buffer, sz, MADV_DONTNEED) != 0) 338 warn("madvise"); 339 munmap(buffer, sz); 340 return (0); 341 } 342 343 /* 344 * Input/source file handling. 345 */ 346 347 static int 348 image_copyin_stream(lba_t blk, int fd, uint64_t *sizep) 349 { 350 char *buffer; 351 uint64_t bytesize; 352 off_t swofs; 353 size_t iosz; 354 ssize_t rdsz; 355 int error; 356 off_t iof; 357 358 /* 359 * This makes sure we're doing I/O in multiples of the page 360 * size as well as of the sector size. 2MB is the minimum 361 * by virtue of secsz at least 512 bytes and the page size 362 * at least 4K bytes. 363 */ 364 iosz = secsz * image_swap_pgsz; 365 366 bytesize = 0; 367 do { 368 swofs = image_swap_alloc(iosz); 369 if (swofs == -1LL) 370 return (errno); 371 buffer = image_file_map(image_swap_fd, swofs, iosz, &iof); 372 if (buffer == NULL) 373 return (errno); 374 rdsz = read(fd, &buffer[iof], iosz); 375 if (rdsz > 0) 376 error = image_chunk_copyin(blk, &buffer[iof], rdsz, swofs, 377 image_swap_fd); 378 else if (rdsz < 0) 379 error = errno; 380 else 381 error = 0; 382 image_file_unmap(buffer, iosz); 383 /* XXX should we relinguish unused swap space? */ 384 if (error) 385 return (error); 386 387 bytesize += rdsz; 388 blk += (rdsz + secsz - 1) / secsz; 389 } while (rdsz > 0); 390 391 if (sizep != NULL) 392 *sizep = bytesize; 393 return (0); 394 } 395 396 static int 397 image_copyin_mapped(lba_t blk, int fd, uint64_t *sizep) 398 { 399 off_t cur, data, end, hole, pos, iof; 400 void *mp; 401 char *buf; 402 uint64_t bytesize; 403 size_t iosz, sz; 404 int error; 405 406 /* 407 * We'd like to know the size of the file and we must 408 * be able to seek in order to mmap(2). If this isn't 409 * possible, then treat the file as a stream/pipe. 410 */ 411 end = lseek(fd, 0L, SEEK_END); 412 if (end == -1L) 413 return (image_copyin_stream(blk, fd, sizep)); 414 415 /* 416 * We need the file opened for the duration and our 417 * caller is going to close the file. Make a dup(2) 418 * so that control the faith of the descriptor. 419 */ 420 fd = dup(fd); 421 if (fd == -1) 422 return (errno); 423 424 iosz = secsz * image_swap_pgsz; 425 426 bytesize = 0; 427 cur = pos = 0; 428 error = 0; 429 while (!error && cur < end) { 430 hole = lseek(fd, cur, SEEK_HOLE); 431 if (hole == -1) 432 hole = end; 433 data = lseek(fd, cur, SEEK_DATA); 434 if (data == -1) 435 data = end; 436 437 /* 438 * Treat the entire file as data if sparse files 439 * are not supported by the underlying file system. 440 */ 441 if (hole == end && data == end) 442 data = cur; 443 444 if (cur == hole && data > hole) { 445 hole = pos; 446 pos = data & ~((uint64_t)secsz - 1); 447 448 blk += (pos - hole) / secsz; 449 error = image_chunk_skipto(blk); 450 451 bytesize += pos - hole; 452 cur = data; 453 } else if (cur == data && hole > data) { 454 data = pos; 455 pos = (hole + secsz - 1) & ~((uint64_t)secsz - 1); 456 457 while (data < pos) { 458 sz = (pos - data > (off_t)iosz) 459 ? iosz : (size_t)(pos - data); 460 461 buf = mp = image_file_map(fd, data, sz, &iof); 462 if (mp != NULL) { 463 buf += iof; 464 error = image_chunk_copyin(blk, buf, 465 sz, data, fd); 466 image_file_unmap(mp, sz); 467 } else 468 error = errno; 469 470 blk += sz / secsz; 471 bytesize += sz; 472 data += sz; 473 } 474 cur = hole; 475 } else { 476 /* 477 * I don't know what this means or whether it 478 * can happen at all... 479 */ 480 assert(0); 481 } 482 } 483 if (error) 484 close(fd); 485 if (!error && sizep != NULL) 486 *sizep = bytesize; 487 return (error); 488 } 489 490 int 491 image_copyin(lba_t blk, int fd, uint64_t *sizep) 492 { 493 struct stat sb; 494 int error; 495 496 error = image_chunk_skipto(blk); 497 if (!error) { 498 if (fstat(fd, &sb) == -1 || !S_ISREG(sb.st_mode)) 499 error = image_copyin_stream(blk, fd, sizep); 500 else 501 error = image_copyin_mapped(blk, fd, sizep); 502 } 503 return (error); 504 } 505 506 /* 507 * Output/sink file handling. 508 */ 509 510 int 511 image_copyout(int fd) 512 { 513 int error; 514 515 error = image_copyout_region(fd, 0, image_size); 516 if (!error) 517 error = image_copyout_done(fd); 518 return (error); 519 } 520 521 int 522 image_copyout_done(int fd) 523 { 524 off_t ofs; 525 int error; 526 527 ofs = lseek(fd, 0L, SEEK_CUR); 528 if (ofs == -1) 529 return (0); 530 error = (ftruncate(fd, ofs) == -1) ? errno : 0; 531 return (error); 532 } 533 534 static int 535 image_copyout_memory(int fd, size_t size, void *ptr) 536 { 537 538 if (write(fd, ptr, size) == -1) 539 return (errno); 540 return (0); 541 } 542 543 int 544 image_copyout_zeroes(int fd, size_t count) 545 { 546 static uint8_t *zeroes = NULL; 547 size_t sz; 548 int error; 549 550 if (lseek(fd, (off_t)count, SEEK_CUR) != -1) 551 return (0); 552 553 /* 554 * If we can't seek, we must write. 555 */ 556 557 if (zeroes == NULL) { 558 zeroes = calloc(1, secsz); 559 if (zeroes == NULL) 560 return (ENOMEM); 561 } 562 563 while (count > 0) { 564 sz = (count > secsz) ? secsz : count; 565 error = image_copyout_memory(fd, sz, zeroes); 566 if (error) 567 return (error); 568 count -= sz; 569 } 570 return (0); 571 } 572 573 static int 574 image_copyout_file(int fd, size_t size, int ifd, off_t iofs) 575 { 576 void *mp; 577 char *buf; 578 size_t iosz, sz; 579 int error; 580 off_t iof; 581 582 iosz = secsz * image_swap_pgsz; 583 584 while (size > 0) { 585 sz = (size > iosz) ? iosz : size; 586 buf = mp = image_file_map(ifd, iofs, sz, &iof); 587 if (buf == NULL) 588 return (errno); 589 buf += iof; 590 error = image_copyout_memory(fd, sz, buf); 591 image_file_unmap(mp, sz); 592 if (error) 593 return (error); 594 size -= sz; 595 iofs += sz; 596 } 597 return (0); 598 } 599 600 int 601 image_copyout_region(int fd, lba_t blk, lba_t size) 602 { 603 struct chunk *ch; 604 size_t ofs, sz; 605 int error; 606 607 size *= secsz; 608 609 error = 0; 610 while (!error && size > 0) { 611 ch = image_chunk_find(blk); 612 if (ch == NULL) { 613 error = EINVAL; 614 break; 615 } 616 ofs = (blk - ch->ch_block) * secsz; 617 sz = ch->ch_size - ofs; 618 sz = ((lba_t)sz < size) ? sz : (size_t)size; 619 switch (ch->ch_type) { 620 case CH_TYPE_ZEROES: 621 error = image_copyout_zeroes(fd, sz); 622 break; 623 case CH_TYPE_FILE: 624 error = image_copyout_file(fd, sz, ch->ch_u.file.fd, 625 ch->ch_u.file.ofs + ofs); 626 break; 627 case CH_TYPE_MEMORY: 628 error = image_copyout_memory(fd, sz, ch->ch_u.mem.ptr); 629 break; 630 default: 631 assert(0); 632 } 633 size -= sz; 634 blk += sz / secsz; 635 } 636 return (error); 637 } 638 639 int 640 image_data(lba_t blk, lba_t size) 641 { 642 struct chunk *ch; 643 lba_t lim; 644 645 while (1) { 646 ch = image_chunk_find(blk); 647 if (ch == NULL) 648 return (0); 649 if (ch->ch_type != CH_TYPE_ZEROES) 650 return (1); 651 lim = ch->ch_block + (ch->ch_size / secsz); 652 if (lim >= blk + size) 653 return (0); 654 size -= lim - blk; 655 blk = lim; 656 } 657 /*NOTREACHED*/ 658 } 659 660 lba_t 661 image_get_size(void) 662 { 663 664 return (image_size); 665 } 666 667 int 668 image_set_size(lba_t blk) 669 { 670 int error; 671 672 error = image_chunk_skipto(blk); 673 if (!error) 674 image_size = blk; 675 return (error); 676 } 677 678 int 679 image_write(lba_t blk, void *buf, ssize_t len) 680 { 681 struct chunk *ch; 682 683 while (len > 0) { 684 if (!is_empty_sector(buf)) { 685 ch = image_chunk_find(blk); 686 if (ch == NULL) 687 return (ENXIO); 688 /* We may not be able to write to files. */ 689 if (ch->ch_type == CH_TYPE_FILE) 690 return (EINVAL); 691 if (ch->ch_type == CH_TYPE_ZEROES) { 692 ch = image_chunk_memory(ch, blk); 693 if (ch == NULL) 694 return (ENOMEM); 695 } 696 assert(ch->ch_type == CH_TYPE_MEMORY); 697 memcpy(ch->ch_u.mem.ptr, buf, secsz); 698 } 699 blk++; 700 buf = (char *)buf + secsz; 701 len--; 702 } 703 return (0); 704 } 705 706 static void 707 image_cleanup(void) 708 { 709 struct chunk *ch; 710 711 while ((ch = TAILQ_FIRST(&image_chunks)) != NULL) { 712 switch (ch->ch_type) { 713 case CH_TYPE_FILE: 714 /* We may be closing the same file multiple times. */ 715 if (ch->ch_u.file.fd != -1) 716 close(ch->ch_u.file.fd); 717 break; 718 case CH_TYPE_MEMORY: 719 free(ch->ch_u.mem.ptr); 720 break; 721 default: 722 break; 723 } 724 TAILQ_REMOVE(&image_chunks, ch, ch_list); 725 free(ch); 726 } 727 if (image_swap_fd != -1) 728 close(image_swap_fd); 729 unlink(image_swap_file); 730 } 731 732 int 733 image_init(void) 734 { 735 const char *tmpdir; 736 737 TAILQ_INIT(&image_chunks); 738 image_nchunks = 0; 739 740 image_swap_size = 0; 741 image_swap_pgsz = getpagesize(); 742 743 if (atexit(image_cleanup) == -1) 744 return (errno); 745 if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0') 746 tmpdir = _PATH_TMP; 747 snprintf(image_swap_file, sizeof(image_swap_file), "%s/mkimg-XXXXXX", 748 tmpdir); 749 image_swap_fd = mkstemp(image_swap_file); 750 if (image_swap_fd == -1) 751 return (errno); 752 return (0); 753 } 754