1 /*- 2 * Copyright (c) 2014 Juniper Networks, Inc. 3 * All rights reserved. 4 * 5 * Redistribution and use in source and binary forms, with or without 6 * modification, are permitted provided that the following conditions 7 * are met: 8 * 1. Redistributions of source code must retain the above copyright 9 * notice, this list of conditions and the following disclaimer. 10 * 2. Redistributions in binary form must reproduce the above copyright 11 * notice, this list of conditions and the following disclaimer in the 12 * documentation and/or other materials provided with the distribution. 13 * 14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND 15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE 16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE 17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE 18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL 19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS 20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION) 21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT 22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY 23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF 24 * SUCH DAMAGE. 25 */ 26 27 #include <sys/cdefs.h> 28 __FBSDID("$FreeBSD$"); 29 30 #include <sys/mman.h> 31 #include <sys/queue.h> 32 #include <sys/stat.h> 33 #include <sys/types.h> 34 #include <assert.h> 35 #include <err.h> 36 #include <errno.h> 37 #include <limits.h> 38 #include <paths.h> 39 #include <stdint.h> 40 #include <stdio.h> 41 #include <stdlib.h> 42 #include <string.h> 43 #include <unistd.h> 44 45 #include "image.h" 46 #include "mkimg.h" 47 48 #ifndef MAP_NOCORE 49 #define MAP_NOCORE 0 50 #endif 51 #ifndef MAP_NOSYNC 52 #define MAP_NOSYNC 0 53 #endif 54 55 #ifndef SEEK_DATA 56 #define SEEK_DATA -1 57 #endif 58 #ifndef SEEK_HOLE 59 #define SEEK_HOLE -1 60 #endif 61 62 struct chunk { 63 STAILQ_ENTRY(chunk) ch_list; 64 size_t ch_size; /* Size of chunk in bytes. */ 65 lba_t ch_block; /* Block address in image. */ 66 union { 67 struct { 68 off_t ofs; /* Offset in backing file. */ 69 int fd; /* FD of backing file. */ 70 } file; 71 struct { 72 void *ptr; /* Pointer to data in memory */ 73 } mem; 74 } ch_u; 75 u_int ch_type; 76 #define CH_TYPE_ZEROES 0 /* Chunk is a gap (no data). */ 77 #define CH_TYPE_FILE 1 /* File-backed chunk. */ 78 #define CH_TYPE_MEMORY 2 /* Memory-backed chunk */ 79 }; 80 81 static STAILQ_HEAD(chunk_head, chunk) image_chunks; 82 static u_int image_nchunks; 83 84 static char image_swap_file[PATH_MAX]; 85 static int image_swap_fd = -1; 86 static u_int image_swap_pgsz; 87 static off_t image_swap_size; 88 89 static lba_t image_size; 90 91 static int 92 is_empty_sector(void *buf) 93 { 94 uint64_t *p = buf; 95 size_t n, max; 96 97 assert(((uintptr_t)p & 3) == 0); 98 99 max = secsz / sizeof(uint64_t); 100 for (n = 0; n < max; n++) { 101 if (p[n] != 0UL) 102 return (0); 103 } 104 return (1); 105 } 106 107 /* 108 * Swap file handlng. 109 */ 110 111 static off_t 112 image_swap_alloc(size_t size) 113 { 114 off_t ofs; 115 size_t unit; 116 117 unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz; 118 assert((unit & (unit - 1)) == 0); 119 120 size = (size + unit - 1) & ~(unit - 1); 121 122 ofs = image_swap_size; 123 image_swap_size += size; 124 if (ftruncate(image_swap_fd, image_swap_size) == -1) { 125 image_swap_size = ofs; 126 ofs = -1LL; 127 } 128 return (ofs); 129 } 130 131 /* 132 * Image chunk handling. 133 */ 134 135 static struct chunk * 136 image_chunk_find(lba_t blk) 137 { 138 static struct chunk *last = NULL; 139 struct chunk *ch; 140 141 ch = (last != NULL && last->ch_block <= blk) 142 ? last : STAILQ_FIRST(&image_chunks); 143 while (ch != NULL) { 144 if (ch->ch_block <= blk && 145 (lba_t)(ch->ch_block + (ch->ch_size / secsz)) > blk) { 146 last = ch; 147 break; 148 } 149 ch = STAILQ_NEXT(ch, ch_list); 150 } 151 return (ch); 152 } 153 154 static size_t 155 image_chunk_grow(struct chunk *ch, size_t sz) 156 { 157 size_t dsz, newsz; 158 159 newsz = ch->ch_size + sz; 160 if (newsz > ch->ch_size) { 161 ch->ch_size = newsz; 162 return (0); 163 } 164 /* We would overflow -- create new chunk for remainder. */ 165 dsz = SIZE_MAX - ch->ch_size; 166 assert(dsz < sz); 167 ch->ch_size = SIZE_MAX; 168 return (sz - dsz); 169 } 170 171 static struct chunk * 172 image_chunk_memory(struct chunk *ch, lba_t blk) 173 { 174 struct chunk *new; 175 void *ptr; 176 177 ptr = calloc(1, secsz); 178 if (ptr == NULL) 179 return (NULL); 180 181 if (ch->ch_block < blk) { 182 new = malloc(sizeof(*new)); 183 if (new == NULL) { 184 free(ptr); 185 return (NULL); 186 } 187 memcpy(new, ch, sizeof(*new)); 188 ch->ch_size = (blk - ch->ch_block) * secsz; 189 new->ch_block = blk; 190 new->ch_size -= ch->ch_size; 191 STAILQ_INSERT_AFTER(&image_chunks, ch, new, ch_list); 192 image_nchunks++; 193 ch = new; 194 } 195 196 if (ch->ch_size > secsz) { 197 new = malloc(sizeof(*new)); 198 if (new == NULL) { 199 free(ptr); 200 return (NULL); 201 } 202 memcpy(new, ch, sizeof(*new)); 203 ch->ch_size = secsz; 204 new->ch_block++; 205 new->ch_size -= secsz; 206 STAILQ_INSERT_AFTER(&image_chunks, ch, new, ch_list); 207 image_nchunks++; 208 } 209 210 ch->ch_type = CH_TYPE_MEMORY; 211 ch->ch_u.mem.ptr = ptr; 212 return (ch); 213 } 214 215 static int 216 image_chunk_skipto(lba_t to) 217 { 218 struct chunk *ch; 219 lba_t from; 220 size_t sz; 221 222 ch = STAILQ_LAST(&image_chunks, chunk, ch_list); 223 from = (ch != NULL) ? ch->ch_block + (ch->ch_size / secsz) : 0LL; 224 225 assert(from <= to); 226 227 /* Nothing to do? */ 228 if (from == to) 229 return (0); 230 /* Avoid bugs due to overflows. */ 231 if ((uintmax_t)(to - from) > (uintmax_t)(SIZE_MAX / secsz)) 232 return (EFBIG); 233 sz = (to - from) * secsz; 234 if (ch != NULL && ch->ch_type == CH_TYPE_ZEROES) { 235 sz = image_chunk_grow(ch, sz); 236 if (sz == 0) 237 return (0); 238 from = ch->ch_block + (ch->ch_size / secsz); 239 } 240 ch = malloc(sizeof(*ch)); 241 if (ch == NULL) 242 return (ENOMEM); 243 memset(ch, 0, sizeof(*ch)); 244 ch->ch_block = from; 245 ch->ch_size = sz; 246 ch->ch_type = CH_TYPE_ZEROES; 247 STAILQ_INSERT_TAIL(&image_chunks, ch, ch_list); 248 image_nchunks++; 249 return (0); 250 } 251 252 static int 253 image_chunk_append(lba_t blk, size_t sz, off_t ofs, int fd) 254 { 255 struct chunk *ch; 256 257 ch = STAILQ_LAST(&image_chunks, chunk, ch_list); 258 if (ch != NULL && ch->ch_type == CH_TYPE_FILE) { 259 if (fd == ch->ch_u.file.fd && 260 blk == (lba_t)(ch->ch_block + (ch->ch_size / secsz)) && 261 ofs == (off_t)(ch->ch_u.file.ofs + ch->ch_size)) { 262 sz = image_chunk_grow(ch, sz); 263 if (sz == 0) 264 return (0); 265 blk = ch->ch_block + (ch->ch_size / secsz); 266 ofs = ch->ch_u.file.ofs + ch->ch_size; 267 } 268 } 269 ch = malloc(sizeof(*ch)); 270 if (ch == NULL) 271 return (ENOMEM); 272 memset(ch, 0, sizeof(*ch)); 273 ch->ch_block = blk; 274 ch->ch_size = sz; 275 ch->ch_type = CH_TYPE_FILE; 276 ch->ch_u.file.ofs = ofs; 277 ch->ch_u.file.fd = fd; 278 STAILQ_INSERT_TAIL(&image_chunks, ch, ch_list); 279 image_nchunks++; 280 return (0); 281 } 282 283 static int 284 image_chunk_copyin(lba_t blk, void *buf, size_t sz, off_t ofs, int fd) 285 { 286 uint8_t *p = buf; 287 int error; 288 289 error = 0; 290 sz = (sz + secsz - 1) & ~(secsz - 1); 291 while (!error && sz > 0) { 292 if (is_empty_sector(p)) 293 error = image_chunk_skipto(blk + 1); 294 else 295 error = image_chunk_append(blk, secsz, ofs, fd); 296 blk++; 297 p += secsz; 298 sz -= secsz; 299 ofs += secsz; 300 } 301 return (error); 302 } 303 304 /* 305 * File mapping support. 306 */ 307 308 static void * 309 image_file_map(int fd, off_t ofs, size_t sz) 310 { 311 void *ptr; 312 size_t unit; 313 int flags, prot; 314 315 unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz; 316 assert((unit & (unit - 1)) == 0); 317 318 flags = MAP_NOCORE | MAP_NOSYNC | MAP_SHARED; 319 /* Allow writing to our swap file only. */ 320 prot = PROT_READ | ((fd == image_swap_fd) ? PROT_WRITE : 0); 321 sz = (sz + unit - 1) & ~(unit - 1); 322 ptr = mmap(NULL, sz, prot, flags, fd, ofs); 323 return ((ptr == MAP_FAILED) ? NULL : ptr); 324 } 325 326 static int 327 image_file_unmap(void *buffer, size_t sz) 328 { 329 size_t unit; 330 331 unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz; 332 sz = (sz + unit - 1) & ~(unit - 1); 333 if (madvise(buffer, sz, MADV_DONTNEED) != 0) 334 warn("madvise"); 335 munmap(buffer, sz); 336 return (0); 337 } 338 339 /* 340 * Input/source file handling. 341 */ 342 343 static int 344 image_copyin_stream(lba_t blk, int fd, uint64_t *sizep) 345 { 346 char *buffer; 347 uint64_t bytesize; 348 off_t swofs; 349 size_t iosz; 350 ssize_t rdsz; 351 int error; 352 353 /* 354 * This makes sure we're doing I/O in multiples of the page 355 * size as well as of the sector size. 2MB is the minimum 356 * by virtue of secsz at least 512 bytes and the page size 357 * at least 4K bytes. 358 */ 359 iosz = secsz * image_swap_pgsz; 360 361 bytesize = 0; 362 do { 363 swofs = image_swap_alloc(iosz); 364 if (swofs == -1LL) 365 return (errno); 366 buffer = image_file_map(image_swap_fd, swofs, iosz); 367 if (buffer == NULL) 368 return (errno); 369 rdsz = read(fd, buffer, iosz); 370 if (rdsz > 0) 371 error = image_chunk_copyin(blk, buffer, rdsz, swofs, 372 image_swap_fd); 373 else if (rdsz < 0) 374 error = errno; 375 else 376 error = 0; 377 image_file_unmap(buffer, iosz); 378 /* XXX should we relinguish unused swap space? */ 379 if (error) 380 return (error); 381 382 bytesize += rdsz; 383 blk += (rdsz + secsz - 1) / secsz; 384 } while (rdsz > 0); 385 386 if (sizep != NULL) 387 *sizep = bytesize; 388 return (0); 389 } 390 391 static int 392 image_copyin_mapped(lba_t blk, int fd, uint64_t *sizep) 393 { 394 off_t cur, data, end, hole, pos; 395 void *buf; 396 uint64_t bytesize; 397 size_t iosz, sz; 398 int error; 399 400 /* 401 * We'd like to know the size of the file and we must 402 * be able to seek in order to mmap(2). If this isn't 403 * possible, then treat the file as a stream/pipe. 404 */ 405 end = lseek(fd, 0L, SEEK_END); 406 if (end == -1L) 407 return (image_copyin_stream(blk, fd, sizep)); 408 409 /* 410 * We need the file opened for the duration and our 411 * caller is going to close the file. Make a dup(2) 412 * so that control the faith of the descriptor. 413 */ 414 fd = dup(fd); 415 if (fd == -1) 416 return (errno); 417 418 iosz = secsz * image_swap_pgsz; 419 420 bytesize = 0; 421 cur = pos = 0; 422 error = 0; 423 while (!error && cur < end) { 424 hole = lseek(fd, cur, SEEK_HOLE); 425 if (hole == -1) 426 hole = end; 427 data = lseek(fd, cur, SEEK_DATA); 428 if (data == -1) 429 data = end; 430 431 /* 432 * Treat the entire file as data if sparse files 433 * are not supported by the underlying file system. 434 */ 435 if (hole == end && data == end) 436 data = cur; 437 438 if (cur == hole && data > hole) { 439 hole = pos; 440 pos = data & ~((uint64_t)secsz - 1); 441 442 blk += (pos - hole) / secsz; 443 error = image_chunk_skipto(blk); 444 445 bytesize += pos - hole; 446 cur = data; 447 } else if (cur == data && hole > data) { 448 data = pos; 449 pos = (hole + secsz - 1) & ~((uint64_t)secsz - 1); 450 451 while (data < pos) { 452 sz = (pos - data > (off_t)iosz) 453 ? iosz : (size_t)(pos - data); 454 455 buf = image_file_map(fd, data, sz); 456 if (buf != NULL) { 457 error = image_chunk_copyin(blk, buf, 458 sz, data, fd); 459 image_file_unmap(buf, sz); 460 } else 461 error = errno; 462 463 blk += sz / secsz; 464 bytesize += sz; 465 data += sz; 466 } 467 cur = hole; 468 } else { 469 /* 470 * I don't know what this means or whether it 471 * can happen at all... 472 */ 473 assert(0); 474 } 475 } 476 if (error) 477 close(fd); 478 if (!error && sizep != NULL) 479 *sizep = bytesize; 480 return (error); 481 } 482 483 int 484 image_copyin(lba_t blk, int fd, uint64_t *sizep) 485 { 486 struct stat sb; 487 int error; 488 489 error = image_chunk_skipto(blk); 490 if (!error) { 491 if (fstat(fd, &sb) == -1 || !S_ISREG(sb.st_mode)) 492 error = image_copyin_stream(blk, fd, sizep); 493 else 494 error = image_copyin_mapped(blk, fd, sizep); 495 } 496 return (error); 497 } 498 499 /* 500 * Output/sink file handling. 501 */ 502 503 int 504 image_copyout(int fd) 505 { 506 int error; 507 508 error = image_copyout_region(fd, 0, image_size); 509 if (!error) 510 error = image_copyout_done(fd); 511 return (error); 512 } 513 514 int 515 image_copyout_done(int fd) 516 { 517 off_t ofs; 518 int error; 519 520 ofs = lseek(fd, 0L, SEEK_CUR); 521 if (ofs == -1) 522 return (0); 523 error = (ftruncate(fd, ofs) == -1) ? errno : 0; 524 return (error); 525 } 526 527 static int 528 image_copyout_memory(int fd, size_t size, void *ptr) 529 { 530 531 if (write(fd, ptr, size) == -1) 532 return (errno); 533 return (0); 534 } 535 536 int 537 image_copyout_zeroes(int fd, size_t count) 538 { 539 static uint8_t *zeroes = NULL; 540 size_t sz; 541 int error; 542 543 if (lseek(fd, (off_t)count, SEEK_CUR) != -1) 544 return (0); 545 546 /* 547 * If we can't seek, we must write. 548 */ 549 550 if (zeroes == NULL) { 551 zeroes = calloc(1, secsz); 552 if (zeroes == NULL) 553 return (ENOMEM); 554 } 555 556 while (count > 0) { 557 sz = (count > secsz) ? secsz : count; 558 error = image_copyout_memory(fd, sz, zeroes); 559 if (error) 560 return (error); 561 count -= sz; 562 } 563 return (0); 564 } 565 566 static int 567 image_copyout_file(int fd, size_t size, int ifd, off_t iofs) 568 { 569 void *buf; 570 size_t iosz, sz; 571 int error; 572 573 iosz = secsz * image_swap_pgsz; 574 575 while (size > 0) { 576 sz = (size > iosz) ? iosz : size; 577 buf = image_file_map(ifd, iofs, sz); 578 if (buf == NULL) 579 return (errno); 580 error = image_copyout_memory(fd, sz, buf); 581 image_file_unmap(buf, sz); 582 if (error) 583 return (error); 584 size -= sz; 585 iofs += sz; 586 } 587 return (0); 588 } 589 590 int 591 image_copyout_region(int fd, lba_t blk, lba_t size) 592 { 593 struct chunk *ch; 594 size_t ofs, sz; 595 int error; 596 597 size *= secsz; 598 599 error = 0; 600 while (!error && size > 0) { 601 ch = image_chunk_find(blk); 602 if (ch == NULL) { 603 error = EINVAL; 604 break; 605 } 606 ofs = (blk - ch->ch_block) * secsz; 607 sz = ch->ch_size - ofs; 608 sz = ((lba_t)sz < size) ? sz : (size_t)size; 609 switch (ch->ch_type) { 610 case CH_TYPE_ZEROES: 611 error = image_copyout_zeroes(fd, sz); 612 break; 613 case CH_TYPE_FILE: 614 error = image_copyout_file(fd, sz, ch->ch_u.file.fd, 615 ch->ch_u.file.ofs + ofs); 616 break; 617 case CH_TYPE_MEMORY: 618 error = image_copyout_memory(fd, sz, ch->ch_u.mem.ptr); 619 break; 620 default: 621 assert(0); 622 } 623 size -= sz; 624 blk += sz / secsz; 625 } 626 return (error); 627 } 628 629 int 630 image_data(lba_t blk, lba_t size) 631 { 632 struct chunk *ch; 633 lba_t lim; 634 635 while (1) { 636 ch = image_chunk_find(blk); 637 if (ch == NULL) 638 return (0); 639 if (ch->ch_type != CH_TYPE_ZEROES) 640 return (1); 641 lim = ch->ch_block + (ch->ch_size / secsz); 642 if (lim >= blk + size) 643 return (0); 644 size -= lim - blk; 645 blk = lim; 646 } 647 /*NOTREACHED*/ 648 } 649 650 lba_t 651 image_get_size(void) 652 { 653 654 return (image_size); 655 } 656 657 int 658 image_set_size(lba_t blk) 659 { 660 int error; 661 662 error = image_chunk_skipto(blk); 663 if (!error) 664 image_size = blk; 665 return (error); 666 } 667 668 int 669 image_write(lba_t blk, void *buf, ssize_t len) 670 { 671 struct chunk *ch; 672 673 while (len > 0) { 674 if (!is_empty_sector(buf)) { 675 ch = image_chunk_find(blk); 676 if (ch == NULL) 677 return (ENXIO); 678 /* We may not be able to write to files. */ 679 if (ch->ch_type == CH_TYPE_FILE) 680 return (EINVAL); 681 if (ch->ch_type == CH_TYPE_ZEROES) { 682 ch = image_chunk_memory(ch, blk); 683 if (ch == NULL) 684 return (ENOMEM); 685 } 686 assert(ch->ch_type == CH_TYPE_MEMORY); 687 memcpy(ch->ch_u.mem.ptr, buf, secsz); 688 } 689 blk++; 690 buf = (char *)buf + secsz; 691 len--; 692 } 693 return (0); 694 } 695 696 static void 697 image_cleanup(void) 698 { 699 struct chunk *ch; 700 701 while ((ch = STAILQ_FIRST(&image_chunks)) != NULL) { 702 switch (ch->ch_type) { 703 case CH_TYPE_FILE: 704 /* We may be closing the same file multiple times. */ 705 if (ch->ch_u.file.fd != -1) 706 close(ch->ch_u.file.fd); 707 break; 708 case CH_TYPE_MEMORY: 709 free(ch->ch_u.mem.ptr); 710 break; 711 default: 712 break; 713 } 714 STAILQ_REMOVE_HEAD(&image_chunks, ch_list); 715 free(ch); 716 } 717 if (image_swap_fd != -1) 718 close(image_swap_fd); 719 unlink(image_swap_file); 720 } 721 722 int 723 image_init(void) 724 { 725 const char *tmpdir; 726 727 STAILQ_INIT(&image_chunks); 728 image_nchunks = 0; 729 730 image_swap_size = 0; 731 image_swap_pgsz = getpagesize(); 732 733 if (atexit(image_cleanup) == -1) 734 return (errno); 735 if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0') 736 tmpdir = _PATH_TMP; 737 snprintf(image_swap_file, sizeof(image_swap_file), "%s/mkimg-XXXXXX", 738 tmpdir); 739 image_swap_fd = mkstemp(image_swap_file); 740 if (image_swap_fd == -1) 741 return (errno); 742 return (0); 743 } 744