1 /*-
2 * Copyright (c) 2014 Juniper Networks, Inc.
3 * All rights reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice, this list of conditions and the following disclaimer.
10 * 2. Redistributions in binary form must reproduce the above copyright
11 * notice, this list of conditions and the following disclaimer in the
12 * documentation and/or other materials provided with the distribution.
13 *
14 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18 * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24 * SUCH DAMAGE.
25 */
26
27 #include <sys/cdefs.h>
28 #include <sys/mman.h>
29 #include <sys/stat.h>
30 #include <assert.h>
31 #include <err.h>
32 #include <errno.h>
33 #include <limits.h>
34 #include <paths.h>
35 #include <stdint.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39 #include <unistd.h>
40
41 #include "image.h"
42 #include "mkimg.h"
43
/*
 * mmap(2) flags that not every platform provides.  Where one is missing
 * it is defined as 0 so that it can be OR'ed into the flag word without
 * any effect.
 */
#ifndef MAP_NOCORE
#define	MAP_NOCORE	0
#endif
#ifndef MAP_NOSYNC
#define	MAP_NOSYNC	0
#endif

/*
 * lseek(2) whence values used to probe for holes and data.  Where one is
 * missing it is defined as an invalid whence value.  The value is
 * parenthesized so that the macro expands safely inside any expression.
 */
#ifndef SEEK_DATA
#define	SEEK_DATA	(-1)
#endif
#ifndef SEEK_HOLE
#define	SEEK_HOLE	(-1)
#endif
57
58 struct chunk {
59 TAILQ_ENTRY(chunk) ch_list;
60 size_t ch_size; /* Size of chunk in bytes. */
61 lba_t ch_block; /* Block address in image. */
62 union {
63 struct {
64 off_t ofs; /* Offset in backing file. */
65 int fd; /* FD of backing file. */
66 } file;
67 struct {
68 void *ptr; /* Pointer to data in memory */
69 } mem;
70 } ch_u;
71 u_int ch_type;
72 #define CH_TYPE_ZEROES 0 /* Chunk is a gap (no data). */
73 #define CH_TYPE_FILE 1 /* File-backed chunk. */
74 #define CH_TYPE_MEMORY 2 /* Memory-backed chunk */
75 };
76
/* The chunks describing the image, and how many are on the list. */
static TAILQ_HEAD(chunk_head, chunk) image_chunks;
static u_int image_nchunks;

/*
 * Swap file state: path name, descriptor (-1 while not open), the page
 * size obtained in image_init(), and the number of bytes handed out by
 * image_swap_alloc() so far.
 */
static char image_swap_file[PATH_MAX];
static int image_swap_fd = -1;
static u_int image_swap_pgsz;
static off_t image_swap_size;

/* Image size in blocks, as recorded by image_set_size(). */
static lba_t image_size;
86
87 static int
is_empty_sector(void * buf)88 is_empty_sector(void *buf)
89 {
90 uint64_t *p = buf;
91 size_t n, max;
92
93 assert(((uintptr_t)p & 3) == 0);
94
95 max = secsz / sizeof(uint64_t);
96 for (n = 0; n < max; n++) {
97 if (p[n] != 0UL)
98 return (0);
99 }
100 return (1);
101 }
102
103 /*
 * Swap file handling.
105 */
106
107 static off_t
image_swap_alloc(size_t size)108 image_swap_alloc(size_t size)
109 {
110 off_t ofs;
111 size_t unit;
112
113 unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz;
114 assert((unit & (unit - 1)) == 0);
115
116 size = (size + unit - 1) & ~(unit - 1);
117
118 ofs = image_swap_size;
119 image_swap_size += size;
120 if (ftruncate(image_swap_fd, image_swap_size) == -1) {
121 image_swap_size = ofs;
122 ofs = -1LL;
123 }
124 return (ofs);
125 }
126
127 /*
128 * Image chunk handling.
129 */
130
131 static struct chunk *
image_chunk_find(lba_t blk)132 image_chunk_find(lba_t blk)
133 {
134 static struct chunk *last = NULL;
135 struct chunk *ch;
136
137 ch = (last != NULL && last->ch_block <= blk)
138 ? last : TAILQ_FIRST(&image_chunks);
139 while (ch != NULL) {
140 if (ch->ch_block <= blk &&
141 (lba_t)(ch->ch_block + (ch->ch_size / secsz)) > blk) {
142 last = ch;
143 break;
144 }
145 ch = TAILQ_NEXT(ch, ch_list);
146 }
147 return (ch);
148 }
149
150 static size_t
image_chunk_grow(struct chunk * ch,size_t sz)151 image_chunk_grow(struct chunk *ch, size_t sz)
152 {
153 size_t dsz, newsz;
154
155 newsz = ch->ch_size + sz;
156 if (newsz > ch->ch_size) {
157 ch->ch_size = newsz;
158 return (0);
159 }
160 /* We would overflow -- create new chunk for remainder. */
161 dsz = SIZE_MAX - ch->ch_size;
162 assert(dsz < sz);
163 ch->ch_size = SIZE_MAX;
164 return (sz - dsz);
165 }
166
167 static struct chunk *
image_chunk_memory(struct chunk * ch,lba_t blk)168 image_chunk_memory(struct chunk *ch, lba_t blk)
169 {
170 struct chunk *new;
171 void *ptr;
172
173 ptr = calloc(1, secsz);
174 if (ptr == NULL)
175 return (NULL);
176
177 if (ch->ch_block < blk) {
178 new = malloc(sizeof(*new));
179 if (new == NULL) {
180 free(ptr);
181 return (NULL);
182 }
183 memcpy(new, ch, sizeof(*new));
184 ch->ch_size = (blk - ch->ch_block) * secsz;
185 new->ch_block = blk;
186 new->ch_size -= ch->ch_size;
187 TAILQ_INSERT_AFTER(&image_chunks, ch, new, ch_list);
188 image_nchunks++;
189 ch = new;
190 }
191
192 if (ch->ch_size > secsz) {
193 new = malloc(sizeof(*new));
194 if (new == NULL) {
195 free(ptr);
196 return (NULL);
197 }
198 memcpy(new, ch, sizeof(*new));
199 ch->ch_size = secsz;
200 new->ch_block++;
201 new->ch_size -= secsz;
202 TAILQ_INSERT_AFTER(&image_chunks, ch, new, ch_list);
203 image_nchunks++;
204 }
205
206 ch->ch_type = CH_TYPE_MEMORY;
207 ch->ch_u.mem.ptr = ptr;
208 return (ch);
209 }
210
211 static int
image_chunk_skipto(lba_t to)212 image_chunk_skipto(lba_t to)
213 {
214 struct chunk *ch;
215 lba_t from;
216 size_t sz;
217
218 ch = TAILQ_LAST(&image_chunks, chunk_head);
219 from = (ch != NULL) ? ch->ch_block + (ch->ch_size / secsz) : 0LL;
220
221 assert(from <= to);
222
223 /* Nothing to do? */
224 if (from == to)
225 return (0);
226 /* Avoid bugs due to overflows. */
227 if ((uintmax_t)(to - from) > (uintmax_t)(SIZE_MAX / secsz))
228 return (EFBIG);
229 sz = (to - from) * secsz;
230 if (ch != NULL && ch->ch_type == CH_TYPE_ZEROES) {
231 sz = image_chunk_grow(ch, sz);
232 if (sz == 0)
233 return (0);
234 from = ch->ch_block + (ch->ch_size / secsz);
235 }
236 ch = malloc(sizeof(*ch));
237 if (ch == NULL)
238 return (ENOMEM);
239 memset(ch, 0, sizeof(*ch));
240 ch->ch_block = from;
241 ch->ch_size = sz;
242 ch->ch_type = CH_TYPE_ZEROES;
243 TAILQ_INSERT_TAIL(&image_chunks, ch, ch_list);
244 image_nchunks++;
245 return (0);
246 }
247
248 static int
image_chunk_append(lba_t blk,size_t sz,off_t ofs,int fd)249 image_chunk_append(lba_t blk, size_t sz, off_t ofs, int fd)
250 {
251 struct chunk *ch;
252
253 ch = TAILQ_LAST(&image_chunks, chunk_head);
254 if (ch != NULL && ch->ch_type == CH_TYPE_FILE) {
255 if (fd == ch->ch_u.file.fd &&
256 blk == (lba_t)(ch->ch_block + (ch->ch_size / secsz)) &&
257 ofs == (off_t)(ch->ch_u.file.ofs + ch->ch_size)) {
258 sz = image_chunk_grow(ch, sz);
259 if (sz == 0)
260 return (0);
261 blk = ch->ch_block + (ch->ch_size / secsz);
262 ofs = ch->ch_u.file.ofs + ch->ch_size;
263 }
264 }
265 ch = malloc(sizeof(*ch));
266 if (ch == NULL)
267 return (ENOMEM);
268 memset(ch, 0, sizeof(*ch));
269 ch->ch_block = blk;
270 ch->ch_size = sz;
271 ch->ch_type = CH_TYPE_FILE;
272 ch->ch_u.file.ofs = ofs;
273 ch->ch_u.file.fd = fd;
274 TAILQ_INSERT_TAIL(&image_chunks, ch, ch_list);
275 image_nchunks++;
276 return (0);
277 }
278
279 static int
image_chunk_copyin(lba_t blk,void * buf,size_t sz,off_t ofs,int fd)280 image_chunk_copyin(lba_t blk, void *buf, size_t sz, off_t ofs, int fd)
281 {
282 uint8_t *p = buf;
283 int error;
284
285 error = 0;
286 sz = (sz + secsz - 1) & ~(secsz - 1);
287 while (!error && sz > 0) {
288 if (is_empty_sector(p))
289 error = image_chunk_skipto(blk + 1);
290 else
291 error = image_chunk_append(blk, secsz, ofs, fd);
292 blk++;
293 p += secsz;
294 sz -= secsz;
295 ofs += secsz;
296 }
297 return (error);
298 }
299
300 /*
301 * File mapping support.
302 */
303
304 static void *
image_file_map(int fd,off_t ofs,size_t sz,off_t * iofp)305 image_file_map(int fd, off_t ofs, size_t sz, off_t *iofp)
306 {
307 void *ptr;
308 size_t unit;
309 int flags, prot;
310 off_t x;
311
312 /* On Linux anyway ofs must also be page aligned */
313 if ((x = (ofs % image_swap_pgsz)) != 0) {
314 ofs -= x;
315 sz += x;
316 *iofp = x;
317 } else
318 *iofp = 0;
319 unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz;
320 assert((unit & (unit - 1)) == 0);
321
322 flags = MAP_NOCORE | MAP_NOSYNC | MAP_SHARED;
323 /* Allow writing to our swap file only. */
324 prot = PROT_READ | ((fd == image_swap_fd) ? PROT_WRITE : 0);
325 sz = (sz + unit - 1) & ~(unit - 1);
326 ptr = mmap(NULL, sz, prot, flags, fd, ofs);
327 return ((ptr == MAP_FAILED) ? NULL : ptr);
328 }
329
330 static int
image_file_unmap(void * buffer,size_t sz)331 image_file_unmap(void *buffer, size_t sz)
332 {
333 size_t unit;
334
335 unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz;
336 sz = (sz + unit - 1) & ~(unit - 1);
337 if (madvise(buffer, sz, MADV_DONTNEED) != 0)
338 warn("madvise");
339 munmap(buffer, sz);
340 return (0);
341 }
342
343 /*
344 * Input/source file handling.
345 */
346
347 static int
image_copyin_stream(lba_t blk,int fd,uint64_t * sizep)348 image_copyin_stream(lba_t blk, int fd, uint64_t *sizep)
349 {
350 char *buffer;
351 uint64_t bytesize;
352 off_t swofs;
353 size_t iosz;
354 ssize_t rdsz;
355 int error;
356 off_t iof;
357
358 /*
359 * This makes sure we're doing I/O in multiples of the page
360 * size as well as of the sector size. 2MB is the minimum
361 * by virtue of secsz at least 512 bytes and the page size
362 * at least 4K bytes.
363 */
364 iosz = secsz * image_swap_pgsz;
365
366 bytesize = 0;
367 do {
368 swofs = image_swap_alloc(iosz);
369 if (swofs == -1LL)
370 return (errno);
371 buffer = image_file_map(image_swap_fd, swofs, iosz, &iof);
372 if (buffer == NULL)
373 return (errno);
374 rdsz = read(fd, &buffer[iof], iosz);
375 if (rdsz > 0)
376 error = image_chunk_copyin(blk, &buffer[iof], rdsz, swofs,
377 image_swap_fd);
378 else if (rdsz < 0)
379 error = errno;
380 else
381 error = 0;
382 image_file_unmap(buffer, iosz);
383 /* XXX should we relinguish unused swap space? */
384 if (error)
385 return (error);
386
387 bytesize += rdsz;
388 blk += (rdsz + secsz - 1) / secsz;
389 } while (rdsz > 0);
390
391 if (sizep != NULL)
392 *sizep = bytesize;
393 return (0);
394 }
395
396 static int
image_copyin_mapped(lba_t blk,int fd,uint64_t * sizep)397 image_copyin_mapped(lba_t blk, int fd, uint64_t *sizep)
398 {
399 off_t cur, data, end, hole, pos, iof;
400 void *mp;
401 char *buf;
402 uint64_t bytesize;
403 size_t iosz, sz;
404 int error;
405
406 /*
407 * We'd like to know the size of the file and we must
408 * be able to seek in order to mmap(2). If this isn't
409 * possible, then treat the file as a stream/pipe.
410 */
411 end = lseek(fd, 0L, SEEK_END);
412 if (end == -1L)
413 return (image_copyin_stream(blk, fd, sizep));
414
415 /*
416 * We need the file opened for the duration and our
417 * caller is going to close the file. Make a dup(2)
418 * so that control the faith of the descriptor.
419 */
420 fd = dup(fd);
421 if (fd == -1)
422 return (errno);
423
424 iosz = secsz * image_swap_pgsz;
425
426 bytesize = 0;
427 cur = pos = 0;
428 error = 0;
429 while (!error && cur < end) {
430 hole = lseek(fd, cur, SEEK_HOLE);
431 if (hole == -1)
432 hole = end;
433 data = lseek(fd, cur, SEEK_DATA);
434 if (data == -1)
435 data = end;
436
437 /*
438 * Treat the entire file as data if sparse files
439 * are not supported by the underlying file system.
440 */
441 if (hole == end && data == end)
442 data = cur;
443
444 if (cur == hole && data > hole) {
445 hole = pos;
446 pos = data & ~((uint64_t)secsz - 1);
447
448 blk += (pos - hole) / secsz;
449 error = image_chunk_skipto(blk);
450
451 bytesize += pos - hole;
452 cur = data;
453 } else if (cur == data && hole > data) {
454 data = pos;
455 pos = (hole + secsz - 1) & ~((uint64_t)secsz - 1);
456
457 while (data < pos) {
458 sz = (pos - data > (off_t)iosz)
459 ? iosz : (size_t)(pos - data);
460
461 buf = mp = image_file_map(fd, data, sz, &iof);
462 if (mp != NULL) {
463 buf += iof;
464 error = image_chunk_copyin(blk, buf,
465 sz, data, fd);
466 image_file_unmap(mp, sz);
467 } else
468 error = errno;
469
470 blk += sz / secsz;
471 bytesize += sz;
472 data += sz;
473 }
474 cur = hole;
475 } else {
476 /*
477 * I don't know what this means or whether it
478 * can happen at all...
479 */
480 assert(0);
481 }
482 }
483 if (error)
484 close(fd);
485 if (!error && sizep != NULL)
486 *sizep = bytesize;
487 return (error);
488 }
489
490 int
image_copyin(lba_t blk,int fd,uint64_t * sizep)491 image_copyin(lba_t blk, int fd, uint64_t *sizep)
492 {
493 struct stat sb;
494 int error;
495
496 error = image_chunk_skipto(blk);
497 if (!error) {
498 if (fstat(fd, &sb) == -1 || !S_ISREG(sb.st_mode))
499 error = image_copyin_stream(blk, fd, sizep);
500 else
501 error = image_copyin_mapped(blk, fd, sizep);
502 }
503 return (error);
504 }
505
506 /*
507 * Output/sink file handling.
508 */
509
510 int
image_copyout(int fd)511 image_copyout(int fd)
512 {
513 int error;
514
515 error = image_copyout_region(fd, 0, image_size);
516 if (!error)
517 error = image_copyout_done(fd);
518 return (error);
519 }
520
/*
 * Finish writing to fd by setting the file's length to the current
 * file position.
 *
 * If the position cannot be obtained (lseek(2) fails), nothing is done
 * and 0 is returned.  Otherwise the result is 0 on success or the
 * errno value from ftruncate(2).
 */
int
image_copyout_done(int fd)
{
	off_t pos;

	pos = lseek(fd, 0L, SEEK_CUR);
	if (pos == -1)
		return (0);
	if (ftruncate(fd, pos) == -1)
		return (errno);
	return (0);
}
533
/*
 * Write size bytes starting at ptr to descriptor fd.
 *
 * write(2) is allowed to transfer fewer bytes than requested, so the
 * call is repeated until the whole buffer has been written.
 *
 * Returns 0 once everything has been written, the errno value of a
 * failing write(2), or EIO if write(2) reports that it made no
 * progress.
 */
static int
image_copyout_memory(int fd, size_t size, void *ptr)
{
	const char *p = ptr;
	ssize_t wrsz;

	while (size > 0) {
		wrsz = write(fd, p, size);
		if (wrsz == -1)
			return (errno);
		/* Don't spin if the descriptor accepts nothing. */
		if (wrsz == 0)
			return (EIO);
		p += wrsz;
		size -= (size_t)wrsz;
	}
	return (0);
}
542
543 int
image_copyout_zeroes(int fd,size_t count)544 image_copyout_zeroes(int fd, size_t count)
545 {
546 static uint8_t *zeroes = NULL;
547 size_t sz;
548 int error;
549
550 if (lseek(fd, (off_t)count, SEEK_CUR) != -1)
551 return (0);
552
553 /*
554 * If we can't seek, we must write.
555 */
556
557 if (zeroes == NULL) {
558 zeroes = calloc(1, secsz);
559 if (zeroes == NULL)
560 return (ENOMEM);
561 }
562
563 while (count > 0) {
564 sz = (count > secsz) ? secsz : count;
565 error = image_copyout_memory(fd, sz, zeroes);
566 if (error)
567 return (error);
568 count -= sz;
569 }
570 return (0);
571 }
572
573 static int
image_copyout_file(int fd,size_t size,int ifd,off_t iofs)574 image_copyout_file(int fd, size_t size, int ifd, off_t iofs)
575 {
576 void *mp;
577 char *buf;
578 size_t iosz, sz;
579 int error;
580 off_t iof;
581
582 iosz = secsz * image_swap_pgsz;
583
584 while (size > 0) {
585 sz = (size > iosz) ? iosz : size;
586 buf = mp = image_file_map(ifd, iofs, sz, &iof);
587 if (buf == NULL)
588 return (errno);
589 buf += iof;
590 error = image_copyout_memory(fd, sz, buf);
591 image_file_unmap(mp, sz);
592 if (error)
593 return (error);
594 size -= sz;
595 iofs += sz;
596 }
597 return (0);
598 }
599
600 int
image_copyout_region(int fd,lba_t blk,lba_t size)601 image_copyout_region(int fd, lba_t blk, lba_t size)
602 {
603 struct chunk *ch;
604 size_t ofs, sz;
605 int error;
606
607 size *= secsz;
608
609 error = 0;
610 while (!error && size > 0) {
611 ch = image_chunk_find(blk);
612 if (ch == NULL) {
613 error = EINVAL;
614 break;
615 }
616 ofs = (blk - ch->ch_block) * secsz;
617 sz = ch->ch_size - ofs;
618 sz = ((lba_t)sz < size) ? sz : (size_t)size;
619 switch (ch->ch_type) {
620 case CH_TYPE_ZEROES:
621 error = image_copyout_zeroes(fd, sz);
622 break;
623 case CH_TYPE_FILE:
624 error = image_copyout_file(fd, sz, ch->ch_u.file.fd,
625 ch->ch_u.file.ofs + ofs);
626 break;
627 case CH_TYPE_MEMORY:
628 error = image_copyout_memory(fd, sz, ch->ch_u.mem.ptr);
629 break;
630 default:
631 assert(0);
632 }
633 size -= sz;
634 blk += sz / secsz;
635 }
636 return (error);
637 }
638
639 int
image_data(lba_t blk,lba_t size)640 image_data(lba_t blk, lba_t size)
641 {
642 struct chunk *ch;
643 lba_t lim;
644
645 while (1) {
646 ch = image_chunk_find(blk);
647 if (ch == NULL)
648 return (0);
649 if (ch->ch_type != CH_TYPE_ZEROES)
650 return (1);
651 lim = ch->ch_block + (ch->ch_size / secsz);
652 if (lim >= blk + size)
653 return (0);
654 size -= lim - blk;
655 blk = lim;
656 }
657 /*NOTREACHED*/
658 }
659
660 lba_t
image_get_size(void)661 image_get_size(void)
662 {
663
664 return (image_size);
665 }
666
667 int
image_set_size(lba_t blk)668 image_set_size(lba_t blk)
669 {
670 int error;
671
672 error = image_chunk_skipto(blk);
673 if (!error)
674 image_size = blk;
675 return (error);
676 }
677
678 int
image_write(lba_t blk,void * buf,ssize_t len)679 image_write(lba_t blk, void *buf, ssize_t len)
680 {
681 struct chunk *ch;
682
683 while (len > 0) {
684 if (!is_empty_sector(buf)) {
685 ch = image_chunk_find(blk);
686 if (ch == NULL)
687 return (ENXIO);
688 /* We may not be able to write to files. */
689 if (ch->ch_type == CH_TYPE_FILE)
690 return (EINVAL);
691 if (ch->ch_type == CH_TYPE_ZEROES) {
692 ch = image_chunk_memory(ch, blk);
693 if (ch == NULL)
694 return (ENOMEM);
695 }
696 assert(ch->ch_type == CH_TYPE_MEMORY);
697 memcpy(ch->ch_u.mem.ptr, buf, secsz);
698 }
699 blk++;
700 buf = (char *)buf + secsz;
701 len--;
702 }
703 return (0);
704 }
705
706 static void
image_cleanup(void)707 image_cleanup(void)
708 {
709 struct chunk *ch;
710
711 while ((ch = TAILQ_FIRST(&image_chunks)) != NULL) {
712 switch (ch->ch_type) {
713 case CH_TYPE_FILE:
714 /* We may be closing the same file multiple times. */
715 if (ch->ch_u.file.fd != -1)
716 close(ch->ch_u.file.fd);
717 break;
718 case CH_TYPE_MEMORY:
719 free(ch->ch_u.mem.ptr);
720 break;
721 default:
722 break;
723 }
724 TAILQ_REMOVE(&image_chunks, ch, ch_list);
725 free(ch);
726 }
727 if (image_swap_fd != -1)
728 close(image_swap_fd);
729 unlink(image_swap_file);
730 }
731
732 int
image_init(void)733 image_init(void)
734 {
735 const char *tmpdir;
736
737 TAILQ_INIT(&image_chunks);
738 image_nchunks = 0;
739
740 image_swap_size = 0;
741 image_swap_pgsz = getpagesize();
742
743 if (atexit(image_cleanup) == -1)
744 return (errno);
745 if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0')
746 tmpdir = _PATH_TMP;
747 snprintf(image_swap_file, sizeof(image_swap_file), "%s/mkimg-XXXXXX",
748 tmpdir);
749 image_swap_fd = mkstemp(image_swap_file);
750 if (image_swap_fd == -1)
751 return (errno);
752 return (0);
753 }
754