xref: /freebsd/usr.bin/mkimg/image.c (revision c99b67a7947ea215f9c1d44ec022680e98920cd1)
1 /*-
2  * Copyright (c) 2014 Juniper Networks, Inc.
3  * All rights reserved.
4  *
5  * Redistribution and use in source and binary forms, with or without
6  * modification, are permitted provided that the following conditions
7  * are met:
8  * 1. Redistributions of source code must retain the above copyright
9  *    notice, this list of conditions and the following disclaimer.
10  * 2. Redistributions in binary form must reproduce the above copyright
11  *    notice, this list of conditions and the following disclaimer in the
12  *    documentation and/or other materials provided with the distribution.
13  *
14  * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
15  * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
16  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
17  * ARE DISCLAIMED.  IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE
18  * FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
19  * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
20  * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
21  * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
22  * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
23  * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
24  * SUCH DAMAGE.
25  */
26 
27 #include <sys/cdefs.h>
28 __FBSDID("$FreeBSD$");
29 
30 #include <sys/mman.h>
31 #include <sys/stat.h>
32 #include <assert.h>
33 #include <err.h>
34 #include <errno.h>
35 #include <limits.h>
36 #include <paths.h>
37 #include <stdint.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 #include <unistd.h>
42 
43 #include "image.h"
44 #include "mkimg.h"
45 
/*
 * MAP_NOCORE and MAP_NOSYNC are optional mmap(2) flags.  Where the
 * platform headers do not provide them they are defined as 0, so that
 * OR-ing them into the flags word has no effect.
 */
#ifndef MAP_NOCORE
#define	MAP_NOCORE	0
#endif
#ifndef MAP_NOSYNC
#define	MAP_NOSYNC	0
#endif

/*
 * Fallbacks for platforms without SEEK_DATA/SEEK_HOLE.  The value -1 is
 * expected to be rejected by lseek(2) as an invalid whence; the code that
 * probes for holes treats a failing seek as "no sparse file support" and
 * handles the whole file as data.
 */
#ifndef SEEK_DATA
#define	SEEK_DATA	-1
#endif
#ifndef SEEK_HOLE
#define	SEEK_HOLE	-1
#endif
59 
60 struct chunk {
61 	TAILQ_ENTRY(chunk) ch_list;
62 	size_t	ch_size;		/* Size of chunk in bytes. */
63 	lba_t	ch_block;		/* Block address in image. */
64 	union {
65 		struct {
66 			off_t	ofs;	/* Offset in backing file. */
67 			int	fd;	/* FD of backing file. */
68 		} file;
69 		struct {
70 			void	*ptr;	/* Pointer to data in memory */
71 		} mem;
72 	} ch_u;
73 	u_int	ch_type;
74 #define	CH_TYPE_ZEROES		0	/* Chunk is a gap (no data). */
75 #define	CH_TYPE_FILE		1	/* File-backed chunk. */
76 #define	CH_TYPE_MEMORY		2	/* Memory-backed chunk */
77 };
78 
/* List of chunks describing the image, and the number of entries on it. */
static TAILQ_HEAD(chunk_head, chunk) image_chunks;
static u_int image_nchunks;

/* Swap file: path name, descriptor (-1 while not open), page size, size. */
static char image_swap_file[PATH_MAX];
static int image_swap_fd = -1;
static u_int image_swap_pgsz;
static off_t image_swap_size;

/* Image size in blocks, as recorded by image_set_size(). */
static lba_t image_size;
88 
89 static int
90 is_empty_sector(void *buf)
91 {
92 	uint64_t *p = buf;
93 	size_t n, max;
94 
95 	assert(((uintptr_t)p & 3) == 0);
96 
97 	max = secsz / sizeof(uint64_t);
98 	for (n = 0; n < max; n++) {
99 		if (p[n] != 0UL)
100 			return (0);
101 	}
102 	return (1);
103 }
104 
105 /*
 * Swap file handling.
107  */
108 
109 static off_t
110 image_swap_alloc(size_t size)
111 {
112 	off_t ofs;
113 	size_t unit;
114 
115 	unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz;
116 	assert((unit & (unit - 1)) == 0);
117 
118 	size = (size + unit - 1) & ~(unit - 1);
119 
120 	ofs = image_swap_size;
121 	image_swap_size += size;
122 	if (ftruncate(image_swap_fd, image_swap_size) == -1) {
123 		image_swap_size = ofs;
124 		ofs = -1LL;
125 	}
126 	return (ofs);
127 }
128 
129 /*
130  * Image chunk handling.
131  */
132 
133 static struct chunk *
134 image_chunk_find(lba_t blk)
135 {
136 	static struct chunk *last = NULL;
137 	struct chunk *ch;
138 
139 	ch = (last != NULL && last->ch_block <= blk)
140 	    ? last : TAILQ_FIRST(&image_chunks);
141 	while (ch != NULL) {
142 		if (ch->ch_block <= blk &&
143 		    (lba_t)(ch->ch_block + (ch->ch_size / secsz)) > blk) {
144 			last = ch;
145 			break;
146 		}
147 		ch = TAILQ_NEXT(ch, ch_list);
148 	}
149 	return (ch);
150 }
151 
152 static size_t
153 image_chunk_grow(struct chunk *ch, size_t sz)
154 {
155 	size_t dsz, newsz;
156 
157 	newsz = ch->ch_size + sz;
158 	if (newsz > ch->ch_size) {
159 		ch->ch_size = newsz;
160 		return (0);
161 	}
162 	/* We would overflow -- create new chunk for remainder. */
163 	dsz = SIZE_MAX - ch->ch_size;
164 	assert(dsz < sz);
165 	ch->ch_size = SIZE_MAX;
166 	return (sz - dsz);
167 }
168 
169 static struct chunk *
170 image_chunk_memory(struct chunk *ch, lba_t blk)
171 {
172 	struct chunk *new;
173 	void *ptr;
174 
175 	ptr = calloc(1, secsz);
176 	if (ptr == NULL)
177 		return (NULL);
178 
179 	if (ch->ch_block < blk) {
180 		new = malloc(sizeof(*new));
181 		if (new == NULL) {
182 			free(ptr);
183 			return (NULL);
184 		}
185 		memcpy(new, ch, sizeof(*new));
186 		ch->ch_size = (blk - ch->ch_block) * secsz;
187 		new->ch_block = blk;
188 		new->ch_size -= ch->ch_size;
189 		TAILQ_INSERT_AFTER(&image_chunks, ch, new, ch_list);
190 		image_nchunks++;
191 		ch = new;
192 	}
193 
194 	if (ch->ch_size > secsz) {
195 		new = malloc(sizeof(*new));
196 		if (new == NULL) {
197 			free(ptr);
198 			return (NULL);
199 		}
200 		memcpy(new, ch, sizeof(*new));
201 		ch->ch_size = secsz;
202 		new->ch_block++;
203 		new->ch_size -= secsz;
204 		TAILQ_INSERT_AFTER(&image_chunks, ch, new, ch_list);
205 		image_nchunks++;
206 	}
207 
208 	ch->ch_type = CH_TYPE_MEMORY;
209 	ch->ch_u.mem.ptr = ptr;
210 	return (ch);
211 }
212 
213 static int
214 image_chunk_skipto(lba_t to)
215 {
216 	struct chunk *ch;
217 	lba_t from;
218 	size_t sz;
219 
220 	ch = TAILQ_LAST(&image_chunks, chunk_head);
221 	from = (ch != NULL) ? ch->ch_block + (ch->ch_size / secsz) : 0LL;
222 
223 	assert(from <= to);
224 
225 	/* Nothing to do? */
226 	if (from == to)
227 		return (0);
228 	/* Avoid bugs due to overflows. */
229 	if ((uintmax_t)(to - from) > (uintmax_t)(SIZE_MAX / secsz))
230 		return (EFBIG);
231 	sz = (to - from) * secsz;
232 	if (ch != NULL && ch->ch_type == CH_TYPE_ZEROES) {
233 		sz = image_chunk_grow(ch, sz);
234 		if (sz == 0)
235 			return (0);
236 		from = ch->ch_block + (ch->ch_size / secsz);
237 	}
238 	ch = malloc(sizeof(*ch));
239 	if (ch == NULL)
240 		return (ENOMEM);
241 	memset(ch, 0, sizeof(*ch));
242 	ch->ch_block = from;
243 	ch->ch_size = sz;
244 	ch->ch_type = CH_TYPE_ZEROES;
245 	TAILQ_INSERT_TAIL(&image_chunks, ch, ch_list);
246 	image_nchunks++;
247 	return (0);
248 }
249 
250 static int
251 image_chunk_append(lba_t blk, size_t sz, off_t ofs, int fd)
252 {
253 	struct chunk *ch;
254 
255 	ch = TAILQ_LAST(&image_chunks, chunk_head);
256 	if (ch != NULL && ch->ch_type == CH_TYPE_FILE) {
257 		if (fd == ch->ch_u.file.fd &&
258 		    blk == (lba_t)(ch->ch_block + (ch->ch_size / secsz)) &&
259 		    ofs == (off_t)(ch->ch_u.file.ofs + ch->ch_size)) {
260 			sz = image_chunk_grow(ch, sz);
261 			if (sz == 0)
262 				return (0);
263 			blk = ch->ch_block + (ch->ch_size / secsz);
264 			ofs = ch->ch_u.file.ofs + ch->ch_size;
265 		}
266 	}
267 	ch = malloc(sizeof(*ch));
268 	if (ch == NULL)
269 		return (ENOMEM);
270 	memset(ch, 0, sizeof(*ch));
271 	ch->ch_block = blk;
272 	ch->ch_size = sz;
273 	ch->ch_type = CH_TYPE_FILE;
274 	ch->ch_u.file.ofs = ofs;
275 	ch->ch_u.file.fd = fd;
276 	TAILQ_INSERT_TAIL(&image_chunks, ch, ch_list);
277 	image_nchunks++;
278 	return (0);
279 }
280 
281 static int
282 image_chunk_copyin(lba_t blk, void *buf, size_t sz, off_t ofs, int fd)
283 {
284 	uint8_t *p = buf;
285 	int error;
286 
287 	error = 0;
288 	sz = (sz + secsz - 1) & ~(secsz - 1);
289 	while (!error && sz > 0) {
290 		if (is_empty_sector(p))
291 			error = image_chunk_skipto(blk + 1);
292 		else
293 			error = image_chunk_append(blk, secsz, ofs, fd);
294 		blk++;
295 		p += secsz;
296 		sz -= secsz;
297 		ofs += secsz;
298 	}
299 	return (error);
300 }
301 
302 /*
303  * File mapping support.
304  */
305 
306 static void *
307 image_file_map(int fd, off_t ofs, size_t sz, off_t *iofp)
308 {
309 	void *ptr;
310 	size_t unit;
311 	int flags, prot;
312 	off_t x;
313 
314 	/* On Linux anyway ofs must also be page aligned */
315 	if ((x = (ofs % image_swap_pgsz)) != 0) {
316 	    ofs -= x;
317 	    sz += x;
318 	    *iofp = x;
319 	} else
320 	    *iofp = 0;
321 	unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz;
322 	assert((unit & (unit - 1)) == 0);
323 
324 	flags = MAP_NOCORE | MAP_NOSYNC | MAP_SHARED;
325 	/* Allow writing to our swap file only. */
326 	prot = PROT_READ | ((fd == image_swap_fd) ? PROT_WRITE : 0);
327 	sz = (sz + unit - 1) & ~(unit - 1);
328 	ptr = mmap(NULL, sz, prot, flags, fd, ofs);
329 	return ((ptr == MAP_FAILED) ? NULL : ptr);
330 }
331 
332 static int
333 image_file_unmap(void *buffer, size_t sz)
334 {
335 	size_t unit;
336 
337 	unit = (secsz > image_swap_pgsz) ? secsz : image_swap_pgsz;
338 	sz = (sz + unit - 1) & ~(unit - 1);
339 	if (madvise(buffer, sz, MADV_DONTNEED) != 0)
340 		warn("madvise");
341 	munmap(buffer, sz);
342 	return (0);
343 }
344 
345 /*
346  * Input/source file handling.
347  */
348 
349 static int
350 image_copyin_stream(lba_t blk, int fd, uint64_t *sizep)
351 {
352 	char *buffer;
353 	uint64_t bytesize;
354 	off_t swofs;
355 	size_t iosz;
356 	ssize_t rdsz;
357 	int error;
358 	off_t iof;
359 
360 	/*
361 	 * This makes sure we're doing I/O in multiples of the page
362 	 * size as well as of the sector size. 2MB is the minimum
363 	 * by virtue of secsz at least 512 bytes and the page size
364 	 * at least 4K bytes.
365 	 */
366 	iosz = secsz * image_swap_pgsz;
367 
368 	bytesize = 0;
369 	do {
370 		swofs = image_swap_alloc(iosz);
371 		if (swofs == -1LL)
372 			return (errno);
373 		buffer = image_file_map(image_swap_fd, swofs, iosz, &iof);
374 		if (buffer == NULL)
375 			return (errno);
376 		rdsz = read(fd, &buffer[iof], iosz);
377 		if (rdsz > 0)
378 			error = image_chunk_copyin(blk, &buffer[iof], rdsz, swofs,
379 			    image_swap_fd);
380 		else if (rdsz < 0)
381 			error = errno;
382 		else
383 			error = 0;
384 		image_file_unmap(buffer, iosz);
385 		/* XXX should we relinguish unused swap space? */
386 		if (error)
387 			return (error);
388 
389 		bytesize += rdsz;
390 		blk += (rdsz + secsz - 1) / secsz;
391 	} while (rdsz > 0);
392 
393 	if (sizep != NULL)
394 		*sizep = bytesize;
395 	return (0);
396 }
397 
398 static int
399 image_copyin_mapped(lba_t blk, int fd, uint64_t *sizep)
400 {
401 	off_t cur, data, end, hole, pos, iof;
402 	void *mp;
403 	char *buf;
404 	uint64_t bytesize;
405 	size_t iosz, sz;
406 	int error;
407 
408 	/*
409 	 * We'd like to know the size of the file and we must
410 	 * be able to seek in order to mmap(2). If this isn't
411 	 * possible, then treat the file as a stream/pipe.
412 	 */
413 	end = lseek(fd, 0L, SEEK_END);
414 	if (end == -1L)
415 		return (image_copyin_stream(blk, fd, sizep));
416 
417 	/*
418 	 * We need the file opened for the duration and our
419 	 * caller is going to close the file. Make a dup(2)
420 	 * so that control the faith of the descriptor.
421 	 */
422 	fd = dup(fd);
423 	if (fd == -1)
424 		return (errno);
425 
426 	iosz = secsz * image_swap_pgsz;
427 
428 	bytesize = 0;
429 	cur = pos = 0;
430 	error = 0;
431 	while (!error && cur < end) {
432 		hole = lseek(fd, cur, SEEK_HOLE);
433 		if (hole == -1)
434 			hole = end;
435 		data = lseek(fd, cur, SEEK_DATA);
436 		if (data == -1)
437 			data = end;
438 
439 		/*
440 		 * Treat the entire file as data if sparse files
441 		 * are not supported by the underlying file system.
442 		 */
443 		if (hole == end && data == end)
444 			data = cur;
445 
446 		if (cur == hole && data > hole) {
447 			hole = pos;
448 			pos = data & ~((uint64_t)secsz - 1);
449 
450 			blk += (pos - hole) / secsz;
451 			error = image_chunk_skipto(blk);
452 
453 			bytesize += pos - hole;
454 			cur = data;
455 		} else if (cur == data && hole > data) {
456 			data = pos;
457 			pos = (hole + secsz - 1) & ~((uint64_t)secsz - 1);
458 
459 			while (data < pos) {
460 				sz = (pos - data > (off_t)iosz)
461 				    ? iosz : (size_t)(pos - data);
462 
463 				buf = mp = image_file_map(fd, data, sz, &iof);
464 				if (mp != NULL) {
465 					buf += iof;
466 					error = image_chunk_copyin(blk, buf,
467 					    sz, data, fd);
468 					image_file_unmap(mp, sz);
469 				} else
470 					error = errno;
471 
472 				blk += sz / secsz;
473 				bytesize += sz;
474 				data += sz;
475 			}
476 			cur = hole;
477 		} else {
478 			/*
479 			 * I don't know what this means or whether it
480 			 * can happen at all...
481 			 */
482 			assert(0);
483 		}
484 	}
485 	if (error)
486 		close(fd);
487 	if (!error && sizep != NULL)
488 		*sizep = bytesize;
489 	return (error);
490 }
491 
492 int
493 image_copyin(lba_t blk, int fd, uint64_t *sizep)
494 {
495 	struct stat sb;
496 	int error;
497 
498 	error = image_chunk_skipto(blk);
499 	if (!error) {
500 		if (fstat(fd, &sb) == -1 || !S_ISREG(sb.st_mode))
501 			error = image_copyin_stream(blk, fd, sizep);
502 		else
503 			error = image_copyin_mapped(blk, fd, sizep);
504 	}
505 	return (error);
506 }
507 
508 /*
509  * Output/sink file handling.
510  */
511 
512 int
513 image_copyout(int fd)
514 {
515 	int error;
516 
517 	error = image_copyout_region(fd, 0, image_size);
518 	if (!error)
519 		error = image_copyout_done(fd);
520 	return (error);
521 }
522 
/*
 * Finish writing to `fd' by setting the file's length to the current
 * file position.
 *
 * If the position cannot be determined nothing is done and 0 is
 * returned.  Otherwise returns 0 on success or the errno value of the
 * failed ftruncate(2).
 */
int
image_copyout_done(int fd)
{
	off_t here;

	here = lseek(fd, 0L, SEEK_CUR);
	if (here == -1)
		return (0);
	if (ftruncate(fd, here) == -1)
		return (errno);
	return (0);
}
535 
/*
 * Write `size' bytes from `ptr' to `fd'.
 *
 * write(2) is allowed to transfer fewer bytes than requested, which is
 * common for pipes, so writing continues until everything has been
 * transferred.  Interrupted writes are retried.
 *
 * Returns 0 when all bytes were written, the errno value of a failed
 * write(2), or EIO if write(2) reports that nothing could be written.
 */
static int
image_copyout_memory(int fd, size_t size, void *ptr)
{
	const char *p = ptr;
	ssize_t wrsz;

	while (size > 0) {
		wrsz = write(fd, p, size);
		if (wrsz == -1) {
			if (errno == EINTR)
				continue;
			return (errno);
		}
		/* Guard against spinning when no progress is made. */
		if (wrsz == 0)
			return (EIO);
		p += wrsz;
		size -= (size_t)wrsz;
	}
	return (0);
}
544 
545 int
546 image_copyout_zeroes(int fd, size_t count)
547 {
548 	static uint8_t *zeroes = NULL;
549 	size_t sz;
550 	int error;
551 
552 	if (lseek(fd, (off_t)count, SEEK_CUR) != -1)
553 		return (0);
554 
555 	/*
556 	 * If we can't seek, we must write.
557 	 */
558 
559 	if (zeroes == NULL) {
560 		zeroes = calloc(1, secsz);
561 		if (zeroes == NULL)
562 			return (ENOMEM);
563 	}
564 
565 	while (count > 0) {
566 		sz = (count > secsz) ? secsz : count;
567 		error = image_copyout_memory(fd, sz, zeroes);
568 		if (error)
569 			return (error);
570 		count -= sz;
571 	}
572 	return (0);
573 }
574 
575 static int
576 image_copyout_file(int fd, size_t size, int ifd, off_t iofs)
577 {
578 	void *mp;
579 	char *buf;
580 	size_t iosz, sz;
581 	int error;
582 	off_t iof;
583 
584 	iosz = secsz * image_swap_pgsz;
585 
586 	while (size > 0) {
587 		sz = (size > iosz) ? iosz : size;
588 		buf = mp = image_file_map(ifd, iofs, sz, &iof);
589 		if (buf == NULL)
590 			return (errno);
591 		buf += iof;
592 		error = image_copyout_memory(fd, sz, buf);
593 		image_file_unmap(mp, sz);
594 		if (error)
595 			return (error);
596 		size -= sz;
597 		iofs += sz;
598 	}
599 	return (0);
600 }
601 
602 int
603 image_copyout_region(int fd, lba_t blk, lba_t size)
604 {
605 	struct chunk *ch;
606 	size_t ofs, sz;
607 	int error;
608 
609 	size *= secsz;
610 
611 	error = 0;
612 	while (!error && size > 0) {
613 		ch = image_chunk_find(blk);
614 		if (ch == NULL) {
615 			error = EINVAL;
616 			break;
617 		}
618 		ofs = (blk - ch->ch_block) * secsz;
619 		sz = ch->ch_size - ofs;
620 		sz = ((lba_t)sz < size) ? sz : (size_t)size;
621 		switch (ch->ch_type) {
622 		case CH_TYPE_ZEROES:
623 			error = image_copyout_zeroes(fd, sz);
624 			break;
625 		case CH_TYPE_FILE:
626 			error = image_copyout_file(fd, sz, ch->ch_u.file.fd,
627 			    ch->ch_u.file.ofs + ofs);
628 			break;
629 		case CH_TYPE_MEMORY:
630 			error = image_copyout_memory(fd, sz, ch->ch_u.mem.ptr);
631 			break;
632 		default:
633 			assert(0);
634 		}
635 		size -= sz;
636 		blk += sz / secsz;
637 	}
638 	return (error);
639 }
640 
641 int
642 image_data(lba_t blk, lba_t size)
643 {
644 	struct chunk *ch;
645 	lba_t lim;
646 
647 	while (1) {
648 		ch = image_chunk_find(blk);
649 		if (ch == NULL)
650 			return (0);
651 		if (ch->ch_type != CH_TYPE_ZEROES)
652 			return (1);
653 		lim = ch->ch_block + (ch->ch_size / secsz);
654 		if (lim >= blk + size)
655 			return (0);
656 		size -= lim - blk;
657 		blk = lim;
658 	}
659 	/*NOTREACHED*/
660 }
661 
/*
 * Return the image size, in blocks, as last recorded by a successful
 * image_set_size().
 */
lba_t
image_get_size(void)
{

	return (image_size);
}
668 
669 int
670 image_set_size(lba_t blk)
671 {
672 	int error;
673 
674 	error = image_chunk_skipto(blk);
675 	if (!error)
676 		image_size = blk;
677 	return (error);
678 }
679 
680 int
681 image_write(lba_t blk, void *buf, ssize_t len)
682 {
683 	struct chunk *ch;
684 
685 	while (len > 0) {
686 		if (!is_empty_sector(buf)) {
687 			ch = image_chunk_find(blk);
688 			if (ch == NULL)
689 				return (ENXIO);
690 			/* We may not be able to write to files. */
691 			if (ch->ch_type == CH_TYPE_FILE)
692 				return (EINVAL);
693 			if (ch->ch_type == CH_TYPE_ZEROES) {
694 				ch = image_chunk_memory(ch, blk);
695 				if (ch == NULL)
696 					return (ENOMEM);
697 			}
698 			assert(ch->ch_type == CH_TYPE_MEMORY);
699 			memcpy(ch->ch_u.mem.ptr, buf, secsz);
700 		}
701 		blk++;
702 		buf = (char *)buf + secsz;
703 		len--;
704 	}
705 	return (0);
706 }
707 
708 static void
709 image_cleanup(void)
710 {
711 	struct chunk *ch;
712 
713 	while ((ch = TAILQ_FIRST(&image_chunks)) != NULL) {
714 		switch (ch->ch_type) {
715 		case CH_TYPE_FILE:
716 			/* We may be closing the same file multiple times. */
717 			if (ch->ch_u.file.fd != -1)
718 				close(ch->ch_u.file.fd);
719 			break;
720 		case CH_TYPE_MEMORY:
721 			free(ch->ch_u.mem.ptr);
722 			break;
723 		default:
724 			break;
725 		}
726 		TAILQ_REMOVE(&image_chunks, ch, ch_list);
727 		free(ch);
728 	}
729 	if (image_swap_fd != -1)
730 		close(image_swap_fd);
731 	unlink(image_swap_file);
732 }
733 
734 int
735 image_init(void)
736 {
737 	const char *tmpdir;
738 
739 	TAILQ_INIT(&image_chunks);
740 	image_nchunks = 0;
741 
742 	image_swap_size = 0;
743 	image_swap_pgsz = getpagesize();
744 
745 	if (atexit(image_cleanup) == -1)
746 		return (errno);
747 	if ((tmpdir = getenv("TMPDIR")) == NULL || *tmpdir == '\0')
748 		tmpdir = _PATH_TMP;
749 	snprintf(image_swap_file, sizeof(image_swap_file), "%s/mkimg-XXXXXX",
750 	    tmpdir);
751 	image_swap_fd = mkstemp(image_swap_file);
752 	if (image_swap_fd == -1)
753 		return (errno);
754 	return (0);
755 }
756