xref: /linux/tools/lib/bpf/zip.c (revision ece68749f86230ebf691bd7ee27eb3118140b1f1)
1 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause)
2 /*
3  * Routines for dealing with .zip archives.
4  *
5  * Copyright (c) Meta Platforms, Inc. and affiliates.
6  */
7 
8 #include <errno.h>
9 #include <fcntl.h>
10 #include <stdint.h>
11 #include <stdlib.h>
12 #include <string.h>
13 #include <sys/mman.h>
14 #include <unistd.h>
15 
16 #include "libbpf_internal.h"
17 #include "zip.h"
18 
19 #pragma GCC diagnostic push
20 #pragma GCC diagnostic ignored "-Wpacked"
21 #pragma GCC diagnostic ignored "-Wattributes"
22 
23 /* Specification of ZIP file format can be found here:
24  * https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT
25  * For a high level overview of the structure of a ZIP file see
26  * sections 4.3.1 - 4.3.6.
27  *
28  * Data structures appearing in ZIP files do not contain any
29  * padding and they might be misaligned. To allow us to safely
30  * operate on pointers to such structures and their members, we
31  * declare the types as packed.
32  */
33 
34 #define END_OF_CD_RECORD_MAGIC 0x06054b50
35 
36 /* See section 4.3.16 of the spec. */
37 struct end_of_cd_record {
38 	/* Magic value equal to END_OF_CD_RECORD_MAGIC */
39 	__u32 magic;
40 
41 	/* Number of the file containing this structure or 0xFFFF if ZIP64 archive.
42 	 * Zip archive might span multiple files (disks).
43 	 */
44 	__u16 this_disk;
45 
46 	/* Number of the file containing the beginning of the central directory or
47 	 * 0xFFFF if ZIP64 archive.
48 	 */
49 	__u16 cd_disk;
50 
51 	/* Number of central directory records on this disk or 0xFFFF if ZIP64
52 	 * archive.
53 	 */
54 	__u16 cd_records;
55 
56 	/* Number of central directory records on all disks or 0xFFFF if ZIP64
57 	 * archive.
58 	 */
59 	__u16 cd_records_total;
60 
61 	/* Size of the central directory record or 0xFFFFFFFF if ZIP64 archive. */
62 	__u32 cd_size;
63 
64 	/* Offset of the central directory from the beginning of the archive or
65 	 * 0xFFFFFFFF if ZIP64 archive.
66 	 */
67 	__u32 cd_offset;
68 
69 	/* Length of comment data following end of central directory record. */
70 	__u16 comment_length;
71 
72 	/* Up to 64k of arbitrary bytes. */
73 	/* uint8_t comment[comment_length] */
74 } __attribute__((packed));
75 
76 #define CD_FILE_HEADER_MAGIC 0x02014b50
77 #define FLAG_ENCRYPTED (1 << 0)
78 #define FLAG_HAS_DATA_DESCRIPTOR (1 << 3)
79 
80 /* See section 4.3.12 of the spec. */
81 struct cd_file_header {
82 	/* Magic value equal to CD_FILE_HEADER_MAGIC. */
83 	__u32 magic;
84 	__u16 version;
85 	/* Minimum zip version needed to extract the file. */
86 	__u16 min_version;
87 	__u16 flags;
88 	__u16 compression;
89 	__u16 last_modified_time;
90 	__u16 last_modified_date;
91 	__u32 crc;
92 	__u32 compressed_size;
93 	__u32 uncompressed_size;
94 	__u16 file_name_length;
95 	__u16 extra_field_length;
96 	__u16 file_comment_length;
97 	/* Number of the disk where the file starts or 0xFFFF if ZIP64 archive. */
98 	__u16 disk;
99 	__u16 internal_attributes;
100 	__u32 external_attributes;
101 	/* Offset from the start of the disk containing the local file header to the
102 	 * start of the local file header.
103 	 */
104 	__u32 offset;
105 } __attribute__((packed));
106 
107 #define LOCAL_FILE_HEADER_MAGIC 0x04034b50
108 
109 /* See section 4.3.7 of the spec. */
110 struct local_file_header {
111 	/* Magic value equal to LOCAL_FILE_HEADER_MAGIC. */
112 	__u32 magic;
113 	/* Minimum zip version needed to extract the file. */
114 	__u16 min_version;
115 	__u16 flags;
116 	__u16 compression;
117 	__u16 last_modified_time;
118 	__u16 last_modified_date;
119 	__u32 crc;
120 	__u32 compressed_size;
121 	__u32 uncompressed_size;
122 	__u16 file_name_length;
123 	__u16 extra_field_length;
124 } __attribute__((packed));
125 
126 #pragma GCC diagnostic pop
127 
128 struct zip_archive {
129 	void *data;
130 	__u32 size;
131 	__u32 cd_offset;
132 	__u32 cd_records;
133 };
134 
135 static void *check_access(struct zip_archive *archive, __u32 offset, __u32 size)
136 {
137 	if (offset + size > archive->size || offset > offset + size)
138 		return NULL;
139 
140 	return archive->data + offset;
141 }
142 
143 /* Returns 0 on success, -EINVAL on error and -ENOTSUP if the eocd indicates the
144  * archive uses features which are not supported.
145  */
146 static int try_parse_end_of_cd(struct zip_archive *archive, __u32 offset)
147 {
148 	__u16 comment_length, cd_records;
149 	struct end_of_cd_record *eocd;
150 	__u32 cd_offset, cd_size;
151 
152 	eocd = check_access(archive, offset, sizeof(*eocd));
153 	if (!eocd || eocd->magic != END_OF_CD_RECORD_MAGIC)
154 		return -EINVAL;
155 
156 	comment_length = eocd->comment_length;
157 	if (offset + sizeof(*eocd) + comment_length != archive->size)
158 		return -EINVAL;
159 
160 	cd_records = eocd->cd_records;
161 	if (eocd->this_disk != 0 || eocd->cd_disk != 0 || eocd->cd_records_total != cd_records)
162 		/* This is a valid eocd, but we only support single-file non-ZIP64 archives. */
163 		return -ENOTSUP;
164 
165 	cd_offset = eocd->cd_offset;
166 	cd_size = eocd->cd_size;
167 	if (!check_access(archive, cd_offset, cd_size))
168 		return -EINVAL;
169 
170 	archive->cd_offset = cd_offset;
171 	archive->cd_records = cd_records;
172 	return 0;
173 }
174 
175 static int find_cd(struct zip_archive *archive)
176 {
177 	int64_t limit, offset;
178 	int rc = -EINVAL;
179 
180 	if (archive->size <= sizeof(struct end_of_cd_record))
181 		return -EINVAL;
182 
183 	/* Because the end of central directory ends with a variable length array of
184 	 * up to 0xFFFF bytes we can't know exactly where it starts and need to
185 	 * search for it at the end of the file, scanning the (limit, offset] range.
186 	 */
187 	offset = archive->size - sizeof(struct end_of_cd_record);
188 	limit = (int64_t)offset - (1 << 16);
189 
190 	for (; offset >= 0 && offset > limit && rc != 0; offset--) {
191 		rc = try_parse_end_of_cd(archive, offset);
192 		if (rc == -ENOTSUP)
193 			break;
194 	}
195 	return rc;
196 }
197 
198 struct zip_archive *zip_archive_open(const char *path)
199 {
200 	struct zip_archive *archive;
201 	int err, fd;
202 	off_t size;
203 	void *data;
204 
205 	fd = open(path, O_RDONLY | O_CLOEXEC);
206 	if (fd < 0)
207 		return ERR_PTR(-errno);
208 
209 	size = lseek(fd, 0, SEEK_END);
210 	if (size == (off_t)-1 || size > UINT32_MAX) {
211 		close(fd);
212 		return ERR_PTR(-EINVAL);
213 	}
214 
215 	data = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0);
216 	err = -errno;
217 	close(fd);
218 
219 	if (data == MAP_FAILED)
220 		return ERR_PTR(err);
221 
222 	archive = malloc(sizeof(*archive));
223 	if (!archive) {
224 		munmap(data, size);
225 		return ERR_PTR(-ENOMEM);
226 	};
227 
228 	archive->data = data;
229 	archive->size = size;
230 
231 	err = find_cd(archive);
232 	if (err) {
233 		munmap(data, size);
234 		free(archive);
235 		return ERR_PTR(err);
236 	}
237 
238 	return archive;
239 }
240 
241 void zip_archive_close(struct zip_archive *archive)
242 {
243 	munmap(archive->data, archive->size);
244 	free(archive);
245 }
246 
247 static struct local_file_header *local_file_header_at_offset(struct zip_archive *archive,
248 							     __u32 offset)
249 {
250 	struct local_file_header *lfh;
251 
252 	lfh = check_access(archive, offset, sizeof(*lfh));
253 	if (!lfh || lfh->magic != LOCAL_FILE_HEADER_MAGIC)
254 		return NULL;
255 
256 	return lfh;
257 }
258 
259 static int get_entry_at_offset(struct zip_archive *archive, __u32 offset, struct zip_entry *out)
260 {
261 	struct local_file_header *lfh;
262 	__u32 compressed_size;
263 	const char *name;
264 	void *data;
265 
266 	lfh = local_file_header_at_offset(archive, offset);
267 	if (!lfh)
268 		return -EINVAL;
269 
270 	offset += sizeof(*lfh);
271 	if ((lfh->flags & FLAG_ENCRYPTED) || (lfh->flags & FLAG_HAS_DATA_DESCRIPTOR))
272 		return -EINVAL;
273 
274 	name = check_access(archive, offset, lfh->file_name_length);
275 	if (!name)
276 		return -EINVAL;
277 
278 	offset += lfh->file_name_length;
279 	if (!check_access(archive, offset, lfh->extra_field_length))
280 		return -EINVAL;
281 
282 	offset += lfh->extra_field_length;
283 	compressed_size = lfh->compressed_size;
284 	data = check_access(archive, offset, compressed_size);
285 	if (!data)
286 		return -EINVAL;
287 
288 	out->compression = lfh->compression;
289 	out->name_length = lfh->file_name_length;
290 	out->name = name;
291 	out->data = data;
292 	out->data_length = compressed_size;
293 	out->data_offset = offset;
294 
295 	return 0;
296 }
297 
298 int zip_archive_find_entry(struct zip_archive *archive, const char *file_name,
299 			   struct zip_entry *out)
300 {
301 	size_t file_name_length = strlen(file_name);
302 	__u32 i, offset = archive->cd_offset;
303 
304 	for (i = 0; i < archive->cd_records; ++i) {
305 		__u16 cdfh_name_length, cdfh_flags;
306 		struct cd_file_header *cdfh;
307 		const char *cdfh_name;
308 
309 		cdfh = check_access(archive, offset, sizeof(*cdfh));
310 		if (!cdfh || cdfh->magic != CD_FILE_HEADER_MAGIC)
311 			return -EINVAL;
312 
313 		offset += sizeof(*cdfh);
314 		cdfh_name_length = cdfh->file_name_length;
315 		cdfh_name = check_access(archive, offset, cdfh_name_length);
316 		if (!cdfh_name)
317 			return -EINVAL;
318 
319 		cdfh_flags = cdfh->flags;
320 		if ((cdfh_flags & FLAG_ENCRYPTED) == 0 &&
321 		    (cdfh_flags & FLAG_HAS_DATA_DESCRIPTOR) == 0 &&
322 		    file_name_length == cdfh_name_length &&
323 		    memcmp(file_name, archive->data + offset, file_name_length) == 0) {
324 			return get_entry_at_offset(archive, cdfh->offset, out);
325 		}
326 
327 		offset += cdfh_name_length;
328 		offset += cdfh->extra_field_length;
329 		offset += cdfh->file_comment_length;
330 	}
331 
332 	return -ENOENT;
333 }
334