1 // SPDX-License-Identifier: (LGPL-2.1 OR BSD-2-Clause) 2 /* 3 * Routines for dealing with .zip archives. 4 * 5 * Copyright (c) Meta Platforms, Inc. and affiliates. 6 */ 7 8 #include <errno.h> 9 #include <fcntl.h> 10 #include <stdint.h> 11 #include <stdlib.h> 12 #include <string.h> 13 #include <sys/mman.h> 14 #include <unistd.h> 15 16 #include "libbpf_internal.h" 17 #include "zip.h" 18 19 /* Specification of ZIP file format can be found here: 20 * https://pkware.cachefly.net/webdocs/casestudies/APPNOTE.TXT 21 * For a high level overview of the structure of a ZIP file see 22 * sections 4.3.1 - 4.3.6. 23 * 24 * Data structures appearing in ZIP files do not contain any 25 * padding and they might be misaligned. To allow us to safely 26 * operate on pointers to such structures and their members, we 27 * declare the types as packed. 28 */ 29 30 #define END_OF_CD_RECORD_MAGIC 0x06054b50 31 32 /* See section 4.3.16 of the spec. */ 33 struct end_of_cd_record { 34 /* Magic value equal to END_OF_CD_RECORD_MAGIC */ 35 __u32 magic; 36 37 /* Number of the file containing this structure or 0xFFFF if ZIP64 archive. 38 * Zip archive might span multiple files (disks). 39 */ 40 __u16 this_disk; 41 42 /* Number of the file containing the beginning of the central directory or 43 * 0xFFFF if ZIP64 archive. 44 */ 45 __u16 cd_disk; 46 47 /* Number of central directory records on this disk or 0xFFFF if ZIP64 48 * archive. 49 */ 50 __u16 cd_records; 51 52 /* Number of central directory records on all disks or 0xFFFF if ZIP64 53 * archive. 54 */ 55 __u16 cd_records_total; 56 57 /* Size of the central directory record or 0xFFFFFFFF if ZIP64 archive. */ 58 __u32 cd_size; 59 60 /* Offset of the central directory from the beginning of the archive or 61 * 0xFFFFFFFF if ZIP64 archive. 62 */ 63 __u32 cd_offset; 64 65 /* Length of comment data following end of central directory record. */ 66 __u16 comment_length; 67 68 /* Up to 64k of arbitrary bytes. */ 69 /* uint8_t comment[comment_length] */ 70 } __attribute__((packed)); 71 72 #define CD_FILE_HEADER_MAGIC 0x02014b50 73 #define FLAG_ENCRYPTED (1 << 0) 74 #define FLAG_HAS_DATA_DESCRIPTOR (1 << 3) 75 76 /* See section 4.3.12 of the spec. */ 77 struct cd_file_header { 78 /* Magic value equal to CD_FILE_HEADER_MAGIC. */ 79 __u32 magic; 80 __u16 version; 81 /* Minimum zip version needed to extract the file. */ 82 __u16 min_version; 83 __u16 flags; 84 __u16 compression; 85 __u16 last_modified_time; 86 __u16 last_modified_date; 87 __u32 crc; 88 __u32 compressed_size; 89 __u32 uncompressed_size; 90 __u16 file_name_length; 91 __u16 extra_field_length; 92 __u16 file_comment_length; 93 /* Number of the disk where the file starts or 0xFFFF if ZIP64 archive. */ 94 __u16 disk; 95 __u16 internal_attributes; 96 __u32 external_attributes; 97 /* Offset from the start of the disk containing the local file header to the 98 * start of the local file header. 99 */ 100 __u32 offset; 101 } __attribute__((packed)); 102 103 #define LOCAL_FILE_HEADER_MAGIC 0x04034b50 104 105 /* See section 4.3.7 of the spec. */ 106 struct local_file_header { 107 /* Magic value equal to LOCAL_FILE_HEADER_MAGIC. */ 108 __u32 magic; 109 /* Minimum zip version needed to extract the file. */ 110 __u16 min_version; 111 __u16 flags; 112 __u16 compression; 113 __u16 last_modified_time; 114 __u16 last_modified_date; 115 __u32 crc; 116 __u32 compressed_size; 117 __u32 uncompressed_size; 118 __u16 file_name_length; 119 __u16 extra_field_length; 120 } __attribute__((packed)); 121 122 struct zip_archive { 123 void *data; 124 __u32 size; 125 __u32 cd_offset; 126 __u32 cd_records; 127 }; 128 129 static void *check_access(struct zip_archive *archive, __u32 offset, __u32 size) 130 { 131 if (offset + size > archive->size || offset > offset + size) 132 return NULL; 133 134 return archive->data + offset; 135 } 136 137 /* Returns 0 on success, -EINVAL on error and -ENOTSUP if the eocd indicates the 138 * archive uses features which are not supported. 139 */ 140 static int try_parse_end_of_cd(struct zip_archive *archive, __u32 offset) 141 { 142 __u16 comment_length, cd_records; 143 struct end_of_cd_record *eocd; 144 __u32 cd_offset, cd_size; 145 146 eocd = check_access(archive, offset, sizeof(*eocd)); 147 if (!eocd || eocd->magic != END_OF_CD_RECORD_MAGIC) 148 return -EINVAL; 149 150 comment_length = eocd->comment_length; 151 if (offset + sizeof(*eocd) + comment_length != archive->size) 152 return -EINVAL; 153 154 cd_records = eocd->cd_records; 155 if (eocd->this_disk != 0 || eocd->cd_disk != 0 || eocd->cd_records_total != cd_records) 156 /* This is a valid eocd, but we only support single-file non-ZIP64 archives. */ 157 return -ENOTSUP; 158 159 cd_offset = eocd->cd_offset; 160 cd_size = eocd->cd_size; 161 if (!check_access(archive, cd_offset, cd_size)) 162 return -EINVAL; 163 164 archive->cd_offset = cd_offset; 165 archive->cd_records = cd_records; 166 return 0; 167 } 168 169 static int find_cd(struct zip_archive *archive) 170 { 171 int rc = -EINVAL; 172 int64_t limit; 173 __u32 offset; 174 175 if (archive->size <= sizeof(struct end_of_cd_record)) 176 return -EINVAL; 177 178 /* Because the end of central directory ends with a variable length array of 179 * up to 0xFFFF bytes we can't know exactly where it starts and need to 180 * search for it at the end of the file, scanning the (limit, offset] range. 181 */ 182 offset = archive->size - sizeof(struct end_of_cd_record); 183 limit = (int64_t)offset - (1 << 16); 184 185 for (; offset >= 0 && offset > limit && rc != 0; offset--) { 186 rc = try_parse_end_of_cd(archive, offset); 187 if (rc == -ENOTSUP) 188 break; 189 } 190 return rc; 191 } 192 193 struct zip_archive *zip_archive_open(const char *path) 194 { 195 struct zip_archive *archive; 196 int err, fd; 197 off_t size; 198 void *data; 199 200 fd = open(path, O_RDONLY | O_CLOEXEC); 201 if (fd < 0) 202 return ERR_PTR(-errno); 203 204 size = lseek(fd, 0, SEEK_END); 205 if (size == (off_t)-1 || size > UINT32_MAX) { 206 close(fd); 207 return ERR_PTR(-EINVAL); 208 } 209 210 data = mmap(NULL, size, PROT_READ, MAP_PRIVATE, fd, 0); 211 err = -errno; 212 close(fd); 213 214 if (data == MAP_FAILED) 215 return ERR_PTR(err); 216 217 archive = malloc(sizeof(*archive)); 218 if (!archive) { 219 munmap(data, size); 220 return ERR_PTR(-ENOMEM); 221 }; 222 223 archive->data = data; 224 archive->size = size; 225 226 err = find_cd(archive); 227 if (err) { 228 munmap(data, size); 229 free(archive); 230 return ERR_PTR(err); 231 } 232 233 return archive; 234 } 235 236 void zip_archive_close(struct zip_archive *archive) 237 { 238 munmap(archive->data, archive->size); 239 free(archive); 240 } 241 242 static struct local_file_header *local_file_header_at_offset(struct zip_archive *archive, 243 __u32 offset) 244 { 245 struct local_file_header *lfh; 246 247 lfh = check_access(archive, offset, sizeof(*lfh)); 248 if (!lfh || lfh->magic != LOCAL_FILE_HEADER_MAGIC) 249 return NULL; 250 251 return lfh; 252 } 253 254 static int get_entry_at_offset(struct zip_archive *archive, __u32 offset, struct zip_entry *out) 255 { 256 struct local_file_header *lfh; 257 __u32 compressed_size; 258 const char *name; 259 void *data; 260 261 lfh = local_file_header_at_offset(archive, offset); 262 if (!lfh) 263 return -EINVAL; 264 265 offset += sizeof(*lfh); 266 if ((lfh->flags & FLAG_ENCRYPTED) || (lfh->flags & FLAG_HAS_DATA_DESCRIPTOR)) 267 return -EINVAL; 268 269 name = check_access(archive, offset, lfh->file_name_length); 270 if (!name) 271 return -EINVAL; 272 273 offset += lfh->file_name_length; 274 if (!check_access(archive, offset, lfh->extra_field_length)) 275 return -EINVAL; 276 277 offset += lfh->extra_field_length; 278 compressed_size = lfh->compressed_size; 279 data = check_access(archive, offset, compressed_size); 280 if (!data) 281 return -EINVAL; 282 283 out->compression = lfh->compression; 284 out->name_length = lfh->file_name_length; 285 out->name = name; 286 out->data = data; 287 out->data_length = compressed_size; 288 out->data_offset = offset; 289 290 return 0; 291 } 292 293 int zip_archive_find_entry(struct zip_archive *archive, const char *file_name, 294 struct zip_entry *out) 295 { 296 size_t file_name_length = strlen(file_name); 297 __u32 i, offset = archive->cd_offset; 298 299 for (i = 0; i < archive->cd_records; ++i) { 300 __u16 cdfh_name_length, cdfh_flags; 301 struct cd_file_header *cdfh; 302 const char *cdfh_name; 303 304 cdfh = check_access(archive, offset, sizeof(*cdfh)); 305 if (!cdfh || cdfh->magic != CD_FILE_HEADER_MAGIC) 306 return -EINVAL; 307 308 offset += sizeof(*cdfh); 309 cdfh_name_length = cdfh->file_name_length; 310 cdfh_name = check_access(archive, offset, cdfh_name_length); 311 if (!cdfh_name) 312 return -EINVAL; 313 314 cdfh_flags = cdfh->flags; 315 if ((cdfh_flags & FLAG_ENCRYPTED) == 0 && 316 (cdfh_flags & FLAG_HAS_DATA_DESCRIPTOR) == 0 && 317 file_name_length == cdfh_name_length && 318 memcmp(file_name, archive->data + offset, file_name_length) == 0) { 319 return get_entry_at_offset(archive, cdfh->offset, out); 320 } 321 322 offset += cdfh_name_length; 323 offset += cdfh->extra_field_length; 324 offset += cdfh->file_comment_length; 325 } 326 327 return -ENOENT; 328 } 329