1 /*
2 * Copyright (c) Christos Zoulas 2003.
3 * All Rights Reserved.
4 *
5 * Redistribution and use in source and binary forms, with or without
6 * modification, are permitted provided that the following conditions
7 * are met:
8 * 1. Redistributions of source code must retain the above copyright
9 * notice immediately at the beginning of the file, without modification,
10 * this list of conditions, and the following disclaimer.
11 * 2. Redistributions in binary form must reproduce the above copyright
12 * notice, this list of conditions and the following disclaimer in the
13 * documentation and/or other materials provided with the distribution.
14 *
15 * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16 * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18 * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
19 * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20 * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21 * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22 * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23 * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24 * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25 * SUCH DAMAGE.
26 */
27 #include "file.h"
28
29 #ifndef lint
30 FILE_RCSID("@(#)$File: funcs.c,v 1.148 2026/01/10 16:18:18 christos Exp $")
31 #endif /* lint */
32
33 #include "magic.h"
34 #include <assert.h>
35 #include <stdarg.h>
36 #include <stdlib.h>
37 #include <string.h>
38 #include <ctype.h>
39 #ifdef HAVE_UNISTD_H
40 #include <unistd.h> /* for pipe2() */
41 #endif
42 #if defined(HAVE_WCHAR_H)
43 #include <wchar.h>
44 #endif
45 #if defined(HAVE_WCTYPE_H)
46 #include <wctype.h>
47 #endif
48 #include <limits.h>
49
50 #ifndef SIZE_MAX
51 #define SIZE_MAX ((size_t)~0)
52 #endif
53
/*
 * Copy exactly `width' bytes of `str' into `buf' (capacity `blen'),
 * clamping the count to blen - 1 so that the result is always
 * NUL-terminated.  A zero-sized buffer is left untouched.
 * Returns buf.
 */
file_protected char *
file_copystr(char *buf, size_t blen, size_t width, const char *str)
{
	if (blen > 0) {
		/* leave one byte for the terminator */
		const size_t ncopy = width < blen ? width : blen - 1;

		memcpy(buf, str, ncopy);
		buf[ncopy] = '\0';
	}
	return buf;
}
65
/*
 * Discard any accumulated output: free ms->o.buf and reset the output
 * state to "nothing printed" (NULL buffer, zero length).
 */
file_private void
file_clearbuf(struct magic_set *ms)
{
	free(ms->o.buf);
	ms->o.buf = NULL;
	ms->o.blen = 0;
}
73
/*
 * Parse an optional run of decimal digits (a printf field width or
 * precision) starting at *pp, and advance *pp past the digits.
 *
 * msg/mlen:	optional buffer (may be NULL) that receives a diagnostic
 * what:	name of the field, used only in the diagnostic
 *
 * Returns 1 when the value is below 1024 (no digits at all counts as 0),
 * otherwise 0 after formatting the diagnostic.
 */
file_private int
file_checkfield(char *msg, size_t mlen, const char *what, const char **pp)
{
	const char *p = *pp;
	int fw = 0;

	for (; isdigit((unsigned char)*p); p++) {
		const int digit = *p - '0';

		/*
		 * Saturate instead of overflowing: a very long digit run
		 * would otherwise overflow the int (undefined behaviour)
		 * and a wrapped value could slip under the limit below.
		 */
		if (fw > (INT_MAX - digit) / 10)
			fw = INT_MAX;
		else
			fw = fw * 10 + digit;
	}

	*pp = p;

	if (fw < 1024)
		return 1;
	if (msg)
		snprintf(msg, mlen, "field %s too large: %d", what, fw);

	return 0;
}
92
93 file_protected int
file_checkfmt(char * msg,size_t mlen,const char * fmt)94 file_checkfmt(char *msg, size_t mlen, const char *fmt)
95 {
96 const char *p;
97 for (p = fmt; *p; p++) {
98 if (*p != '%')
99 continue;
100 if (*++p == '%')
101 continue;
102 if (*p == '\0') {
103 if (msg)
104 snprintf(msg, mlen, "incomplete %% format");
105 return -1;
106 }
107 // Skip uninteresting.
108 while (*p != '\0' && strchr("#0.'+- ", *p) != NULL)
109 p++;
110 if (*p == '*') {
111 if (msg)
112 snprintf(msg, mlen, "* not allowed in format");
113 return -1;
114 }
115
116 if (!file_checkfield(msg, mlen, "width", &p))
117 return -1;
118
119 if (*p == '.') {
120 p++;
121 if (!file_checkfield(msg, mlen, "precision", &p))
122 return -1;
123 }
124
125 if (!isalpha((unsigned char)*p)) {
126 if (msg)
127 snprintf(msg, mlen, "bad format char: %c", *p);
128 return -1;
129 }
130 }
131 return 0;
132 }
133
134 /*
135 * Like printf, only we append to a buffer.
136 */
137 file_protected int
file_vprintf(struct magic_set * ms,const char * fmt,va_list ap)138 file_vprintf(struct magic_set *ms, const char *fmt, va_list ap)
139 {
140 int len;
141 char *buf, *newstr;
142 char tbuf[1024];
143
144 if (ms->event_flags & EVENT_HAD_ERR)
145 return 0;
146
147 if (file_checkfmt(tbuf, sizeof(tbuf), fmt)) {
148 file_clearbuf(ms);
149 file_error(ms, 0, "Bad magic format `%s' (%s)", fmt, tbuf);
150 return -1;
151 }
152
153 len = vasprintf(&buf, fmt, ap);
154 if (len < 0 || (size_t)len > 1024 || len + ms->o.blen > 1024 * 1024) {
155 size_t blen = ms->o.blen;
156 free(buf);
157 file_clearbuf(ms);
158 file_error(ms, 0, "Output buffer space exceeded %d+%"
159 SIZE_T_FORMAT "u", len, blen);
160 return -1;
161 }
162
163 if (ms->o.buf != NULL) {
164 len = asprintf(&newstr, "%s%s", ms->o.buf, buf);
165 free(buf);
166 if (len < 0)
167 goto out;
168 free(ms->o.buf);
169 buf = newstr;
170 }
171 ms->o.buf = buf;
172 ms->o.blen = len;
173 return 0;
174 out:
175 file_clearbuf(ms);
176 file_error(ms, errno, "vasprintf failed");
177 return -1;
178 }
179
180 file_protected int
file_printf(struct magic_set * ms,const char * fmt,...)181 file_printf(struct magic_set *ms, const char *fmt, ...)
182 {
183 int rv;
184 va_list ap;
185
186 va_start(ap, fmt);
187 rv = file_vprintf(ms, fmt, ap);
188 va_end(ap);
189 return rv;
190 }
191
192 /*
193 * error - print best error message possible
194 */
195 /*VARARGS*/
196 __attribute__((__format__(__printf__, 3, 0)))
197 file_private void
file_error_core(struct magic_set * ms,int error,const char * f,va_list va,size_t lineno)198 file_error_core(struct magic_set *ms, int error, const char *f, va_list va,
199 size_t lineno)
200 {
201 /* Only the first error is ok */
202 if (ms->event_flags & EVENT_HAD_ERR)
203 return;
204 if (lineno != 0) {
205 file_clearbuf(ms);
206 (void)file_printf(ms, "line %" SIZE_T_FORMAT "u:", lineno);
207 }
208 if (ms->o.buf && *ms->o.buf)
209 (void)file_printf(ms, " ");
210 (void)file_vprintf(ms, f, va);
211 if (error > 0)
212 (void)file_printf(ms, " (%s)", strerror(error));
213 ms->event_flags |= EVENT_HAD_ERR;
214 ms->error = error;
215 }
216
217 /*VARARGS*/
218 file_protected void
file_error(struct magic_set * ms,int error,const char * f,...)219 file_error(struct magic_set *ms, int error, const char *f, ...)
220 {
221 va_list va;
222 va_start(va, f);
223 file_error_core(ms, error, f, va, 0);
224 va_end(va);
225 }
226
227 /*
228 * Print an error with magic line number.
229 */
230 /*VARARGS*/
231 file_protected void
file_magerror(struct magic_set * ms,const char * f,...)232 file_magerror(struct magic_set *ms, const char *f, ...)
233 {
234 va_list va;
235 va_start(va, f);
236 file_error_core(ms, 0, f, va, ms->line);
237 va_end(va);
238 }
239
240 file_protected void
file_oomem(struct magic_set * ms,size_t len)241 file_oomem(struct magic_set *ms, size_t len)
242 {
243 file_error(ms, errno, "cannot allocate %" SIZE_T_FORMAT "u bytes",
244 len);
245 }
246
247 file_protected void
file_badseek(struct magic_set * ms)248 file_badseek(struct magic_set *ms)
249 {
250 file_error(ms, errno, "error seeking");
251 }
252
253 file_protected void
file_badread(struct magic_set * ms)254 file_badread(struct magic_set *ms)
255 {
256 file_error(ms, errno, "error reading");
257 }
258
259 #ifndef COMPILE_ONLY
/* Text placed between successive results in the output buffer. */
#define FILE_SEPARATOR "\n- "

/*
 * Append FILE_SEPARATOR to the output buffer.
 * Returns whatever file_printf() returns.
 */
file_protected int
file_separator(struct magic_set *ms)
{
	return file_printf(ms, FILE_SEPARATOR);
}
267
268 static void
trim_separator(struct magic_set * ms)269 trim_separator(struct magic_set *ms)
270 {
271 size_t l;
272
273 if (ms->o.buf == NULL)
274 return;
275
276 l = strlen(ms->o.buf);
277 if (l < sizeof(FILE_SEPARATOR))
278 return;
279
280 l -= sizeof(FILE_SEPARATOR) - 1;
281 if (strcmp(ms->o.buf + l, FILE_SEPARATOR) != 0)
282 return;
283
284 ms->o.buf[l] = '\0';
285 }
286
287 static int
checkdone(struct magic_set * ms,int * rv)288 checkdone(struct magic_set *ms, int *rv)
289 {
290 if ((ms->flags & MAGIC_CONTINUE) == 0)
291 return 1;
292 if (file_separator(ms) == -1)
293 *rv = -1;
294 return 0;
295 }
296
297 file_protected int
file_default(struct magic_set * ms,size_t nb)298 file_default(struct magic_set *ms, size_t nb)
299 {
300 if (ms->flags & MAGIC_MIME) {
301 if ((ms->flags & MAGIC_MIME_TYPE) &&
302 file_printf(ms, "application/%s",
303 nb ? "octet-stream" : "x-empty") == -1)
304 return -1;
305 return 1;
306 }
307 if (ms->flags & MAGIC_APPLE) {
308 // This is not a typo: Type: UNKN Creator: UNKN
309 if (file_printf(ms, "UNKNUNKN") == -1)
310 return -1;
311 return 1;
312 }
313 if (ms->flags & MAGIC_EXTENSION) {
314 if (file_printf(ms, "???") == -1)
315 return -1;
316 return 1;
317 }
318 return 0;
319 }
320
321 /*
322 * The magic detection functions return:
323 * 1: found
324 * 0: not found
325 * -1: error
326 */
327 /*ARGSUSED*/
328 file_protected int
file_buffer(struct magic_set * ms,int fd,struct stat * st,const char * inname,const void * buf,size_t nb)329 file_buffer(struct magic_set *ms, int fd, struct stat *st,
330 const char *inname __attribute__ ((__unused__)),
331 const void *buf, size_t nb)
332 {
333 int m = 0, rv = 0, looks_text = 0;
334 const char *code = NULL;
335 const char *code_mime = "binary";
336 const char *def = "data";
337 const char *ftype = NULL;
338 char *rbuf = NULL;
339 struct buffer b;
340
341 buffer_init(&b, fd, st, buf, nb);
342 ms->mode = b.st.st_mode;
343
344 if (nb == 0) {
345 def = "empty";
346 goto simple;
347 } else if (nb == 1) {
348 def = "very short file (no magic)";
349 goto simple;
350 }
351
352 if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) {
353 looks_text = file_encoding(ms, &b, NULL, 0,
354 &code, &code_mime, &ftype);
355 }
356
357 #ifdef __EMX__
358 if ((ms->flags & MAGIC_NO_CHECK_APPTYPE) == 0 && inname) {
359 m = file_os2_apptype(ms, inname, &b);
360 if ((ms->flags & MAGIC_DEBUG) != 0)
361 (void)fprintf(stderr, "[try os2_apptype %d]\n", m);
362 switch (m) {
363 case -1:
364 return -1;
365 case 0:
366 break;
367 default:
368 return 1;
369 }
370 }
371 #endif
372 #if HAVE_FORK
373 /* try compression stuff */
374 if ((ms->flags & MAGIC_NO_CHECK_COMPRESS) == 0) {
375 m = file_zmagic(ms, &b, inname);
376 if ((ms->flags & MAGIC_DEBUG) != 0)
377 (void)fprintf(stderr, "[try zmagic %d]\n", m);
378 if (m) {
379 goto done_encoding;
380 }
381 }
382 #endif
383 /* Check if we have a tar file */
384 if ((ms->flags & MAGIC_NO_CHECK_TAR) == 0) {
385 m = file_is_tar(ms, &b);
386 if ((ms->flags & MAGIC_DEBUG) != 0)
387 (void)fprintf(stderr, "[try tar %d]\n", m);
388 if (m) {
389 if (checkdone(ms, &rv))
390 goto done;
391 }
392 }
393
394 /* Check if we have a JSON file */
395 if ((ms->flags & MAGIC_NO_CHECK_JSON) == 0) {
396 m = file_is_json(ms, &b);
397 if ((ms->flags & MAGIC_DEBUG) != 0)
398 (void)fprintf(stderr, "[try json %d]\n", m);
399 if (m) {
400 if (checkdone(ms, &rv))
401 goto done;
402 }
403 }
404
405 /* Check if we have a CSV file */
406 if ((ms->flags & MAGIC_NO_CHECK_CSV) == 0) {
407 m = file_is_csv(ms, &b, looks_text, code);
408 if ((ms->flags & MAGIC_DEBUG) != 0)
409 (void)fprintf(stderr, "[try csv %d]\n", m);
410 if (m) {
411 if (checkdone(ms, &rv))
412 goto done;
413 }
414 }
415
416 /* Check if we have a SIMH tape file */
417 if ((ms->flags & MAGIC_NO_CHECK_SIMH) == 0) {
418 m = file_is_simh(ms, &b);
419 if ((ms->flags & MAGIC_DEBUG) != 0)
420 (void)fprintf(stderr, "[try simh %d]\n", m);
421 if (m) {
422 if (checkdone(ms, &rv))
423 goto done;
424 }
425 }
426
427 /* Check if we have a CDF file */
428 if ((ms->flags & MAGIC_NO_CHECK_CDF) == 0) {
429 m = file_trycdf(ms, &b);
430 if ((ms->flags & MAGIC_DEBUG) != 0)
431 (void)fprintf(stderr, "[try cdf %d]\n", m);
432 if (m) {
433 if (checkdone(ms, &rv))
434 goto done;
435 }
436 }
437 #ifdef BUILTIN_ELF
438 if ((ms->flags & MAGIC_NO_CHECK_ELF) == 0 && nb > 5 && fd != -1) {
439 file_pushbuf_t *pb;
440 /*
441 * We matched something in the file, so this
442 * *might* be an ELF file, and the file is at
443 * least 5 bytes long, so if it's an ELF file
444 * it has at least one byte past the ELF magic
445 * number - try extracting information from the
446 * ELF headers that cannot easily be extracted
447 * with rules in the magic file. We we don't
448 * print the information yet.
449 */
450 if ((pb = file_push_buffer(ms)) == NULL)
451 return -1;
452
453 rv = file_tryelf(ms, &b);
454 rbuf = file_pop_buffer(ms, pb);
455 if (rv == -1) {
456 free(rbuf);
457 rbuf = NULL;
458 }
459 if ((ms->flags & MAGIC_DEBUG) != 0)
460 (void)fprintf(stderr, "[try elf %d]\n", m);
461 }
462 #endif
463
464 /* try soft magic tests */
465 if ((ms->flags & MAGIC_NO_CHECK_SOFT) == 0) {
466 m = file_softmagic(ms, &b, NULL, NULL, BINTEST, looks_text);
467 if ((ms->flags & MAGIC_DEBUG) != 0)
468 (void)fprintf(stderr, "[try softmagic %d]\n", m);
469 if (m == 1 && rbuf) {
470 if (file_printf(ms, "%s", rbuf) == -1)
471 goto done;
472 }
473 if (m) {
474 if (checkdone(ms, &rv))
475 goto done;
476 }
477 }
478
479 /* try text properties */
480 if ((ms->flags & MAGIC_NO_CHECK_TEXT) == 0) {
481
482 m = file_ascmagic(ms, &b, looks_text);
483 if ((ms->flags & MAGIC_DEBUG) != 0)
484 (void)fprintf(stderr, "[try ascmagic %d]\n", m);
485 if (m) {
486 goto done;
487 }
488 }
489
490 simple:
491 /* give up */
492 if (m == 0) {
493 m = 1;
494 rv = file_default(ms, nb);
495 if (rv == 0)
496 if (file_printf(ms, "%s", def) == -1)
497 rv = -1;
498 }
499 done:
500 trim_separator(ms);
501 if ((ms->flags & MAGIC_MIME_ENCODING) != 0) {
502 if (ms->flags & MAGIC_MIME_TYPE)
503 if (file_printf(ms, "; charset=") == -1)
504 rv = -1;
505 if (file_printf(ms, "%s", code_mime) == -1)
506 rv = -1;
507 }
508 #if HAVE_FORK
509 done_encoding:
510 #endif
511 free(rbuf);
512 buffer_fini(&b);
513 if (rv)
514 return rv;
515
516 return m;
517 }
518 #endif
519
520 file_protected int
file_reset(struct magic_set * ms,int checkloaded)521 file_reset(struct magic_set *ms, int checkloaded)
522 {
523 if (checkloaded && ms->mlist[0] == NULL) {
524 file_error(ms, 0, "no magic files loaded");
525 return -1;
526 }
527 file_clearbuf(ms);
528 if (ms->o.pbuf) {
529 free(ms->o.pbuf);
530 ms->o.pbuf = NULL;
531 }
532 ms->event_flags &= ~EVENT_HAD_ERR;
533 ms->error = -1;
534 return 0;
535 }
536
/*
 * Write the 4-character escape "\ooo" (a backslash followed by three
 * octal digits) for the byte at *o through the output cursor n, advancing
 * n by four and o by one.  Both arguments are evaluated several times and
 * are modified in place, so they must be plain pointer variables.
 */
#define OCTALIFY(n, o)	\
	/*LINTED*/ \
	(void)(*(n)++ = '\\', \
	*(n)++ = ((CAST(uint32_t, *(o)) >> 6) & 3) + '0', \
	*(n)++ = ((CAST(uint32_t, *(o)) >> 3) & 7) + '0', \
	*(n)++ = ((CAST(uint32_t, *(o)) >> 0) & 7) + '0', \
	(o)++)
544
/*
 * Return the accumulated output of ms.
 *
 * Returns NULL if an error has been recorded or nothing was printed.
 * With MAGIC_RAW the output buffer itself is returned.  Otherwise a copy
 * in which non-printable characters are replaced by "\ooo" octal escapes
 * is built in ms->o.pbuf (resized with realloc and kept in ms) and
 * returned.
 */
file_protected const char *
file_getbuffer(struct magic_set *ms)
{
	char *pbuf, *op, *np;
	size_t psize, len;

	if (ms->event_flags & EVENT_HAD_ERR)
		return NULL;

	if (ms->flags & MAGIC_RAW)
		return ms->o.buf;

	if (ms->o.buf == NULL)
		return NULL;

	/* * 4 is for octal representation, + 1 is for NUL */
	len = strlen(ms->o.buf);
	/* refuse lengths for which len * 4 + 1 would overflow size_t */
	if (len > (SIZE_MAX - 1) / 4) {
		file_oomem(ms, len);
		return NULL;
	}
	psize = len * 4 + 1;
	if ((pbuf = CAST(char *, realloc(ms->o.pbuf, psize))) == NULL) {
		file_oomem(ms, psize);
		return NULL;
	}
	ms->o.pbuf = pbuf;

#if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH)
	{
		/*
		 * First attempt: decode the text as multibyte characters
		 * and keep every printable wide character as is.
		 */
		mbstate_t state;
		wchar_t nextchar;
		int mb_conv = 1;
		size_t bytesconsumed;
		char *eop;
		(void)memset(&state, 0, sizeof(mbstate_t));

		np = ms->o.pbuf;
		op = ms->o.buf;
		eop = op + len;

		while (op < eop) {
			bytesconsumed = mbrtowc(&nextchar, op,
			    CAST(size_t, eop - op), &state);
			/* invalid or incomplete sequence: give up on this pass */
			if (bytesconsumed == CAST(size_t, -1) ||
			    bytesconsumed == CAST(size_t, -2)) {
				mb_conv = 0;
				break;
			}

			if (iswprint(nextchar)) {
				(void)memcpy(np, op, bytesconsumed);
				op += bytesconsumed;
				np += bytesconsumed;
			} else {
				/* escape each byte of the character */
				while (bytesconsumed-- > 0)
					OCTALIFY(np, op);
			}
		}
		*np = '\0';

		/* Parsing succeeded as a multi-byte sequence */
		if (mb_conv != 0)
			return ms->o.pbuf;
	}
#endif

	/* Byte-wise pass: redo the conversion from the start of the text. */
	for (np = ms->o.pbuf, op = ms->o.buf; *op;) {
		if (isprint(CAST(unsigned char, *op))) {
			*np++ = *op++;
		} else {
			OCTALIFY(np, op);
		}
	}
	*np = '\0';
	return ms->o.pbuf;
}
622
623 file_protected int
file_check_mem(struct magic_set * ms,unsigned int level)624 file_check_mem(struct magic_set *ms, unsigned int level)
625 {
626 size_t len;
627
628 if (level >= ms->c.len) {
629 len = (ms->c.len = 20 + level) * sizeof(*ms->c.li);
630 ms->c.li = CAST(struct level_info *, (ms->c.li == NULL) ?
631 malloc(len) :
632 realloc(ms->c.li, len));
633 if (ms->c.li == NULL) {
634 file_oomem(ms, len);
635 return -1;
636 }
637 }
638 ms->c.li[level].got_match = 0;
639 #ifdef ENABLE_CONDITIONALS
640 ms->c.li[level].last_match = 0;
641 ms->c.li[level].last_cond = COND_NONE;
642 #endif /* ENABLE_CONDITIONALS */
643 return 0;
644 }
645
646 file_protected size_t
file_printedlen(const struct magic_set * ms)647 file_printedlen(const struct magic_set *ms)
648 {
649 return ms->o.blen;
650 }
651
652 file_protected int
file_replace(struct magic_set * ms,const char * pat,const char * rep)653 file_replace(struct magic_set *ms, const char *pat, const char *rep)
654 {
655 file_regex_t rx;
656 int rc, rv = -1;
657
658 rc = file_regcomp(ms, &rx, pat, REG_EXTENDED);
659 if (rc == 0) {
660 regmatch_t rm;
661 int nm = 0;
662 while (file_regexec(ms, &rx, ms->o.buf, 1, &rm, 0) == 0) {
663 ms->o.buf[rm.rm_so] = '\0';
664 if (file_printf(ms, "%s%s", rep,
665 rm.rm_eo != 0 ? ms->o.buf + rm.rm_eo : "") == -1)
666 goto out;
667 nm++;
668 }
669 rv = nm;
670 }
671 out:
672 file_regfree(&rx);
673 return rv;
674 }
675
676 file_private int
check_regex(struct magic_set * ms,const char * pat)677 check_regex(struct magic_set *ms, const char *pat)
678 {
679 char sbuf[512];
680 unsigned char oc = '\0';
681 const char *p;
682 unsigned long l;
683 static const char wild[] = "?*+{";
684
685 for (p = pat; *p; p++) {
686 unsigned char c = *p;
687 // Avoid repetition of wild characters
688 if (strchr(wild, oc) != NULL && strchr(wild, c) != NULL) {
689 size_t len = strlen(pat);
690 file_magwarn(ms,
691 "repetition-operator operand `%c%c' "
692 "invalid in regex `%s'", oc, c,
693 file_printable(ms, sbuf, sizeof(sbuf), pat, len));
694 return -1;
695 }
696 if (c == '{') {
697 char *ep, *eep;
698
699 if (oc == '}') {
700 file_magwarn(ms, "cascading repetition "
701 "operators in regex `%s'", pat);
702 return -1;
703 }
704 errno = 0;
705 l = strtoul(p + 1, &ep, 10);
706 if (ep != p + 1 && l > 1000)
707 goto bounds;
708 if (*ep == ',') {
709 l = strtoul(ep + 1, &eep, 10);
710 if (eep != ep + 1 && l > 1000)
711 goto bounds;
712 }
713 }
714 oc = c;
715 if (isprint(c) || isspace(c) || c == '\b'
716 || c == 0x8a) // XXX: apple magic fixme
717 continue;
718 size_t len = strlen(pat);
719 file_magwarn(ms,
720 "non-ascii characters in regex \\%#o `%s'",
721 c, file_printable(ms, sbuf, sizeof(sbuf), pat, len));
722 return -1;
723 }
724 return 0;
725 bounds:
726 file_magwarn(ms, "bounds too large %ld in regex `%s'", l, pat);
727 return -1;
728 }
729
730 file_protected int
file_regcomp(struct magic_set * ms file_locale_used,file_regex_t * rx,const char * pat,int flags)731 file_regcomp(struct magic_set *ms file_locale_used, file_regex_t *rx,
732 const char *pat, int flags)
733 {
734 if (check_regex(ms, pat) == -1)
735 return -1;
736
737 #ifdef USE_C_LOCALE
738 locale_t old = uselocale(ms->c_lc_ctype);
739 assert(old != NULL);
740 #else
741 char old[1024];
742 strlcpy(old, setlocale(LC_CTYPE, NULL), sizeof(old));
743 (void)setlocale(LC_CTYPE, "C");
744 #endif
745 int rc;
746 rc = regcomp(rx, pat, flags);
747
748 #ifdef USE_C_LOCALE
749 uselocale(old);
750 #else
751 (void)setlocale(LC_CTYPE, old);
752 #endif
753 if (rc > 0 && (ms->flags & MAGIC_CHECK)) {
754 char errmsg[512], buf[512];
755
756 (void)regerror(rc, rx, errmsg, sizeof(errmsg));
757 file_magerror(ms, "regex error %d for `%s', (%s)", rc,
758 file_printable(ms, buf, sizeof(buf), pat, strlen(pat)),
759 errmsg);
760 }
761 return rc;
762 }
763
764 /*ARGSUSED*/
765 file_protected int
file_regexec(struct magic_set * ms file_locale_used,file_regex_t * rx,const char * str,size_t nmatch,regmatch_t * pmatch,int eflags)766 file_regexec(struct magic_set *ms file_locale_used, file_regex_t *rx,
767 const char *str, size_t nmatch, regmatch_t* pmatch, int eflags)
768 {
769 #ifdef USE_C_LOCALE
770 locale_t old = uselocale(ms->c_lc_ctype);
771 assert(old != NULL);
772 #else
773 char old[1024];
774 strlcpy(old, setlocale(LC_CTYPE, NULL), sizeof(old));
775 (void)setlocale(LC_CTYPE, "C");
776 #endif
777 int rc;
778 /* XXX: force initialization because glibc does not always do this */
779 if (nmatch != 0)
780 memset(pmatch, 0, nmatch * sizeof(*pmatch));
781 rc = regexec(rx, str, nmatch, pmatch, eflags);
782 #ifdef USE_C_LOCALE
783 uselocale(old);
784 #else
785 (void)setlocale(LC_CTYPE, old);
786 #endif
787 return rc;
788 }
789
/*
 * Release a compiled pattern by passing it to regfree().
 */
file_protected void
file_regfree(file_regex_t *rx)
{
	regfree(rx);
}
795
796 file_protected file_pushbuf_t *
file_push_buffer(struct magic_set * ms)797 file_push_buffer(struct magic_set *ms)
798 {
799 file_pushbuf_t *pb;
800
801 if (ms->event_flags & EVENT_HAD_ERR)
802 return NULL;
803
804 if ((pb = (CAST(file_pushbuf_t *, malloc(sizeof(*pb))))) == NULL)
805 return NULL;
806
807 pb->buf = ms->o.buf;
808 pb->blen = ms->o.blen;
809 pb->offset = ms->offset;
810
811 ms->o.buf = NULL;
812 ms->o.blen = 0;
813 ms->offset = 0;
814
815 return pb;
816 }
817
818 file_protected char *
file_pop_buffer(struct magic_set * ms,file_pushbuf_t * pb)819 file_pop_buffer(struct magic_set *ms, file_pushbuf_t *pb)
820 {
821 char *rbuf;
822
823 if (ms->event_flags & EVENT_HAD_ERR) {
824 free(pb->buf);
825 free(pb);
826 return NULL;
827 }
828
829 rbuf = ms->o.buf;
830
831 ms->o.buf = pb->buf;
832 ms->o.blen = pb->blen;
833 ms->offset = pb->offset;
834
835 free(pb);
836 return rbuf;
837 }
838
839 /*
840 * convert string to ascii printable format.
841 */
842 file_protected char *
file_printable(struct magic_set * ms,char * buf,size_t bufsiz,const char * str,size_t slen)843 file_printable(struct magic_set *ms, char *buf, size_t bufsiz,
844 const char *str, size_t slen)
845 {
846 char *ptr, *eptr = buf + bufsiz - 1;
847 const unsigned char *s = RCAST(const unsigned char *, str);
848 const unsigned char *es = s + slen;
849
850 for (ptr = buf; ptr < eptr && s < es && *s; s++) {
851 if ((ms->flags & MAGIC_RAW) != 0 || isprint(*s)) {
852 *ptr++ = *s;
853 continue;
854 }
855 if (ptr >= eptr - 3)
856 break;
857 *ptr++ = '\\';
858 *ptr++ = ((CAST(unsigned int, *s) >> 6) & 7) + '0';
859 *ptr++ = ((CAST(unsigned int, *s) >> 3) & 7) + '0';
860 *ptr++ = ((CAST(unsigned int, *s) >> 0) & 7) + '0';
861 }
862 *ptr = '\0';
863 return buf;
864 }
865
/*
 * Field layout used by file_parse_guid() and file_print_guid() to view
 * the storage passed in as a uint64_t pointer.  Textual form:
 * XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX.
 */
struct guid {
	uint32_t data1;		/* first group, 8 hex digits */
	uint16_t data2;		/* second group, 4 hex digits */
	uint16_t data3;		/* third group, 4 hex digits */
	uint8_t data4[8];	/* fourth (2 bytes) and fifth (6 bytes) groups */
};
872
/* Hexadecimal digits in value order; the index of a digit is its value. */
static char XDIGIT[] = "0123456789abcdef";

/*
 * Return the value (0-15) of the hexadecimal digit c, accepting both
 * upper and lower case, or -1 if c is not a hexadecimal digit.
 */
static int
atox(const uint8_t c)
{
	const int folded = isupper(c) ? tolower(c) : c;
	const char *hit;

	/* NUL would otherwise match the terminator of XDIGIT */
	if ((uint8_t)folded == 0)
		return -1;

	hit = strchr(XDIGIT, folded);
	if (hit == NULL)
		return -1;
	return hit - XDIGIT;
}
883
/*
 * Parse exactly n hexadecimal digits from s and store the value through
 * p, whose pointee width is selected by n: 8 digits -> uint32_t,
 * 4 -> uint16_t, 2 -> uint8_t.
 *
 * Returns 1 on success, 0 if a non-hex character is met or n is not one
 * of the supported digit counts.
 */
static int
getxvalue(void *p, const char *s, size_t n)
{
	uint64_t acc = 0;
	size_t k;

	for (k = 0; k < n; k++) {
		const int nibble = atox(s[k]);

		if (nibble == -1)
			return 0;
		acc = (acc << 4) | nibble;
	}

	if (n == 8)
		*(uint32_t *)p = acc;
	else if (n == 4)
		*(uint16_t *)p = acc;
	else if (n == 2)
		*(uint8_t *)p = acc;
	else
		return 0;
	return 1;
}
908
909 file_protected int
file_parse_guid(const char * s,uint64_t * guid)910 file_parse_guid(const char *s, uint64_t *guid)
911 {
912 struct guid *g = CAST(struct guid *, CAST(void *, guid));
913
914 if (!getxvalue(&g->data1, s, 8) || s[8] != '-')
915 return -1;
916 s += 9;
917 if (!getxvalue(&g->data2, s, 4) || s[4] != '-')
918 return -1;
919 s += 5;
920 if (!getxvalue(&g->data3, s, 4) || s[4] != '-')
921 return -1;
922 s += 5;
923 if (!getxvalue(&g->data4[0], s, 2) ||
924 !getxvalue(&g->data4[1], s + 2, 2) || s[4] != '-')
925 return -1;
926 s += 5;
927 if (!getxvalue(&g->data4[2], s, 2) ||
928 !getxvalue(&g->data4[3], s + 2, 2) ||
929 !getxvalue(&g->data4[4], s + 4, 2) ||
930 !getxvalue(&g->data4[5], s + 6, 2) ||
931 !getxvalue(&g->data4[6], s + 8, 2) ||
932 !getxvalue(&g->data4[7], s + 10, 2))
933 return -1;
934 return 0;
935 }
936
937 file_protected int
file_print_guid(char * str,size_t len,const uint64_t * guid)938 file_print_guid(char *str, size_t len, const uint64_t *guid)
939 {
940 const struct guid *g = CAST(const struct guid *,
941 CAST(const void *, guid));
942
943 #ifndef WIN32
944 return snprintf(str, len, "%.8X-%.4hX-%.4hX-%.2hhX%.2hhX-"
945 "%.2hhX%.2hhX%.2hhX%.2hhX%.2hhX%.2hhX",
946 g->data1, g->data2, g->data3, g->data4[0], g->data4[1],
947 g->data4[2], g->data4[3], g->data4[4], g->data4[5],
948 g->data4[6], g->data4[7]);
949 #else
950 return snprintf(str, len, "%.8X-%.4hX-%.4hX-%.2hX%.2hX-"
951 "%.2hX%.2hX%.2hX%.2hX%.2hX%.2hX",
952 g->data1, g->data2, g->data3, g->data4[0], g->data4[1],
953 g->data4[2], g->data4[3], g->data4[4], g->data4[5],
954 g->data4[6], g->data4[7]);
955 #endif
956 }
957
/*
 * Create a pipe in fds[0]/fds[1] whose descriptors have the
 * close-on-exec flag set.
 *
 * Returns the result of pipe2() where available, otherwise -1 if pipe()
 * fails and 0 if it succeeds.
 */
file_protected int
file_pipe_closexec(int *fds)
{
#ifdef __MINGW32__
	/* this build reports success without touching fds */
	return 0;
#elif defined(HAVE_PIPE2)
	return pipe2(fds, O_CLOEXEC);
#else
	if (pipe(fds) == -1)
		return -1;
# ifdef F_SETFD
	/* the results of setting the flag are deliberately ignored */
	(void)fcntl(fds[0], F_SETFD, FD_CLOEXEC);
	(void)fcntl(fds[1], F_SETFD, FD_CLOEXEC);
# endif
	return 0;
#endif
}
975
976 file_protected int
file_clear_closexec(int fd)977 file_clear_closexec(int fd) {
978 #ifdef F_SETFD
979 return fcntl(fd, F_SETFD, 0);
980 #else
981 return 0;
982 #endif
983 }
984
/*
 * Trim leading and trailing whitespace from str in place.
 *
 * Returns a pointer to the first non-whitespace character inside str;
 * the trailing whitespace is cut off by writing a NUL.  An empty or
 * all-whitespace string yields an empty string.
 */
file_protected char *
file_strtrim(char *str)
{
	char *last;

	while (isspace((unsigned char)*str))
		str++;

	/*
	 * Walk back from the terminator, never moving before str: for an
	 * empty or all-whitespace string there is nothing to step back
	 * over, and looking at str[-1] would be out of bounds.
	 */
	last = str + strlen(str);
	while (last > str && isspace((unsigned char)last[-1]))
		last--;
	*last = '\0';
	return str;
}
1001