xref: /freebsd/contrib/file/src/funcs.c (revision ae316d1d1cffd71ab7751f94e10118777a88e027)
1  /*
2   * Copyright (c) Christos Zoulas 2003.
3   * All Rights Reserved.
4   *
5   * Redistribution and use in source and binary forms, with or without
6   * modification, are permitted provided that the following conditions
7   * are met:
8   * 1. Redistributions of source code must retain the above copyright
9   *    notice immediately at the beginning of the file, without modification,
10   *    this list of conditions, and the following disclaimer.
11   * 2. Redistributions in binary form must reproduce the above copyright
12   *    notice, this list of conditions and the following disclaimer in the
13   *    documentation and/or other materials provided with the distribution.
14   *
15   * THIS SOFTWARE IS PROVIDED BY THE AUTHOR AND CONTRIBUTORS ``AS IS'' AND
16   * ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
17   * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
18   * ARE DISCLAIMED. IN NO EVENT SHALL THE AUTHOR OR CONTRIBUTORS BE LIABLE FOR
19   * ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR CONSEQUENTIAL
20   * DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS
21   * OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS INTERRUPTION)
22   * HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT
23   * LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY
24   * OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF
25   * SUCH DAMAGE.
26   */
27  #include "file.h"
28  
29  #ifndef	lint
30  FILE_RCSID("@(#)$File: funcs.c,v 1.142 2023/07/30 14:41:14 christos Exp $")
31  #endif	/* lint */
32  
33  #include "magic.h"
34  #include <assert.h>
35  #include <stdarg.h>
36  #include <stdlib.h>
37  #include <string.h>
38  #include <ctype.h>
39  #ifdef HAVE_UNISTD_H
40  #include <unistd.h>	/* for pipe2() */
41  #endif
42  #if defined(HAVE_WCHAR_H)
43  #include <wchar.h>
44  #endif
45  #if defined(HAVE_WCTYPE_H)
46  #include <wctype.h>
47  #endif
48  #include <limits.h>
49  
50  #ifndef SIZE_MAX
51  #define SIZE_MAX	((size_t)~0)
52  #endif
53  
/*
 * Copy at most `width' bytes of `str' into `buf' (capacity `blen') and
 * NUL-terminate the result.  The copy is clamped to blen - 1 bytes; when
 * blen is 0 nothing is written.  Exactly `width' (or blen - 1) bytes are
 * read from `str', whether or not a NUL occurs earlier.
 * Returns buf.
 */
file_protected char *
file_copystr(char *buf, size_t blen, size_t width, const char *str)
{
	if (blen != 0) {
		size_t n = width < blen ? width : blen - 1;

		memcpy(buf, str, n);
		buf[n] = '\0';
	}
	return buf;
}
65  
66  file_private void
file_clearbuf(struct magic_set * ms)67  file_clearbuf(struct magic_set *ms)
68  {
69  	free(ms->o.buf);
70  	ms->o.buf = NULL;
71  	ms->o.blen = 0;
72  }
73  
/*
 * Parse the run of decimal digits at *pp (a printf field width or
 * precision) and advance *pp past it.
 *
 * Returns 1 when the value is below 1024 (an empty run counts as 0) and 0
 * otherwise.  On a 0 return, if `msg' is not NULL, a diagnostic naming
 * `what' is formatted into it (at most `mlen' bytes).
 */
file_private int
file_checkfield(char *msg, size_t mlen, const char *what, const char **pp)
{
	const char *p = *pp;
	int fw = 0;

	while (isdigit((unsigned char)*p)) {
		int digit = *p++ - '0';

		/*
		 * Saturate rather than overflow: signed overflow is
		 * undefined and, where it wraps, a long digit run could
		 * come back below the limit and be accepted.
		 */
		if (fw > (INT_MAX - digit) / 10)
			fw = INT_MAX;
		else
			fw = fw * 10 + digit;
	}

	*pp = p;

	if (fw < 1024)
		return 1;
	if (msg)
		snprintf(msg, mlen, "field %s too large: %d", what, fw);

	return 0;
}
92  
93  file_protected int
file_checkfmt(char * msg,size_t mlen,const char * fmt)94  file_checkfmt(char *msg, size_t mlen, const char *fmt)
95  {
96  	const char *p;
97  	for (p = fmt; *p; p++) {
98  		if (*p != '%')
99  			continue;
100  		if (*++p == '%')
101  			continue;
102  		// Skip uninteresting.
103  		while (strchr("#0.'+- ", *p) != NULL)
104  			p++;
105  		if (*p == '*') {
106  			if (msg)
107  				snprintf(msg, mlen, "* not allowed in format");
108  			return -1;
109  		}
110  
111  		if (!file_checkfield(msg, mlen, "width", &p))
112  			return -1;
113  
114  		if (*p == '.') {
115  			p++;
116  			if (!file_checkfield(msg, mlen, "precision", &p))
117  				return -1;
118  		}
119  
120  		if (!isalpha((unsigned char)*p)) {
121  			if (msg)
122  				snprintf(msg, mlen, "bad format char: %c", *p);
123  			return -1;
124  		}
125  	}
126  	return 0;
127  }
128  
129  /*
130   * Like printf, only we append to a buffer.
131   */
132  file_protected int
file_vprintf(struct magic_set * ms,const char * fmt,va_list ap)133  file_vprintf(struct magic_set *ms, const char *fmt, va_list ap)
134  {
135  	int len;
136  	char *buf, *newstr;
137  	char tbuf[1024];
138  
139  	if (ms->event_flags & EVENT_HAD_ERR)
140  		return 0;
141  
142  	if (file_checkfmt(tbuf, sizeof(tbuf), fmt)) {
143  		file_clearbuf(ms);
144  		file_error(ms, 0, "Bad magic format `%s' (%s)", fmt, tbuf);
145  		return -1;
146  	}
147  
148  	len = vasprintf(&buf, fmt, ap);
149  	if (len < 0 || (size_t)len > 1024 || len + ms->o.blen > 1024 * 1024) {
150  		size_t blen = ms->o.blen;
151  		free(buf);
152  		file_clearbuf(ms);
153  		file_error(ms, 0, "Output buffer space exceeded %d+%"
154  		    SIZE_T_FORMAT "u", len, blen);
155  		return -1;
156  	}
157  
158  	if (ms->o.buf != NULL) {
159  		len = asprintf(&newstr, "%s%s", ms->o.buf, buf);
160  		free(buf);
161  		if (len < 0)
162  			goto out;
163  		free(ms->o.buf);
164  		buf = newstr;
165  	}
166  	ms->o.buf = buf;
167  	ms->o.blen = len;
168  	return 0;
169  out:
170  	file_clearbuf(ms);
171  	file_error(ms, errno, "vasprintf failed");
172  	return -1;
173  }
174  
175  file_protected int
file_printf(struct magic_set * ms,const char * fmt,...)176  file_printf(struct magic_set *ms, const char *fmt, ...)
177  {
178  	int rv;
179  	va_list ap;
180  
181  	va_start(ap, fmt);
182  	rv = file_vprintf(ms, fmt, ap);
183  	va_end(ap);
184  	return rv;
185  }
186  
187  /*
188   * error - print best error message possible
189   */
190  /*VARARGS*/
191  __attribute__((__format__(__printf__, 3, 0)))
192  file_private void
file_error_core(struct magic_set * ms,int error,const char * f,va_list va,size_t lineno)193  file_error_core(struct magic_set *ms, int error, const char *f, va_list va,
194      size_t lineno)
195  {
196  	/* Only the first error is ok */
197  	if (ms->event_flags & EVENT_HAD_ERR)
198  		return;
199  	if (lineno != 0) {
200  		file_clearbuf(ms);
201  		(void)file_printf(ms, "line %" SIZE_T_FORMAT "u:", lineno);
202  	}
203  	if (ms->o.buf && *ms->o.buf)
204  		(void)file_printf(ms, " ");
205  	(void)file_vprintf(ms, f, va);
206  	if (error > 0)
207  		(void)file_printf(ms, " (%s)", strerror(error));
208  	ms->event_flags |= EVENT_HAD_ERR;
209  	ms->error = error;
210  }
211  
212  /*VARARGS*/
213  file_protected void
file_error(struct magic_set * ms,int error,const char * f,...)214  file_error(struct magic_set *ms, int error, const char *f, ...)
215  {
216  	va_list va;
217  	va_start(va, f);
218  	file_error_core(ms, error, f, va, 0);
219  	va_end(va);
220  }
221  
222  /*
223   * Print an error with magic line number.
224   */
225  /*VARARGS*/
226  file_protected void
file_magerror(struct magic_set * ms,const char * f,...)227  file_magerror(struct magic_set *ms, const char *f, ...)
228  {
229  	va_list va;
230  	va_start(va, f);
231  	file_error_core(ms, 0, f, va, ms->line);
232  	va_end(va);
233  }
234  
235  file_protected void
file_oomem(struct magic_set * ms,size_t len)236  file_oomem(struct magic_set *ms, size_t len)
237  {
238  	file_error(ms, errno, "cannot allocate %" SIZE_T_FORMAT "u bytes",
239  	    len);
240  }
241  
242  file_protected void
file_badseek(struct magic_set * ms)243  file_badseek(struct magic_set *ms)
244  {
245  	file_error(ms, errno, "error seeking");
246  }
247  
248  file_protected void
file_badread(struct magic_set * ms)249  file_badread(struct magic_set *ms)
250  {
251  	file_error(ms, errno, "error reading");
252  }
253  
254  #ifndef COMPILE_ONLY
255  #define FILE_SEPARATOR "\n- "
256  
257  file_protected int
file_separator(struct magic_set * ms)258  file_separator(struct magic_set *ms)
259  {
260  	return file_printf(ms, FILE_SEPARATOR);
261  }
262  
263  static void
trim_separator(struct magic_set * ms)264  trim_separator(struct magic_set *ms)
265  {
266  	size_t l;
267  
268  	if (ms->o.buf == NULL)
269  		return;
270  
271  	l = strlen(ms->o.buf);
272  	if (l < sizeof(FILE_SEPARATOR))
273  		return;
274  
275  	l -= sizeof(FILE_SEPARATOR) - 1;
276  	if (strcmp(ms->o.buf + l, FILE_SEPARATOR) != 0)
277  		return;
278  
279  	ms->o.buf[l] = '\0';
280  }
281  
282  static int
checkdone(struct magic_set * ms,int * rv)283  checkdone(struct magic_set *ms, int *rv)
284  {
285  	if ((ms->flags & MAGIC_CONTINUE) == 0)
286  		return 1;
287  	if (file_separator(ms) == -1)
288  		*rv = -1;
289  	return 0;
290  }
291  
292  file_protected int
file_default(struct magic_set * ms,size_t nb)293  file_default(struct magic_set *ms, size_t nb)
294  {
295  	if (ms->flags & MAGIC_MIME) {
296  		if ((ms->flags & MAGIC_MIME_TYPE) &&
297  		    file_printf(ms, "application/%s",
298  			nb ? "octet-stream" : "x-empty") == -1)
299  			return -1;
300  		return 1;
301  	}
302  	if (ms->flags & MAGIC_APPLE) {
303  		if (file_printf(ms, "UNKNUNKN") == -1)
304  			return -1;
305  		return 1;
306  	}
307  	if (ms->flags & MAGIC_EXTENSION) {
308  		if (file_printf(ms, "???") == -1)
309  			return -1;
310  		return 1;
311  	}
312  	return 0;
313  }
314  
315  /*
316   * The magic detection functions return:
317   *	 1: found
318   *	 0: not found
319   *	-1: error
320   */
321  /*ARGSUSED*/
322  file_protected int
file_buffer(struct magic_set * ms,int fd,struct stat * st,const char * inname,const void * buf,size_t nb)323  file_buffer(struct magic_set *ms, int fd, struct stat *st,
324      const char *inname __attribute__ ((__unused__)),
325      const void *buf, size_t nb)
326  {
327  	int m = 0, rv = 0, looks_text = 0;
328  	const char *code = NULL;
329  	const char *code_mime = "binary";
330  	const char *def = "data";
331  	const char *ftype = NULL;
332  	char *rbuf = NULL;
333  	struct buffer b;
334  
335  	buffer_init(&b, fd, st, buf, nb);
336  	ms->mode = b.st.st_mode;
337  
338  	if (nb == 0) {
339  		def = "empty";
340  		goto simple;
341  	} else if (nb == 1) {
342  		def = "very short file (no magic)";
343  		goto simple;
344  	}
345  
346  	if ((ms->flags & MAGIC_NO_CHECK_ENCODING) == 0) {
347  		looks_text = file_encoding(ms, &b, NULL, 0,
348  		    &code, &code_mime, &ftype);
349  	}
350  
351  #ifdef __EMX__
352  	if ((ms->flags & MAGIC_NO_CHECK_APPTYPE) == 0 && inname) {
353  		m = file_os2_apptype(ms, inname, &b);
354  		if ((ms->flags & MAGIC_DEBUG) != 0)
355  			(void)fprintf(stderr, "[try os2_apptype %d]\n", m);
356  		switch (m) {
357  		case -1:
358  			return -1;
359  		case 0:
360  			break;
361  		default:
362  			return 1;
363  		}
364  	}
365  #endif
366  #if HAVE_FORK
367  	/* try compression stuff */
368  	if ((ms->flags & MAGIC_NO_CHECK_COMPRESS) == 0) {
369  		m = file_zmagic(ms, &b, inname);
370  		if ((ms->flags & MAGIC_DEBUG) != 0)
371  			(void)fprintf(stderr, "[try zmagic %d]\n", m);
372  		if (m) {
373  			goto done_encoding;
374  		}
375  	}
376  #endif
377  	/* Check if we have a tar file */
378  	if ((ms->flags & MAGIC_NO_CHECK_TAR) == 0) {
379  		m = file_is_tar(ms, &b);
380  		if ((ms->flags & MAGIC_DEBUG) != 0)
381  			(void)fprintf(stderr, "[try tar %d]\n", m);
382  		if (m) {
383  			if (checkdone(ms, &rv))
384  				goto done;
385  		}
386  	}
387  
388  	/* Check if we have a JSON file */
389  	if ((ms->flags & MAGIC_NO_CHECK_JSON) == 0) {
390  		m = file_is_json(ms, &b);
391  		if ((ms->flags & MAGIC_DEBUG) != 0)
392  			(void)fprintf(stderr, "[try json %d]\n", m);
393  		if (m) {
394  			if (checkdone(ms, &rv))
395  				goto done;
396  		}
397  	}
398  
399  	/* Check if we have a CSV file */
400  	if ((ms->flags & MAGIC_NO_CHECK_CSV) == 0) {
401  		m = file_is_csv(ms, &b, looks_text, code);
402  		if ((ms->flags & MAGIC_DEBUG) != 0)
403  			(void)fprintf(stderr, "[try csv %d]\n", m);
404  		if (m) {
405  			if (checkdone(ms, &rv))
406  				goto done;
407  		}
408  	}
409  
410  	/* Check if we have a SIMH tape file */
411  	if ((ms->flags & MAGIC_NO_CHECK_SIMH) == 0) {
412  		m = file_is_simh(ms, &b);
413  		if ((ms->flags & MAGIC_DEBUG) != 0)
414  			(void)fprintf(stderr, "[try simh %d]\n", m);
415  		if (m) {
416  			if (checkdone(ms, &rv))
417  				goto done;
418  		}
419  	}
420  
421  	/* Check if we have a CDF file */
422  	if ((ms->flags & MAGIC_NO_CHECK_CDF) == 0) {
423  		m = file_trycdf(ms, &b);
424  		if ((ms->flags & MAGIC_DEBUG) != 0)
425  			(void)fprintf(stderr, "[try cdf %d]\n", m);
426  		if (m) {
427  			if (checkdone(ms, &rv))
428  				goto done;
429  		}
430  	}
431  #ifdef BUILTIN_ELF
432  	if ((ms->flags & MAGIC_NO_CHECK_ELF) == 0 && nb > 5 && fd != -1) {
433  		file_pushbuf_t *pb;
434  		/*
435  		 * We matched something in the file, so this
436  		 * *might* be an ELF file, and the file is at
437  		 * least 5 bytes long, so if it's an ELF file
438  		 * it has at least one byte past the ELF magic
439  		 * number - try extracting information from the
440  		 * ELF headers that cannot easily be  extracted
441  		 * with rules in the magic file. We we don't
442  		 * print the information yet.
443  		 */
444  		if ((pb = file_push_buffer(ms)) == NULL)
445  			return -1;
446  
447  		rv = file_tryelf(ms, &b);
448  		rbuf = file_pop_buffer(ms, pb);
449  		if (rv == -1) {
450  			free(rbuf);
451  			rbuf = NULL;
452  		}
453  		if ((ms->flags & MAGIC_DEBUG) != 0)
454  			(void)fprintf(stderr, "[try elf %d]\n", m);
455  	}
456  #endif
457  
458  	/* try soft magic tests */
459  	if ((ms->flags & MAGIC_NO_CHECK_SOFT) == 0) {
460  		m = file_softmagic(ms, &b, NULL, NULL, BINTEST, looks_text);
461  		if ((ms->flags & MAGIC_DEBUG) != 0)
462  			(void)fprintf(stderr, "[try softmagic %d]\n", m);
463  		if (m == 1 && rbuf) {
464  			if (file_printf(ms, "%s", rbuf) == -1)
465  				goto done;
466  		}
467  		if (m) {
468  			if (checkdone(ms, &rv))
469  				goto done;
470  		}
471  	}
472  
473  	/* try text properties */
474  	if ((ms->flags & MAGIC_NO_CHECK_TEXT) == 0) {
475  
476  		m = file_ascmagic(ms, &b, looks_text);
477  		if ((ms->flags & MAGIC_DEBUG) != 0)
478  			(void)fprintf(stderr, "[try ascmagic %d]\n", m);
479  		if (m) {
480  			goto done;
481  		}
482  	}
483  
484  simple:
485  	/* give up */
486  	if (m == 0) {
487  		m = 1;
488  		rv = file_default(ms, nb);
489  		if (rv == 0)
490  			if (file_printf(ms, "%s", def) == -1)
491  				rv = -1;
492  	}
493   done:
494  	trim_separator(ms);
495  	if ((ms->flags & MAGIC_MIME_ENCODING) != 0) {
496  		if (ms->flags & MAGIC_MIME_TYPE)
497  			if (file_printf(ms, "; charset=") == -1)
498  				rv = -1;
499  		if (file_printf(ms, "%s", code_mime) == -1)
500  			rv = -1;
501  	}
502  #if HAVE_FORK
503   done_encoding:
504  #endif
505  	free(rbuf);
506  	buffer_fini(&b);
507  	if (rv)
508  		return rv;
509  
510  	return m;
511  }
512  #endif
513  
514  file_protected int
file_reset(struct magic_set * ms,int checkloaded)515  file_reset(struct magic_set *ms, int checkloaded)
516  {
517  	if (checkloaded && ms->mlist[0] == NULL) {
518  		file_error(ms, 0, "no magic files loaded");
519  		return -1;
520  	}
521  	file_clearbuf(ms);
522  	if (ms->o.pbuf) {
523  		free(ms->o.pbuf);
524  		ms->o.pbuf = NULL;
525  	}
526  	ms->event_flags &= ~EVENT_HAD_ERR;
527  	ms->error = -1;
528  	return 0;
529  }
530  
/*
 * Write the four-character escape "\ooo" (a backslash and three octal
 * digits) for the byte at *o through the output cursor n, then advance o
 * by one byte.  Both arguments are evaluated several times.
 */
#define OCTALIFY(n, o)	\
	/*LINTED*/ \
	(void)(*(n)++ = '\\', \
	*(n)++ = '0' + ((CAST(uint32_t, *(o)) >> 6) & 3), \
	*(n)++ = '0' + ((CAST(uint32_t, *(o)) >> 3) & 7), \
	*(n)++ = '0' + (CAST(uint32_t, *(o)) & 7), \
	(o)++)
538  
539  file_protected const char *
file_getbuffer(struct magic_set * ms)540  file_getbuffer(struct magic_set *ms)
541  {
542  	char *pbuf, *op, *np;
543  	size_t psize, len;
544  
545  	if (ms->event_flags & EVENT_HAD_ERR)
546  		return NULL;
547  
548  	if (ms->flags & MAGIC_RAW)
549  		return ms->o.buf;
550  
551  	if (ms->o.buf == NULL)
552  		return NULL;
553  
554  	/* * 4 is for octal representation, + 1 is for NUL */
555  	len = strlen(ms->o.buf);
556  	if (len > (SIZE_MAX - 1) / 4) {
557  		file_oomem(ms, len);
558  		return NULL;
559  	}
560  	psize = len * 4 + 1;
561  	if ((pbuf = CAST(char *, realloc(ms->o.pbuf, psize))) == NULL) {
562  		file_oomem(ms, psize);
563  		return NULL;
564  	}
565  	ms->o.pbuf = pbuf;
566  
567  #if defined(HAVE_WCHAR_H) && defined(HAVE_MBRTOWC) && defined(HAVE_WCWIDTH)
568  	{
569  		mbstate_t state;
570  		wchar_t nextchar;
571  		int mb_conv = 1;
572  		size_t bytesconsumed;
573  		char *eop;
574  		(void)memset(&state, 0, sizeof(mbstate_t));
575  
576  		np = ms->o.pbuf;
577  		op = ms->o.buf;
578  		eop = op + len;
579  
580  		while (op < eop) {
581  			bytesconsumed = mbrtowc(&nextchar, op,
582  			    CAST(size_t, eop - op), &state);
583  			if (bytesconsumed == CAST(size_t, -1) ||
584  			    bytesconsumed == CAST(size_t, -2)) {
585  				mb_conv = 0;
586  				break;
587  			}
588  
589  			if (iswprint(nextchar)) {
590  				(void)memcpy(np, op, bytesconsumed);
591  				op += bytesconsumed;
592  				np += bytesconsumed;
593  			} else {
594  				while (bytesconsumed-- > 0)
595  					OCTALIFY(np, op);
596  			}
597  		}
598  		*np = '\0';
599  
600  		/* Parsing succeeded as a multi-byte sequence */
601  		if (mb_conv != 0)
602  			return ms->o.pbuf;
603  	}
604  #endif
605  
606  	for (np = ms->o.pbuf, op = ms->o.buf; *op;) {
607  		if (isprint(CAST(unsigned char, *op))) {
608  			*np++ = *op++;
609  		} else {
610  			OCTALIFY(np, op);
611  		}
612  	}
613  	*np = '\0';
614  	return ms->o.pbuf;
615  }
616  
617  file_protected int
file_check_mem(struct magic_set * ms,unsigned int level)618  file_check_mem(struct magic_set *ms, unsigned int level)
619  {
620  	size_t len;
621  
622  	if (level >= ms->c.len) {
623  		len = (ms->c.len = 20 + level) * sizeof(*ms->c.li);
624  		ms->c.li = CAST(struct level_info *, (ms->c.li == NULL) ?
625  		    malloc(len) :
626  		    realloc(ms->c.li, len));
627  		if (ms->c.li == NULL) {
628  			file_oomem(ms, len);
629  			return -1;
630  		}
631  	}
632  	ms->c.li[level].got_match = 0;
633  #ifdef ENABLE_CONDITIONALS
634  	ms->c.li[level].last_match = 0;
635  	ms->c.li[level].last_cond = COND_NONE;
636  #endif /* ENABLE_CONDITIONALS */
637  	return 0;
638  }
639  
640  file_protected size_t
file_printedlen(const struct magic_set * ms)641  file_printedlen(const struct magic_set *ms)
642  {
643  	return ms->o.blen;
644  }
645  
646  file_protected int
file_replace(struct magic_set * ms,const char * pat,const char * rep)647  file_replace(struct magic_set *ms, const char *pat, const char *rep)
648  {
649  	file_regex_t rx;
650  	int rc, rv = -1;
651  
652  	rc = file_regcomp(ms, &rx, pat, REG_EXTENDED);
653  	if (rc == 0) {
654  		regmatch_t rm;
655  		int nm = 0;
656  		while (file_regexec(ms, &rx, ms->o.buf, 1, &rm, 0) == 0) {
657  			ms->o.buf[rm.rm_so] = '\0';
658  			if (file_printf(ms, "%s%s", rep,
659  			    rm.rm_eo != 0 ? ms->o.buf + rm.rm_eo : "") == -1)
660  				goto out;
661  			nm++;
662  		}
663  		rv = nm;
664  	}
665  out:
666  	file_regfree(&rx);
667  	return rv;
668  }
669  
670  file_private int
check_regex(struct magic_set * ms,const char * pat)671  check_regex(struct magic_set *ms, const char *pat)
672  {
673  	char sbuf[512];
674  	unsigned char oc = '\0';
675  	const char *p;
676  	unsigned long l;
677  
678  	for (p = pat; *p; p++) {
679  		unsigned char c = *p;
680  		// Avoid repetition
681  		if (c == oc && strchr("?*+{", c) != NULL) {
682  			size_t len = strlen(pat);
683  			file_magwarn(ms,
684  			    "repetition-operator operand `%c' "
685  			    "invalid in regex `%s'", c,
686  			    file_printable(ms, sbuf, sizeof(sbuf), pat, len));
687  			return -1;
688  		}
689  		if (c == '{') {
690  			char *ep, *eep;
691  			errno = 0;
692  			l = strtoul(p + 1, &ep, 10);
693  			if (ep != p + 1 && l > 1000)
694  				goto bounds;
695  
696  			if (*ep == ',') {
697  				l = strtoul(ep + 1, &eep, 10);
698  				if (eep != ep + 1 && l > 1000)
699  					goto bounds;
700  			}
701  		}
702  		oc = c;
703  		if (isprint(c) || isspace(c) || c == '\b'
704  		    || c == 0x8a) // XXX: apple magic fixme
705  			continue;
706  		size_t len = strlen(pat);
707  		file_magwarn(ms,
708  		    "non-ascii characters in regex \\%#o `%s'",
709  		    c, file_printable(ms, sbuf, sizeof(sbuf), pat, len));
710  		return -1;
711  	}
712  	return 0;
713  bounds:
714  	file_magwarn(ms, "bounds too large %ld in regex `%s'", l, pat);
715  	return -1;
716  }
717  
718  file_protected int
file_regcomp(struct magic_set * ms file_locale_used,file_regex_t * rx,const char * pat,int flags)719  file_regcomp(struct magic_set *ms file_locale_used, file_regex_t *rx,
720      const char *pat, int flags)
721  {
722  	if (check_regex(ms, pat) == -1)
723  		return -1;
724  
725  #ifdef USE_C_LOCALE
726  	locale_t old = uselocale(ms->c_lc_ctype);
727  	assert(old != NULL);
728  #else
729  	char old[1024];
730  	strlcpy(old, setlocale(LC_CTYPE, NULL), sizeof(old));
731  	(void)setlocale(LC_CTYPE, "C");
732  #endif
733  	int rc;
734  	rc = regcomp(rx, pat, flags);
735  
736  #ifdef USE_C_LOCALE
737  	uselocale(old);
738  #else
739  	(void)setlocale(LC_CTYPE, old);
740  #endif
741  	if (rc > 0 && (ms->flags & MAGIC_CHECK)) {
742  		char errmsg[512], buf[512];
743  
744  		(void)regerror(rc, rx, errmsg, sizeof(errmsg));
745  		file_magerror(ms, "regex error %d for `%s', (%s)", rc,
746  		    file_printable(ms, buf, sizeof(buf), pat, strlen(pat)),
747  		    errmsg);
748  	}
749  	return rc;
750  }
751  
752  /*ARGSUSED*/
753  file_protected int
file_regexec(struct magic_set * ms file_locale_used,file_regex_t * rx,const char * str,size_t nmatch,regmatch_t * pmatch,int eflags)754  file_regexec(struct magic_set *ms file_locale_used, file_regex_t *rx,
755      const char *str, size_t nmatch, regmatch_t* pmatch, int eflags)
756  {
757  #ifdef USE_C_LOCALE
758  	locale_t old = uselocale(ms->c_lc_ctype);
759  	assert(old != NULL);
760  #else
761  	char old[1024];
762  	strlcpy(old, setlocale(LC_CTYPE, NULL), sizeof(old));
763  	(void)setlocale(LC_CTYPE, "C");
764  #endif
765  	int rc;
766  	/* XXX: force initialization because glibc does not always do this */
767  	if (nmatch != 0)
768  		memset(pmatch, 0, nmatch * sizeof(*pmatch));
769  	rc = regexec(rx, str, nmatch, pmatch, eflags);
770  #ifdef USE_C_LOCALE
771  	uselocale(old);
772  #else
773  	(void)setlocale(LC_CTYPE, old);
774  #endif
775  	return rc;
776  }
777  
778  file_protected void
file_regfree(file_regex_t * rx)779  file_regfree(file_regex_t *rx)
780  {
781  	regfree(rx);
782  }
783  
784  file_protected file_pushbuf_t *
file_push_buffer(struct magic_set * ms)785  file_push_buffer(struct magic_set *ms)
786  {
787  	file_pushbuf_t *pb;
788  
789  	if (ms->event_flags & EVENT_HAD_ERR)
790  		return NULL;
791  
792  	if ((pb = (CAST(file_pushbuf_t *, malloc(sizeof(*pb))))) == NULL)
793  		return NULL;
794  
795  	pb->buf = ms->o.buf;
796  	pb->blen = ms->o.blen;
797  	pb->offset = ms->offset;
798  
799  	ms->o.buf = NULL;
800  	ms->o.blen = 0;
801  	ms->offset = 0;
802  
803  	return pb;
804  }
805  
806  file_protected char *
file_pop_buffer(struct magic_set * ms,file_pushbuf_t * pb)807  file_pop_buffer(struct magic_set *ms, file_pushbuf_t *pb)
808  {
809  	char *rbuf;
810  
811  	if (ms->event_flags & EVENT_HAD_ERR) {
812  		free(pb->buf);
813  		free(pb);
814  		return NULL;
815  	}
816  
817  	rbuf = ms->o.buf;
818  
819  	ms->o.buf = pb->buf;
820  	ms->o.blen = pb->blen;
821  	ms->offset = pb->offset;
822  
823  	free(pb);
824  	return rbuf;
825  }
826  
827  /*
828   * convert string to ascii printable format.
829   */
830  file_protected char *
file_printable(struct magic_set * ms,char * buf,size_t bufsiz,const char * str,size_t slen)831  file_printable(struct magic_set *ms, char *buf, size_t bufsiz,
832      const char *str, size_t slen)
833  {
834  	char *ptr, *eptr = buf + bufsiz - 1;
835  	const unsigned char *s = RCAST(const unsigned char *, str);
836  	const unsigned char *es = s + slen;
837  
838  	for (ptr = buf;  ptr < eptr && s < es && *s; s++) {
839  		if ((ms->flags & MAGIC_RAW) != 0 || isprint(*s)) {
840  			*ptr++ = *s;
841  			continue;
842  		}
843  		if (ptr >= eptr - 3)
844  			break;
845  		*ptr++ = '\\';
846  		*ptr++ = ((CAST(unsigned int, *s) >> 6) & 7) + '0';
847  		*ptr++ = ((CAST(unsigned int, *s) >> 3) & 7) + '0';
848  		*ptr++ = ((CAST(unsigned int, *s) >> 0) & 7) + '0';
849  	}
850  	*ptr = '\0';
851  	return buf;
852  }
853  
/*
 * Field layout of a GUID as it is written in text form,
 * XXXXXXXX-XXXX-XXXX-XXXX-XXXXXXXXXXXX.
 */
struct guid {
	uint32_t data1;		/* first group, 8 hex digits */
	uint16_t data2;		/* second group, 4 hex digits */
	uint16_t data3;		/* third group, 4 hex digits */
	uint8_t data4[8];	/* last two groups, one byte per element */
};
860  
861  file_protected int
file_parse_guid(const char * s,uint64_t * guid)862  file_parse_guid(const char *s, uint64_t *guid)
863  {
864  	struct guid *g = CAST(struct guid *, CAST(void *, guid));
865  #ifndef WIN32
866  	return sscanf(s,
867  	    "%8x-%4hx-%4hx-%2hhx%2hhx-%2hhx%2hhx%2hhx%2hhx%2hhx%2hhx",
868  	    &g->data1, &g->data2, &g->data3, &g->data4[0], &g->data4[1],
869  	    &g->data4[2], &g->data4[3], &g->data4[4], &g->data4[5],
870  	    &g->data4[6], &g->data4[7]) == 11 ? 0 : -1;
871  #else
872  	/* MS-Windows runtime doesn't support %hhx, except under
873  	   non-default __USE_MINGW_ANSI_STDIO.  */
874  	uint16_t data16[8];
875  	int rv = sscanf(s, "%8x-%4hx-%4hx-%2hx%2hx-%2hx%2hx%2hx%2hx%2hx%2hx",
876  	    &g->data1, &g->data2, &g->data3, &data16[0], &data16[1],
877  	    &data16[2], &data16[3], &data16[4], &data16[5],
878  	    &data16[6], &data16[7]) == 11 ? 0 : -1;
879  	int i;
880  	for (i = 0; i < 8; i++)
881  	    g->data4[i] = data16[i];
882  	return rv;
883  #endif
884  }
885  
886  file_protected int
file_print_guid(char * str,size_t len,const uint64_t * guid)887  file_print_guid(char *str, size_t len, const uint64_t *guid)
888  {
889  	const struct guid *g = CAST(const struct guid *,
890  	    CAST(const void *, guid));
891  
892  #ifndef WIN32
893  	return snprintf(str, len, "%.8X-%.4hX-%.4hX-%.2hhX%.2hhX-"
894  	    "%.2hhX%.2hhX%.2hhX%.2hhX%.2hhX%.2hhX",
895  	    g->data1, g->data2, g->data3, g->data4[0], g->data4[1],
896  	    g->data4[2], g->data4[3], g->data4[4], g->data4[5],
897  	    g->data4[6], g->data4[7]);
898  #else
899  	return snprintf(str, len, "%.8X-%.4hX-%.4hX-%.2hX%.2hX-"
900  	    "%.2hX%.2hX%.2hX%.2hX%.2hX%.2hX",
901  	    g->data1, g->data2, g->data3, g->data4[0], g->data4[1],
902  	    g->data4[2], g->data4[3], g->data4[4], g->data4[5],
903  	    g->data4[6], g->data4[7]);
904  #endif
905  }
906  
907  file_protected int
file_pipe_closexec(int * fds)908  file_pipe_closexec(int *fds)
909  {
910  #ifdef __MINGW32__
911  	return 0;
912  #elif defined(HAVE_PIPE2)
913  	return pipe2(fds, O_CLOEXEC);
914  #else
915  	if (pipe(fds) == -1)
916  		return -1;
917  # ifdef F_SETFD
918  	(void)fcntl(fds[0], F_SETFD, FD_CLOEXEC);
919  	(void)fcntl(fds[1], F_SETFD, FD_CLOEXEC);
920  # endif
921  	return 0;
922  #endif
923  }
924  
925  file_protected int
file_clear_closexec(int fd)926  file_clear_closexec(int fd) {
927  #ifdef F_SETFD
928  	return fcntl(fd, F_SETFD, 0);
929  #else
930  	return 0;
931  #endif
932  }
933  
/*
 * Strip leading and trailing white space from `str' in place.
 *
 * Returns a pointer to the first non-blank character inside `str'; the
 * text is cut after the last non-blank character.  For an empty or
 * all-blank string the result is an empty string and nothing before the
 * returned pointer is read or modified.
 */
file_protected char *
file_strtrim(char *str)
{
	char *last;

	while (isspace(CAST(unsigned char, *str)))
		str++;

	/*
	 * Scan back from the terminator but never past `str'.  Starting
	 * at "end - 1" unconditionally would step in front of the string
	 * whenever nothing is left after the leading blanks were skipped.
	 */
	last = str + strlen(str);
	while (last > str && isspace(CAST(unsigned char, last[-1])))
		last--;
	*last = '\0';
	return str;
}
950