xref: /titanic_44/usr/src/lib/libast/common/misc/magic.c (revision 1e4c938b57d1656808e4112127ff1dce3eba5314)
1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *          Copyright (c) 1985-2010 AT&T Intellectual Property          *
5 *                      and is licensed under the                       *
6 *                  Common Public License, Version 1.0                  *
7 *                    by AT&T Intellectual Property                     *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *            http://www.opensource.org/licenses/cpl1.0.txt             *
11 *         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                  David Korn <dgk@research.att.com>                   *
19 *                   Phong Vo <kpv@research.att.com>                    *
20 *                                                                      *
21 ***********************************************************************/
22 #pragma prototyped
23 /*
24  * Glenn Fowler
25  * AT&T Research
26  *
27  * library interface to file
28  *
29  * the sum of the hacks {s5,v10,planix} is _____ than the parts
30  */
31 
/* library identification string; the "@(#)" marker is presumably there for what(1)-style scanners -- confirm */
32 static const char id[] = "\n@(#)$Id: magic library (AT&T Research) 2008-09-10 $\0\n";
33 
/* library name handed to ERROR_translate() by the T() macro */
34 static const char lib[] = "libast:magic";
35 
36 #include <ast.h>
37 #include <ctype.h>
38 #include <ccode.h>
39 #include <dt.h>
40 #include <modex.h>
41 #include <error.h>
42 #include <regex.h>
43 #include <swap.h>
44 
/* translate message m through ERROR_translate(); an empty string is returned untouched */
45 #define T(m)		(*m?ERROR_translate(NiL,NiL,lib,m):m)
46 
/* strgrpmatch() s against p; the flag names indicate a left and right anchored, case ignoring match */
47 #define match(s,p)	strgrpmatch(s,p,NiL,0,STR_LEFT|STR_RIGHT|STR_ICASE)
48 
/* MAXNEST also sizes the ckmagic() keep/cap/msg/ret stacks and bounds its call depth */
49 #define MAXNEST		10		/* { ... } nesting limit	*/
50 #define MINITEM		4		/* magic buffer rounding	*/
51 
/*
 * name => small integer mapping
 *
 * used by both keyword tables in this file: dict[] (values are ID_*)
 * and info[] (values are INFO_*); the table initializers supply only
 * name and value, so link starts out zeroed
 */
52 typedef struct				/* identifier dictionary entry	*/
53 {
54 	const char	name[16];	/* identifier name		*/
55 	int		value;		/* identifier value		*/
56 	Dtlink_t	link;		/* dictionary link		*/
57 } Info_t;
58 
/*
 * singly linked list node holding one compiled regex
 * (not referenced by the code visible in this part of the file)
 */
59 typedef struct Edit			/* edit substitution		*/
60 {
61 	struct Edit*	next;		/* next in list			*/
62 	regex_t*	from;		/* from pattern			*/
63 } Edit_t;
64 
/* forward declaration: Loop_t points at an Entry before Entry_t is defined */
65 struct Entry;
66 
/*
 * state for the 'l' (loop) operation of ckmagic()
 *
 * on the first pass ckmagic() stores the iteration count in count, saves
 * the current base offset in offset and restarts at start; each later
 * pass advances the base offset by size until count reaches 0, at which
 * point the saved offset is restored
 */
67 typedef struct				/* loop info			*/
68 {
69 	struct Entry*	lab;		/* call this function		*/
70 	int		start;		/* start here			*/
71 	int		size;		/* increment by this amount	*/
72 	int		count;		/* dynamic loop count		*/
73 	int		offset;		/* dynamic offset		*/
74 } Loop_t;
75 
/*
 * one entry of the parsed magic table, as interpreted by ckmagic()
 *
 * which value member is live depends on the entry:
 *	num	numeric comparisons
 *	str	types 's' 'm' 'M' 'S'
 *	sub	types 'e' 'E'
 *	lab	cont '$' (function call)
 *	loop	op 'l'
 *
 * when expr is set, offset is not a byte offset: 0 means evaluate expr
 * with strexpr(), any other value is an INFO_* stat keyword selector
 *
 * for type 's' mask is the string length rather than a bit mask
 *
 * cont values tested by ckmagic(): '#' '$' ':' '|' '&'
 * op values tested by ckmagic(): '=' '@' '!' '^' '>' '<' 'l' 'm' 'r' 'v'
 */
76 typedef struct Entry			/* magic file entry		*/
77 {
78 	struct Entry*	next;		/* next in list			*/
79 	char*		expr;		/* offset expression		*/
80 	union
81 	{
82 	unsigned long	num;
83 	char*		str;
84 	struct Entry*	lab;
85 	regex_t*	sub;
86 	Loop_t*		loop;
87 	}		value;		/* comparison value		*/
88 	char*		desc;		/* file description		*/
89 	char*		mime;		/* file mime type		*/
90 	unsigned long	offset;		/* offset in bytes		*/
91 	unsigned long	mask;		/* mask before compare		*/
92 	char		cont;		/* continuation operation	*/
93 	char		type;		/* datum type			*/
94 	char		op;		/* comparison operation		*/
95 	char		nest;		/* { or } nesting operation	*/
96 	char		swap;		/* forced swap order		*/
97 } Entry_t;
98 
/* class bits kept per code set in a Cctype_t; cklang() shifts by CC_BIT to step from one code set to the next */
99 #define CC_BIT		5
100 
/* Cctype_t must hold CC_BIT bits for each of the CC_MAPS code sets */
101 #if (CC_MAPS*CC_BIT) <= (CHAR_BIT*2)
102 typedef unsigned short Cctype_t;
103 #else
104 typedef unsigned long Cctype_t;
105 #endif
106 
/* character class bits for one code set */
107 #define CC_text		0x01
108 #define CC_control	0x02
109 #define CC_latin	0x04
110 #define CC_binary	0x08
111 #define CC_utf_8	0x10
112 
113 #define CC_notext	CC_text		/* CC_text is flipped before checking */
114 
/* the class bits compared by cklang(); CC_utf_8 is not part of the mask */
115 #define CC_MASK		(CC_binary|CC_latin|CC_control|CC_text)
116 
/*
 * classify byte value c:
 *	above 0240			CC_binary
 *	0200 through 0240		CC_latin
 *	below 040, other than 007 011 012 013 015	CC_control
 *	anything else			CC_text
 */
117 #define CCTYPE(c)	(((c)>0240)?CC_binary:((c)>=0200)?CC_latin:((c)<040&&(c)!=007&&(c)!=011&&(c)!=012&&(c)!=013&&(c)!=015)?CC_control:CC_text)
118 
/*
 * identifier classes: the value column of dict[] and the index into
 * the Magic_t identifier[] counters, which are sized by ID_MAX
 */
119 #define ID_NONE		0
120 #define ID_ASM		1
121 #define ID_C		2
122 #define ID_COBOL	3
123 #define ID_COPYBOOK	4
124 #define ID_CPLUSPLUS	5
125 #define ID_FORTRAN	6
126 #define ID_HTML		7
127 #define ID_INCL1	8
128 #define ID_INCL2	9
129 #define ID_INCL3	10
130 #define ID_MAM1		11
131 #define ID_MAM2		12
132 #define ID_MAM3		13
133 #define ID_NOTEXT	14
134 #define ID_PL1		15
135 #define ID_YACC		16
136 
/* must track the largest ID_* value above */
137 #define ID_MAX		ID_YACC
138 
/*
 * stat keyword selectors: the value column of info[]; ckmagic() switches
 * on these when an entry has an expression and a non-zero offset
 */
139 #define INFO_atime	1
140 #define INFO_blocks	2
141 #define INFO_ctime	3
142 #define INFO_fstype	4
143 #define INFO_gid	5
144 #define INFO_mode	6
145 #define INFO_mtime	7
146 #define INFO_name	8
147 #define INFO_nlink	9
148 #define INFO_size	10
149 #define INFO_uid	11
150 
/*
 * library private Magic_t members
 *
 * defined before <magic.h> is included; presumably <magic.h> expands
 * this inside the Magic_t declaration -- confirm against the header
 *
 * fbuf holds the leading file data and xbuf a second window refilled
 * on demand by getdata(); both have one extra byte for a terminating 0
 */
151 #define _MAGIC_PRIVATE_ \
152 	Magicdisc_t*	disc;			/* discipline		*/ \
153 	Vmalloc_t*	vm;			/* vmalloc region	*/ \
154 	Entry_t*	magic;			/* parsed magic table	*/ \
155 	Entry_t*	magiclast;		/* last entry in magic	*/ \
156 	char*		mime;			/* MIME type		*/ \
157 	unsigned char*	x2n;			/* CC_ALIEN=>CC_NATIVE	*/ \
158 	char		fbuf[SF_BUFSIZE + 1];	/* file data		*/ \
159 	char		xbuf[SF_BUFSIZE + 1];	/* indirect file data	*/ \
160 	char		nbuf[256];		/* !CC_NATIVE data	*/ \
161 	char		mbuf[64];		/* mime string		*/ \
162 	char		sbuf[64];		/* type suffix string	*/ \
163 	char		tbuf[2 * PATH_MAX];	/* type string		*/ \
164 	Cctype_t	cctype[UCHAR_MAX + 1];	/* char code types	*/ \
165 	unsigned int	count[UCHAR_MAX + 1];	/* char frequency count	*/ \
166 	unsigned int	multi[UCHAR_MAX + 1];	/* multi char count	*/ \
167 	int		keep[MAXNEST];		/* ckmagic nest stack	*/ \
168 	char*		cap[MAXNEST];		/* ckmagic mime stack	*/ \
169 	char*		msg[MAXNEST];		/* ckmagic text stack	*/ \
170 	Entry_t*	ret[MAXNEST];		/* ckmagic return stack	*/ \
171 	int		fbsz;			/* fbuf size		*/ \
172 	int		fbmx;			/* fbuf max size	*/ \
173 	int		xbsz;			/* xbuf size		*/ \
174 	int		swap;			/* swap() operation	*/ \
175 	unsigned long	flags;			/* disc+open flags	*/ \
176 	long		xoff;			/* xbuf offset		*/ \
177 	int		identifier[ID_MAX + 1];	/* Info_t identifier	*/ \
178 	Sfio_t*		fp;			/* fbuf fp		*/ \
179 	Sfio_t*		tmp;			/* tmp string		*/ \
180 	regdisc_t	redisc;			/* regex discipline	*/ \
181 	Dtdisc_t	dtdisc;			/* dict discipline	*/ \
182 	Dt_t*		idtab;			/* identifier dict	*/ \
183 	Dt_t*		infotab;		/* info keyword dict	*/
184 
185 #include <magic.h>
186 
/*
 * programming language keyword => ID_* class
 *
 * cklang() inserts every entry into the Magic_t idtab dictionary the
 * first time it needs it, then increments identifier[value] each time
 * one of these names shows up as an identifier in the sampled data
 *
 * lookups are exact, so upper and lower case spellings are listed separately
 */
187 static Info_t		dict[] =		/* keyword dictionary	*/
188 {
189 	{ 	"COMMON",	ID_FORTRAN	},
190 	{ 	"COMPUTE",	ID_COBOL	},
191 	{ 	"COMP",		ID_COPYBOOK	},
192 	{ 	"COMPUTATIONAL",ID_COPYBOOK	},
193 	{ 	"DCL",		ID_PL1		},
194 	{ 	"DEFINED",	ID_PL1		},
195 	{ 	"DIMENSION",	ID_FORTRAN	},
196 	{ 	"DIVISION",	ID_COBOL	},
197 	{ 	"FILLER",	ID_COPYBOOK	},
198 	{ 	"FIXED",	ID_PL1		},
199 	{ 	"FUNCTION",	ID_FORTRAN	},
200 	{ 	"HTML",		ID_HTML		},
201 	{ 	"INTEGER",	ID_FORTRAN	},
202 	{ 	"MAIN",		ID_PL1		},
203 	{ 	"OPTIONS",	ID_PL1		},
204 	{ 	"PERFORM",	ID_COBOL	},
205 	{ 	"PIC",		ID_COPYBOOK	},
206 	{ 	"REAL",		ID_FORTRAN	},
207 	{ 	"REDEFINES",	ID_COPYBOOK	},
208 	{ 	"S9",		ID_COPYBOOK	},
209 	{ 	"SECTION",	ID_COBOL	},
210 	{ 	"SELECT",	ID_COBOL	},
211 	{ 	"SUBROUTINE",	ID_FORTRAN	},
212 	{ 	"TEXT",		ID_ASM		},
213 	{ 	"VALUE",	ID_COPYBOOK	},
214 	{ 	"attr",		ID_MAM3		},
215 	{ 	"binary",	ID_YACC		},
216 	{ 	"block",	ID_FORTRAN	},
217 	{ 	"bss",		ID_ASM		},
218 	{ 	"byte",		ID_ASM		},
219 	{ 	"char",		ID_C		},
220 	{ 	"class",	ID_CPLUSPLUS	},
221 	{ 	"clr",		ID_NOTEXT	},
222 	{ 	"comm",		ID_ASM		},
223 	{ 	"common",	ID_FORTRAN	},
224 	{ 	"data",		ID_ASM		},
225 	{ 	"dimension",	ID_FORTRAN	},
226 	{ 	"done",		ID_MAM2		},
227 	{ 	"double",	ID_C		},
228 	{ 	"even",		ID_ASM		},
229 	{ 	"exec",		ID_MAM3		},
230 	{ 	"extern",	ID_C		},
231 	{ 	"float",	ID_C		},
232 	{ 	"function",	ID_FORTRAN	},
233 	{ 	"globl",	ID_ASM		},
234 	{ 	"h",		ID_INCL3	},
235 	{ 	"html",		ID_HTML		},
236 	{ 	"include",	ID_INCL1	},
237 	{ 	"int",		ID_C		},
238 	{ 	"integer",	ID_FORTRAN	},
239 	{ 	"jmp",		ID_NOTEXT	},
240 	{ 	"left",		ID_YACC		},
241 	{ 	"libc",		ID_INCL2	},
242 	{ 	"long",		ID_C		},
243 	{ 	"make",		ID_MAM1		},
244 	{ 	"mov",		ID_NOTEXT	},
245 	{ 	"private",	ID_CPLUSPLUS	},
246 	{ 	"public",	ID_CPLUSPLUS	},
247 	{ 	"real",		ID_FORTRAN	},
248 	{ 	"register",	ID_C		},
249 	{ 	"right",	ID_YACC		},
250 	{ 	"sfio",		ID_INCL2	},
251 	{ 	"static",	ID_C		},
252 	{ 	"stdio",	ID_INCL2	},
253 	{ 	"struct",	ID_C		},
254 	{ 	"subroutine",	ID_FORTRAN	},
255 	{ 	"sys",		ID_NOTEXT	},
256 	{ 	"term",		ID_YACC		},
257 	{ 	"text",		ID_ASM		},
258 	{ 	"tst",		ID_NOTEXT	},
259 	{ 	"type",		ID_YACC		},
260 	{ 	"typedef",	ID_C		},
261 	{ 	"u",		ID_INCL2	},
262 	{ 	"union",	ID_YACC		},
263 	{ 	"void",		ID_C		},
264 };
265 
/*
 * stat keyword => INFO_* selector
 *
 * the selectors are the values ckmagic() switches on to pull a field out
 * of the stat buffer; presumably the magic file parser (not in this part
 * of the file) looks names up here through the infotab dictionary
 */
266 static Info_t		info[] =
267 {
268 	{	"atime",	INFO_atime		},
269 	{	"blocks",	INFO_blocks		},
270 	{	"ctime",	INFO_ctime		},
271 	{	"fstype",	INFO_fstype		},
272 	{	"gid",		INFO_gid		},
273 	{	"mode",		INFO_mode		},
274 	{	"mtime",	INFO_mtime		},
275 	{	"name",		INFO_name		},
276 	{	"nlink",	INFO_nlink		},
277 	{	"size",		INFO_size		},
278 	{	"uid",		INFO_uid		},
279 };
280 
281 /*
282  * return pointer to data at offset off and size siz
283  */
284 
285 static char*
286 getdata(register Magic_t* mp, register long off, register int siz)
287 {
288 	register long	n;
289 
290 	if (off < 0)
291 		return 0;
292 	if (off + siz <= mp->fbsz)
293 		return mp->fbuf + off;
294 	if (off < mp->xoff || off + siz > mp->xoff + mp->xbsz)
295 	{
296 		if (off + siz > mp->fbmx)
297 			return 0;
298 		n = (off / (SF_BUFSIZE / 2)) * (SF_BUFSIZE / 2);
299 		if (sfseek(mp->fp, n, SEEK_SET) != n)
300 			return 0;
301 		if ((mp->xbsz = sfread(mp->fp, mp->xbuf, sizeof(mp->xbuf) - 1)) < 0)
302 		{
303 			mp->xoff = 0;
304 			mp->xbsz = 0;
305 			return 0;
306 		}
307 		mp->xbuf[mp->xbsz] = 0;
308 		mp->xoff = n;
309 		if (off + siz > mp->xoff + mp->xbsz)
310 			return 0;
311 	}
312 	return mp->xbuf + off - mp->xoff;
313 }
314 
315 /*
316  * @... evaluator for strexpr()
317  */
318 
319 static long
320 indirect(const char* cs, char** e, void* handle)
321 {
322 	register char*		s = (char*)cs;
323 	register Magic_t*	mp = (Magic_t*)handle;
324 	register long		n = 0;
325 	register char*		p;
326 
327 	if (s)
328 	{
329 		if (*s == '@')
330 		{
331 			n = *++s == '(' ? strexpr(s, e, indirect, mp) : strtol(s, e, 0);
332 			switch (*(s = *e))
333 			{
334 			case 'b':
335 			case 'B':
336 				s++;
337 				if (p = getdata(mp, n, 1))
338 					n = *(unsigned char*)p;
339 				else
340 					s = (char*)cs;
341 				break;
342 			case 'h':
343 			case 'H':
344 				s++;
345 				if (p = getdata(mp, n, 2))
346 					n = swapget(mp->swap, p, 2);
347 				else
348 					s = (char*)cs;
349 				break;
350 			case 'q':
351 			case 'Q':
352 				s++;
353 				if (p = getdata(mp, n, 8))
354 					n = swapget(mp->swap, p, 8);
355 				else
356 					s = (char*)cs;
357 				break;
358 			default:
359 				if (isalnum(*s))
360 					s++;
361 				if (p = getdata(mp, n, 4))
362 					n = swapget(mp->swap, p, 4);
363 				else
364 					s = (char*)cs;
365 				break;
366 			}
367 		}
368 		*e = s;
369 	}
370 	else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
371 		(*mp->disc->errorf)(mp, mp->disc, 2, "%s in indirect expression", *e);
372 	return n;
373 }
374 
375 /*
376  * emit regex error message
377  */
378 
379 static void
380 regmessage(Magic_t* mp, regex_t* re, int code)
381 {
382 	char	buf[128];
383 
384 	if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
385 	{
386 		regerror(code, re, buf, sizeof(buf));
387 		(*mp->disc->errorf)(mp, mp->disc, 3, "regex: %s", buf);
388 	}
389 }
390 
391 /*
392  * decompose vcodex(3) method composition
393  */
394 
395 static char*
396 vcdecomp(char* b, char* e, unsigned char* m, unsigned char* x)
397 {
398 	unsigned char*	map;
399 	const char*	o;
400 	int		c;
401 	int		n;
402 	int		i;
403 	int		a;
404 
405 	map = CCMAP(CC_ASCII, CC_NATIVE);
406 	a = 0;
407 	i = 1;
408 	for (;;)
409 	{
410 		if (i)
411 			i = 0;
412 		else
413 			*b++ = '^';
414 		if (m < (x - 1) && !*(m + 1))
415 		{
416 			/*
417 			 * obsolete indices
418 			 */
419 
420 			if (!a)
421 			{
422 				a = 1;
423 				o = "old, ";
424 				while (b < e && (c = *o++))
425 					*b++ = c;
426 			}
427 			switch (*m)
428 			{
429 			case 0:		o = "delta"; break;
430 			case 1:		o = "huffman"; break;
431 			case 2:		o = "huffgroup"; break;
432 			case 3:		o = "arith"; break;
433 			case 4:		o = "bwt"; break;
434 			case 5:		o = "rle"; break;
435 			case 6:		o = "mtf"; break;
436 			case 7:		o = "transpose"; break;
437 			case 8:		o = "table"; break;
438 			case 9:		o = "huffpart"; break;
439 			case 50:	o = "map"; break;
440 			case 100:	o = "recfm"; break;
441 			case 101:	o = "ss7"; break;
442 			default:	o = "UNKNOWN"; break;
443 			}
444 			m += 2;
445 			while (b < e && (c = *o++))
446 				*b++ = c;
447 		}
448 		else
449 			while (b < e && m < x && (c = *m++))
450 			{
451 				if (map)
452 					c = map[c];
453 				*b++ = c;
454 			}
455 		if (b >= e)
456 			break;
457 		n = 0;
458 		while (m < x)
459 		{
460 			n = (n<<7) | (*m & 0x7f);
461 			if (!(*m++ & 0x80))
462 				break;
463 		}
464 		if (n >= (x - m))
465 			break;
466 		m += n;
467 	}
468 	return b;
469 }
470 
471 /*
472  * check for magic table match in buf
473  */
474 
475 static char*
476 ckmagic(register Magic_t* mp, const char* file, char* buf, struct stat* st, unsigned long off)
477 {
478 	register Entry_t*	ep;
479 	register char*		p;
480 	register char*		b;
481 	register int		level = 0;
482 	int			call = -1;
483 	int			c;
484 	char*			q;
485 	char*			t;
486 	char*			base = 0;
487 	unsigned long		num;
488 	unsigned long		mask;
489 	regmatch_t		matches[10];
490 
491 	mp->swap = 0;
492 	b = mp->msg[0] = buf;
493 	mp->mime = mp->cap[0] = 0;
494 	mp->keep[0] = 0;
495 	for (ep = mp->magic; ep; ep = ep->next)
496 	{
497 	fun:
498 		if (ep->nest == '{')
499 		{
500 			if (++level >= MAXNEST)
501 			{
502 				call = -1;
503 				level = 0;
504 				mp->keep[0] = 0;
505 				b = mp->msg[0];
506 				mp->mime = mp->cap[0];
507 				continue;
508 			}
509 			mp->keep[level] = mp->keep[level - 1] != 0;
510 			mp->msg[level] = b;
511 			mp->cap[level] = mp->mime;
512 		}
513 		switch (ep->cont)
514 		{
515 		case '#':
516 			if (mp->keep[level] && b > buf)
517 			{
518 				*b = 0;
519 				return buf;
520 			}
521 			mp->swap = 0;
522 			b = mp->msg[0] = buf;
523 			mp->mime = mp->cap[0] = 0;
524 			if (ep->type == ' ')
525 				continue;
526 			break;
527 		case '$':
528 			if (mp->keep[level] && call < (MAXNEST - 1))
529 			{
530 				mp->ret[++call] = ep;
531 				ep = ep->value.lab;
532 				goto fun;
533 			}
534 			continue;
535 		case ':':
536 			ep = mp->ret[call--];
537 			if (ep->op == 'l')
538 				goto fun;
539 			continue;
540 		case '|':
541 			if (mp->keep[level] > 1)
542 				goto checknest;
543 			/*FALLTHROUGH*/
544 		default:
545 			if (!mp->keep[level])
546 			{
547 				b = mp->msg[level];
548 				mp->mime = mp->cap[level];
549 				goto checknest;
550 			}
551 			break;
552 		}
553 		p = "";
554 		num = 0;
555 		if (!ep->expr)
556 			num = ep->offset + off;
557 		else
558 			switch (ep->offset)
559 			{
560 			case 0:
561 				num = strexpr(ep->expr, NiL, indirect, mp) + off;
562 				break;
563 			case INFO_atime:
564 				num = st->st_atime;
565 				ep->type = 'D';
566 				break;
567 			case INFO_blocks:
568 				num = iblocks(st);
569 				ep->type = 'N';
570 				break;
571 			case INFO_ctime:
572 				num = st->st_ctime;
573 				ep->type = 'D';
574 				break;
575 			case INFO_fstype:
576 				p = fmtfs(st);
577 				ep->type = toupper(ep->type);
578 				break;
579 			case INFO_gid:
580 				if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
581 				{
582 					p = fmtgid(st->st_gid);
583 					ep->type = toupper(ep->type);
584 				}
585 				else
586 				{
587 					num = st->st_gid;
588 					ep->type = 'N';
589 				}
590 				break;
591 			case INFO_mode:
592 				if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
593 				{
594 					p = fmtmode(st->st_mode, 0);
595 					ep->type = toupper(ep->type);
596 				}
597 				else
598 				{
599 					num = modex(st->st_mode);
600 					ep->type = 'N';
601 				}
602 				break;
603 			case INFO_mtime:
604 				num = st->st_ctime;
605 				ep->type = 'D';
606 				break;
607 			case INFO_name:
608 				if (!base)
609 				{
610 					if (base = strrchr(file, '/'))
611 						base++;
612 					else
613 						base = (char*)file;
614 				}
615 				p = base;
616 				ep->type = toupper(ep->type);
617 				break;
618 			case INFO_nlink:
619 				num = st->st_nlink;
620 				ep->type = 'N';
621 				break;
622 			case INFO_size:
623 				num = st->st_size;
624 				ep->type = 'N';
625 				break;
626 			case INFO_uid:
627 				if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
628 				{
629 					p = fmtuid(st->st_uid);
630 					ep->type = toupper(ep->type);
631 				}
632 				else
633 				{
634 					num = st->st_uid;
635 					ep->type = 'N';
636 				}
637 				break;
638 			}
639 		switch (ep->type)
640 		{
641 
642 		case 'b':
643 			if (!(p = getdata(mp, num, 1)))
644 				goto next;
645 			num = *(unsigned char*)p;
646 			break;
647 
648 		case 'h':
649 			if (!(p = getdata(mp, num, 2)))
650 				goto next;
651 			num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 2);
652 			break;
653 
654 		case 'd':
655 		case 'l':
656 		case 'v':
657 			if (!(p = getdata(mp, num, 4)))
658 				goto next;
659 			num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 4);
660 			break;
661 
662 		case 'q':
663 			if (!(p = getdata(mp, num, 8)))
664 				goto next;
665 			num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 8);
666 			break;
667 
668 		case 'e':
669 			if (!(p = getdata(mp, num, 0)))
670 				goto next;
671 			/*FALLTHROUGH*/
672 		case 'E':
673 			if (!ep->value.sub)
674 				goto next;
675 			if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches)))
676 			{
677 				c = mp->fbsz;
678 				if (c >= sizeof(mp->nbuf))
679 					c = sizeof(mp->nbuf) - 1;
680 				p = (char*)memcpy(mp->nbuf, p, c);
681 				p[c] = 0;
682 				ccmapstr(mp->x2n, p, c);
683 				if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches)))
684 				{
685 					if (c != REG_NOMATCH)
686 						regmessage(mp, ep->value.sub, c);
687 					goto next;
688 				}
689 			}
690 			p = ep->value.sub->re_sub->re_buf;
691 			q = T(ep->desc);
692 			t = *q ? q : p;
693 			if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *t && *t != ',' && *t != '.' && *t != '\b')
694 				*b++ = ' ';
695 			b += sfsprintf(b, PATH_MAX - (b - buf), *q ? q : "%s", p + (*p == '\b'));
696 			if (ep->mime)
697 				mp->mime = ep->mime;
698 			goto checknest;
699 
700 		case 's':
701 			if (!(p = getdata(mp, num, ep->mask)))
702 				goto next;
703 			goto checkstr;
704 		case 'm':
705 			if (!(p = getdata(mp, num, 0)))
706 				goto next;
707 			/*FALLTHROUGH*/
708 		case 'M':
709 		case 'S':
710 		checkstr:
711 			for (;;)
712 			{
713 				if (*ep->value.str == '*' && !*(ep->value.str + 1) && isprint(*p))
714 					break;
715 				if ((ep->type == 'm' || ep->type == 'M') ? strmatch(p, ep->value.str) : !memcmp(p, ep->value.str, ep->mask))
716 					break;
717 				if (p == mp->nbuf || ep->mask >= sizeof(mp->nbuf))
718 					goto next;
719 				p = (char*)memcpy(mp->nbuf, p, ep->mask);
720 				p[ep->mask] = 0;
721 				ccmapstr(mp->x2n, p, ep->mask);
722 			}
723 			q = T(ep->desc);
724 			if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b')
725 				*b++ = ' ';
726 			for (t = p; (c = *t) >= 0 && c <= 0177 && isprint(c) && c != '\n'; t++);
727 			*t = 0;
728 			b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), p);
729 			*t = c;
730 			if (ep->mime)
731 				mp->mime = ep->mime;
732 			goto checknest;
733 
734 		}
735 		if (mask = ep->mask)
736 			num &= mask;
737 		switch (ep->op)
738 		{
739 
740 		case '=':
741 		case '@':
742 			if (num == ep->value.num)
743 				break;
744 			if (ep->cont != '#')
745 				goto next;
746 			if (!mask)
747 				mask = ~mask;
748 			if (ep->type == 'h')
749 			{
750 				if ((num = swapget(mp->swap = 1, p, 2) & mask) == ep->value.num)
751 				{
752 					if (!(mp->swap & (mp->swap + 1)))
753 						mp->swap = 7;
754 					goto swapped;
755 				}
756 			}
757 			else if (ep->type == 'l')
758 			{
759 				for (c = 1; c < 4; c++)
760 					if ((num = swapget(mp->swap = c, p, 4) & mask) == ep->value.num)
761 					{
762 						if (!(mp->swap & (mp->swap + 1)))
763 							mp->swap = 7;
764 						goto swapped;
765 					}
766 			}
767 			else if (ep->type == 'q')
768 			{
769 				for (c = 1; c < 8; c++)
770 					if ((num = swapget(mp->swap = c, p, 8) & mask) == ep->value.num)
771 						goto swapped;
772 			}
773 			goto next;
774 
775 		case '!':
776 			if (num != ep->value.num)
777 				break;
778 			goto next;
779 
780 		case '^':
781 			if (num ^ ep->value.num)
782 				break;
783 			goto next;
784 
785 		case '>':
786 			if (num > ep->value.num)
787 				break;
788 			goto next;
789 
790 		case '<':
791 			if (num < ep->value.num)
792 				break;
793 			goto next;
794 
795 		case 'l':
796 			if (num > 0 && mp->keep[level] && call < (MAXNEST - 1))
797 			{
798 				if (!ep->value.loop->count)
799 				{
800 					ep->value.loop->count = num;
801 					ep->value.loop->offset = off;
802 					off = ep->value.loop->start;
803 				}
804 				else if (!--ep->value.loop->count)
805 				{
806 					off = ep->value.loop->offset;
807 					goto next;
808 				}
809 				else
810 					off += ep->value.loop->size;
811 				mp->ret[++call] = ep;
812 				ep = ep->value.loop->lab;
813 				goto fun;
814 			}
815 			goto next;
816 
817 		case 'm':
818 			c = mp->swap;
819 			t = ckmagic(mp, file, b + (b > buf), st, num);
820 			mp->swap = c;
821 			if (!t)
822 				goto next;
823 			if (b > buf)
824 				*b = ' ';
825 			b += strlen(b);
826 			break;
827 
828 		case 'r':
829 #if _UWIN
830 		{
831 			char*			e;
832 			Sfio_t*			rp;
833 			Sfio_t*			gp;
834 
835 			if (!(t = strrchr(file, '.')))
836 				goto next;
837 			sfprintf(mp->tmp, "/reg/classes_root/%s", t);
838 			if (!(t = sfstruse(mp->tmp)) || !(rp = sfopen(NiL, t, "r")))
839 				goto next;
840 			*ep->desc = 0;
841 			*ep->mime = 0;
842 			gp = 0;
843 			while (t = sfgetr(rp, '\n', 1))
844 			{
845 				if (strneq(t, "Content Type=", 13))
846 				{
847 					ep->mime = vmnewof(mp->vm, ep->mime, char, sfvalue(rp), 0);
848 					strcpy(ep->mime, t + 13);
849 					if (gp)
850 						break;
851 				}
852 				else
853 				{
854 					sfprintf(mp->tmp, "/reg/classes_root/%s", t);
855 					if ((e = sfstruse(mp->tmp)) && (gp = sfopen(NiL, e, "r")))
856 					{
857 						ep->desc = vmnewof(mp->vm, ep->desc, char, strlen(t), 1);
858 						strcpy(ep->desc, t);
859 						if (*ep->mime)
860 							break;
861 					}
862 				}
863 			}
864 			sfclose(rp);
865 			if (!gp)
866 				goto next;
867 			if (!*ep->mime)
868 			{
869 				t = T(ep->desc);
870 				if (!strncasecmp(t, "microsoft", 9))
871 					t += 9;
872 				while (isspace(*t))
873 					t++;
874 				e = "application/x-ms-";
875 				ep->mime = vmnewof(mp->vm, ep->mime, char, strlen(t), strlen(e));
876 				e = strcopy(ep->mime, e);
877 				while ((c = *t++) && c != '.' && c != ' ')
878 					*e++ = isupper(c) ? tolower(c) : c;
879 				*e = 0;
880 			}
881 			while (t = sfgetr(gp, '\n', 1))
882 				if (*t && !streq(t, "\"\""))
883 				{
884 					ep->desc = vmnewof(mp->vm, ep->desc, char, sfvalue(gp), 0);
885 					strcpy(ep->desc, t);
886 					break;
887 				}
888 			sfclose(gp);
889 			if (!*ep->desc)
890 				goto next;
891 			if (!t)
892 				for (t = T(ep->desc); *t; t++)
893 					if (*t == '.')
894 						*t = ' ';
895 			if (!mp->keep[level])
896 				mp->keep[level] = 2;
897 			mp->mime = ep->mime;
898 			break;
899 		}
900 #else
901 			if (ep->cont == '#' && !mp->keep[level])
902 				mp->keep[level] = 1;
903 			goto next;
904 #endif
905 
906 		case 'v':
907 			if (!(p = getdata(mp, num, 4)))
908 				goto next;
909 			c = 0;
910 			do
911 			{
912 				num++;
913 				c = (c<<7) | (*p & 0x7f);
914 			} while (*p++ & 0x80);
915 			if (!(p = getdata(mp, num, c)))
916 				goto next;
917 			if (mp->keep[level]++ && b > buf && *(b - 1) != ' ')
918 			{
919 				*b++ = ',';
920 				*b++ = ' ';
921 			}
922 			b = vcdecomp(b, buf + PATH_MAX, (unsigned char*)p, (unsigned char*)p + c);
923 			goto checknest;
924 
925 		}
926 	swapped:
927 		q = T(ep->desc);
928 		if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b')
929 			*b++ = ' ';
930 		if (ep->type == 'd' || ep->type == 'D')
931 			b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), fmttime("%?%l", (time_t)num));
932 		else if (ep->type == 'v')
933 			b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), fmtversion(num));
934 		else
935 			b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), num);
936 		if (ep->mime && *ep->mime)
937 			mp->mime = ep->mime;
938 	checknest:
939 		if (ep->nest == '}')
940 		{
941 			if (!mp->keep[level])
942 			{
943 				b = mp->msg[level];
944 				mp->mime = mp->cap[level];
945 			}
946 			else if (level > 0)
947 				mp->keep[level - 1] = mp->keep[level];
948 			if (--level < 0)
949 			{
950 				level = 0;
951 				mp->keep[0] = 0;
952 			}
953 		}
954 		continue;
955 	next:
956 		if (ep->cont == '&')
957 			mp->keep[level] = 0;
958 		goto checknest;
959 	}
960 	if (mp->keep[level] && b > buf)
961 	{
962 		*b = 0;
963 		return buf;
964 	}
965 	return 0;
966 }
967 
968 /*
969  * check english language stats
970  */
971 
972 static int
973 ckenglish(register Magic_t* mp, int pun, int badpun)
974 {
975 	register char*	s;
976 	register int	vowl = 0;
977 	register int	freq = 0;
978 	register int	rare = 0;
979 
980 	if (5 * badpun > pun)
981 		return 0;
982 	if (2 * mp->count[';'] > mp->count['E'] + mp->count['e'])
983 		return 0;
984 	if ((mp->count['>'] + mp->count['<'] + mp->count['/']) > mp->count['E'] + mp->count['e'])
985 		return 0;
986 	for (s = "aeiou"; *s; s++)
987 		vowl += mp->count[toupper(*s)] + mp->count[*s];
988 	for (s = "etaion"; *s; s++)
989 		freq += mp->count[toupper(*s)] + mp->count[*s];
990 	for (s = "vjkqxz"; *s; s++)
991 		rare += mp->count[toupper(*s)] + mp->count[*s];
992 	return 5 * vowl >= mp->fbsz - mp->count[' '] && freq >= 10 * rare;
993 }
994 
995 /*
996  * check programming language stats
997  */
998 
999 static char*
1000 cklang(register Magic_t* mp, const char* file, char* buf, struct stat* st)
1001 {
1002 	register int		c;
1003 	register unsigned char*	b;
1004 	register unsigned char*	e;
1005 	register int		q;
1006 	register char*		s;
1007 	char*			t;
1008 	char*			base;
1009 	char*			suff;
1010 	char*			t1;
1011 	char*			t2;
1012 	char*			t3;
1013 	int			n;
1014 	int			badpun;
1015 	int			code;
1016 	int			pun;
1017 	Cctype_t		flags;
1018 	Info_t*			ip;
1019 
1020 	b = (unsigned char*)mp->fbuf;
1021 	e = b + mp->fbsz;
1022 	memzero(mp->count, sizeof(mp->count));
1023 	memzero(mp->multi, sizeof(mp->multi));
1024 	memzero(mp->identifier, sizeof(mp->identifier));
1025 
1026 	/*
1027 	 * check character coding
1028 	 */
1029 
1030 	flags = 0;
1031 	while (b < e)
1032 		flags |= mp->cctype[*b++];
1033 	b = (unsigned char*)mp->fbuf;
1034 	code = 0;
1035 	q = CC_ASCII;
1036 	n = CC_MASK;
1037 	for (c = 0; c < CC_MAPS; c++)
1038 	{
1039 		flags ^= CC_text;
1040 		if ((flags & CC_MASK) < n)
1041 		{
1042 			n = flags & CC_MASK;
1043 			q = c;
1044 		}
1045 		flags >>= CC_BIT;
1046 	}
1047 	flags = n;
1048 	if (!(flags & (CC_binary|CC_notext)))
1049 	{
1050 		if (q != CC_NATIVE)
1051 		{
1052 			code = q;
1053 			ccmaps(mp->fbuf, mp->fbsz, q, CC_NATIVE);
1054 		}
1055 		if (b[0] == '#' && b[1] == '!')
1056 		{
1057 			for (b += 2; b < e && isspace(*b); b++);
1058 			for (s = (char*)b; b < e && isprint(*b); b++);
1059 			c = *b;
1060 			*b = 0;
1061 			if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) || match(s, "/*bin*/*") || !access(s, F_OK))
1062 			{
1063 				if (t = strrchr(s, '/'))
1064 					s = t + 1;
1065 				for (t = s; *t; t++)
1066 					if (isspace(*t))
1067 					{
1068 						*t = 0;
1069 						break;
1070 					}
1071 				sfsprintf(mp->mbuf, sizeof(mp->mbuf), "application/x-%s", *s ? s : "sh");
1072 				mp->mime = mp->mbuf;
1073 				if (match(s, "*sh"))
1074 				{
1075 					t1 = T("command");
1076 					if (streq(s, "sh"))
1077 						*s = 0;
1078 					else
1079 					{
1080 						*b++ = ' ';
1081 						*b = 0;
1082 					}
1083 				}
1084 				else
1085 				{
1086 					t1 = T("interpreter");
1087 					*b++ = ' ';
1088 					*b = 0;
1089 				}
1090 				sfsprintf(mp->sbuf, sizeof(mp->sbuf), T("%s%s script"), s, t1);
1091 				s = mp->sbuf;
1092 				goto qualify;
1093 			}
1094 			*b = c;
1095 			b = (unsigned char*)mp->fbuf;
1096 		}
1097 		badpun = 0;
1098 		pun = 0;
1099 		q = 0;
1100 		s = 0;
1101 		t = 0;
1102 		while (b < e)
1103 		{
1104 			c = *b++;
1105 			mp->count[c]++;
1106 			if (c == q && (q != '*' || *b == '/' && b++))
1107 			{
1108 				mp->multi[q]++;
1109 				q = 0;
1110 			}
1111 			else if (c == '\\')
1112 			{
1113 				s = 0;
1114 				b++;
1115 			}
1116 			else if (!q)
1117 			{
1118 				if (isalpha(c) || c == '_')
1119 				{
1120 					if (!s)
1121 						s = (char*)b - 1;
1122 				}
1123 				else if (!isdigit(c))
1124 				{
1125 					if (s)
1126 					{
1127 						if (s > mp->fbuf)
1128 							switch (*(s - 1))
1129 							{
1130 							case ':':
1131 								if (*b == ':')
1132 									mp->multi[':']++;
1133 								break;
1134 							case '.':
1135 								if (((char*)b - s) == 3 && (s == (mp->fbuf + 1) || *(s - 2) == '\n'))
1136 									mp->multi['.']++;
1137 								break;
1138 							case '\n':
1139 							case '\\':
1140 								if (*b == '{')
1141 									t = (char*)b + 1;
1142 								break;
1143 							case '{':
1144 								if (s == t && *b == '}')
1145 									mp->multi['X']++;
1146 								break;
1147 							}
1148 							if (!mp->idtab)
1149 							{
1150 								if (mp->idtab = dtnew(mp->vm, &mp->dtdisc, Dthash))
1151 									for (q = 0; q < elementsof(dict); q++)
1152 										dtinsert(mp->idtab, &dict[q]);
1153 								else if (mp->disc->errorf)
1154 									(*mp->disc->errorf)(mp, mp->disc, 3, "out of space");
1155 								q = 0;
1156 							}
1157 							if (mp->idtab)
1158 							{
1159 								*(b - 1) = 0;
1160 								if (ip = (Info_t*)dtmatch(mp->idtab, s))
1161 									mp->identifier[ip->value]++;
1162 								*(b - 1) = c;
1163 							}
1164 							s = 0;
1165 						}
1166 					switch (c)
1167 					{
1168 					case '\t':
1169 						if (b == (unsigned char*)(mp->fbuf + 1) || *(b - 2) == '\n')
1170 							mp->multi['\t']++;
1171 						break;
1172 					case '"':
1173 					case '\'':
1174 						q = c;
1175 						break;
1176 					case '/':
1177 						if (*b == '*')
1178 							q = *b++;
1179 						else if (*b == '/')
1180 							q = '\n';
1181 						break;
1182 					case '$':
1183 						if (*b == '(' && *(b + 1) != ' ')
1184 							mp->multi['$']++;
1185 						break;
1186 					case '{':
1187 					case '}':
1188 					case '[':
1189 					case ']':
1190 					case '(':
1191 						mp->multi[c]++;
1192 						break;
1193 					case ')':
1194 						mp->multi[c]++;
1195 						goto punctuation;
1196 					case ':':
1197 						if (*b == ':' && isspace(*(b + 1)) && b > (unsigned char*)(mp->fbuf + 1) && isspace(*(b - 2)))
1198 							mp->multi[':']++;
1199 						goto punctuation;
1200 					case '.':
1201 					case ',':
1202 					case '%':
1203 					case ';':
1204 					case '?':
1205 					punctuation:
1206 						pun++;
1207 						if (*b != ' ' && *b != '\n')
1208 							badpun++;
1209 						break;
1210 					}
1211 				}
1212 			}
1213 		}
1214 	}
1215 	else
1216 		while (b < e)
1217 			mp->count[*b++]++;
1218 	base = (t1 = strrchr(file, '/')) ? t1 + 1 : (char*)file;
1219 	suff = (t1 = strrchr(base, '.')) ? t1 + 1 : "";
1220 	if (!flags)
1221 	{
1222 		if (match(suff, "*sh|bat|cmd"))
1223 			goto id_sh;
1224 		if (match(base, "*@(mkfile)"))
1225 			goto id_mk;
1226 		if (match(base, "*@(makefile|.mk)"))
1227 			goto id_make;
1228 		if (match(base, "*@(mamfile|.mam)"))
1229 			goto id_mam;
1230 		if (match(suff, "[cly]?(pp|xx|++)|cc|ll|yy"))
1231 			goto id_c;
1232 		if (match(suff, "f"))
1233 			goto id_fortran;
1234 		if (match(suff, "htm+(l)"))
1235 			goto id_html;
1236 		if (match(suff, "cpy"))
1237 			goto id_copybook;
1238 		if (match(suff, "cob|cbl|cb2"))
1239 			goto id_cobol;
1240 		if (match(suff, "pl[1i]"))
1241 			goto id_pl1;
1242 		if (match(suff, "tex"))
1243 			goto id_tex;
1244 		if (match(suff, "asm|s"))
1245 			goto id_asm;
1246 		if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) && (!suff || suff != strchr(suff, '.')))
1247 		{
1248 		id_sh:
1249 			s = T("command script");
1250 			mp->mime = "application/sh";
1251 			goto qualify;
1252 		}
1253 		if (strmatch(mp->fbuf, "From * [0-9][0-9]:[0-9][0-9]:[0-9][0-9] *"))
1254 		{
1255 			s = T("mail message");
1256 			mp->mime = "message/rfc822";
1257 			goto qualify;
1258 		}
1259 		if (match(base, "*@(mkfile)"))
1260 		{
1261 		id_mk:
1262 			s = "mkfile";
1263 			mp->mime = "application/mk";
1264 			goto qualify;
1265 		}
1266 		if (match(base, "*@(makefile|.mk)") || mp->multi['\t'] >= mp->count[':'] && (mp->multi['$'] > 0 || mp->multi[':'] > 0))
1267 		{
1268 		id_make:
1269 			s = "makefile";
1270 			mp->mime = "application/make";
1271 			goto qualify;
1272 		}
1273 		if (mp->multi['.'] >= 3)
1274 		{
1275 			s = T("nroff input");
1276 			mp->mime = "application/x-troff";
1277 			goto qualify;
1278 		}
1279 		if (mp->multi['X'] >= 3)
1280 		{
1281 			s = T("TeX input");
1282 			mp->mime = "application/x-tex";
1283 			goto qualify;
1284 		}
1285 		if (mp->fbsz < SF_BUFSIZE &&
1286 		    (mp->multi['('] == mp->multi[')'] &&
1287 		     mp->multi['{'] == mp->multi['}'] &&
1288 		     mp->multi['['] == mp->multi[']']) ||
1289 		    mp->fbsz >= SF_BUFSIZE &&
1290 		    (mp->multi['('] >= mp->multi[')'] &&
1291 		     mp->multi['{'] >= mp->multi['}'] &&
1292 		     mp->multi['['] >= mp->multi[']']))
1293 		{
1294 			c = mp->identifier[ID_INCL1];
1295 			if (c >= 2 && mp->identifier[ID_INCL2] >= c && mp->identifier[ID_INCL3] >= c && mp->count['.'] >= c ||
1296 			    mp->identifier[ID_C] >= 5 && mp->count[';'] >= 5 ||
1297 			    mp->count['='] >= 20 && mp->count[';'] >= 20)
1298 			{
1299 			id_c:
1300 				t1 = "";
1301 				t2 = "c ";
1302 				t3 = T("program");
1303 				switch (*suff)
1304 				{
1305 				case 'c':
1306 				case 'C':
1307 					mp->mime = "application/x-cc";
1308 					break;
1309 				case 'l':
1310 				case 'L':
1311 					t1 = "lex ";
1312 					mp->mime = "application/x-lex";
1313 					break;
1314 				default:
1315 					t3 = T("header");
1316 					if (mp->identifier[ID_YACC] < 5 || mp->count['%'] < 5)
1317 					{
1318 						mp->mime = "application/x-cc";
1319 						break;
1320 					}
1321 					/*FALLTHROUGH*/
1322 				case 'y':
1323 				case 'Y':
1324 					t1 = "yacc ";
1325 					mp->mime = "application/x-yacc";
1326 					break;
1327 				}
1328 				if (mp->identifier[ID_CPLUSPLUS] >= 3)
1329 				{
1330 					t2 = "c++ ";
1331 					mp->mime = "application/x-c++";
1332 				}
1333 				sfsprintf(mp->sbuf, sizeof(mp->sbuf), "%s%s%s", t1, t2, t3);
1334 				s = mp->sbuf;
1335 				goto qualify;
1336 			}
1337 		}
1338 		if (mp->identifier[ID_MAM1] >= 2 && mp->identifier[ID_MAM3] >= 2 &&
1339 		    (mp->fbsz < SF_BUFSIZE && mp->identifier[ID_MAM1] == mp->identifier[ID_MAM2] ||
1340 		     mp->fbsz >= SF_BUFSIZE && mp->identifier[ID_MAM1] >= mp->identifier[ID_MAM2]))
1341 		{
1342 		id_mam:
1343 			s = T("mam program");
1344 			mp->mime = "application/x-mam";
1345 			goto qualify;
1346 		}
1347 		if (mp->identifier[ID_FORTRAN] >= 8)
1348 		{
1349 		id_fortran:
1350 			s = T("fortran program");
1351 			mp->mime = "application/x-fortran";
1352 			goto qualify;
1353 		}
1354 		if (mp->identifier[ID_HTML] > 0 && mp->count['<'] >= 8 && (c = mp->count['<'] - mp->count['>']) >= -2 && c <= 2)
1355 		{
1356 		id_html:
1357 			s = T("html input");
1358 			mp->mime = "text/html";
1359 			goto qualify;
1360 		}
1361 		if (mp->identifier[ID_COPYBOOK] > 0 && mp->identifier[ID_COBOL] == 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
1362 		{
1363 		id_copybook:
1364 			s = T("cobol copybook");
1365 			mp->mime = "application/x-cobol";
1366 			goto qualify;
1367 		}
1368 		if (mp->identifier[ID_COBOL] > 0 && mp->identifier[ID_COPYBOOK] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
1369 		{
1370 		id_cobol:
1371 			s = T("cobol program");
1372 			mp->mime = "application/x-cobol";
1373 			goto qualify;
1374 		}
1375 		if (mp->identifier[ID_PL1] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
1376 		{
1377 		id_pl1:
1378 			s = T("pl1 program");
1379 			mp->mime = "application/x-pl1";
1380 			goto qualify;
1381 		}
1382 		if (mp->count['{'] >= 6 && (c = mp->count['{'] - mp->count['}']) >= -2 && c <= 2 && mp->count['\\'] >= mp->count['{'])
1383 		{
1384 		id_tex:
1385 			s = T("TeX input");
1386 			mp->mime = "text/tex";
1387 			goto qualify;
1388 		}
1389 		if (mp->identifier[ID_ASM] >= 4)
1390 		{
1391 		id_asm:
1392 			s = T("as program");
1393 			mp->mime = "application/x-as";
1394 			goto qualify;
1395 		}
1396 		if (ckenglish(mp, pun, badpun))
1397 		{
1398 			s = T("english text");
1399 			mp->mime = "text/plain";
1400 			goto qualify;
1401 		}
1402 	}
1403 	else if (streq(base, "core"))
1404 	{
1405 		mp->mime = "x-system/core";
1406 		return T("core dump");
1407 	}
1408 	if (flags & (CC_binary|CC_notext))
1409 	{
1410 		b = (unsigned char*)mp->fbuf;
1411 		e = b + mp->fbsz;
1412 		n = 0;
1413 		for (;;)
1414 		{
1415 			c = *b++;
1416 			q = 0;
1417 			while (c & 0x80)
1418 			{
1419 				c <<= 1;
1420 				q++;
1421 			}
1422 			switch (q)
1423 			{
1424 			case 4:
1425 				if (b < e && (*b++ & 0xc0) != 0x80)
1426 					break;
1427 			case 3:
1428 				if (b < e && (*b++ & 0xc0) != 0x80)
1429 					break;
1430 			case 2:
1431 				if (b < e && (*b++ & 0xc0) != 0x80)
1432 					break;
1433 				n = 1;
1434 			case 0:
1435 				if (b >= e)
1436 				{
1437 					if (n)
1438 					{
1439 						flags &= ~(CC_binary|CC_notext);
1440 						flags |= CC_utf_8;
1441 					}
1442 					break;
1443 				}
1444 				continue;
1445 			}
1446 			break;
1447 		}
1448 	}
1449 	if (flags & (CC_binary|CC_notext))
1450 	{
1451 		unsigned long	d = 0;
1452 
1453 		if ((q = mp->fbsz / UCHAR_MAX) >= 2)
1454 		{
1455 			/*
1456 			 * compression/encryption via standard deviation
1457 			 */
1458 
1459 
1460 			for (c = 0; c < UCHAR_MAX; c++)
1461 			{
1462 				pun = mp->count[c] - q;
1463 				d += pun * pun;
1464 			}
1465 			d /= mp->fbsz;
1466 		}
1467 		if (d <= 0)
1468 			s = T("binary");
1469 		else if (d < 4)
1470 			s = T("encrypted");
1471 		else if (d < 16)
1472 			s = T("packed");
1473 		else if (d < 64)
1474 			s = T("compressed");
1475 		else if (d < 256)
1476 			s = T("delta");
1477 		else
1478 			s = T("data");
1479 		mp->mime = "application/octet-stream";
1480 		return s;
1481 	}
1482 	mp->mime = "text/plain";
1483 	if (flags & CC_utf_8)
1484 		s = (flags & CC_control) ? T("utf-8 text with control characters") : T("utf-8 text");
1485 	else if (flags & CC_latin)
1486 		s = (flags & CC_control) ? T("latin text with control characters") : T("latin text");
1487 	else
1488 		s = (flags & CC_control) ? T("text with control characters") : T("text");
1489  qualify:
1490 	if (!flags && mp->count['\n'] >= mp->count['\r'] && mp->count['\n'] <= (mp->count['\r'] + 1) && mp->count['\r'])
1491 	{
1492 		t = "dos ";
1493 		mp->mime = "text/dos";
1494 	}
1495 	else
1496 		t = "";
1497 	if (code)
1498 	{
1499 		if (code == CC_ASCII)
1500 			sfsprintf(buf, PATH_MAX, "ascii %s%s", t, s);
1501 		else
1502 		{
1503 			sfsprintf(buf, PATH_MAX, "ebcdic%d %s%s", code - 1, t, s);
1504 			mp->mime = "text/ebcdic";
1505 		}
1506 		s = buf;
1507 	}
1508 	else if (*t)
1509 	{
1510 		sfsprintf(buf, PATH_MAX, "%s%s", t, s);
1511 		s = buf;
1512 	}
1513 	return s;
1514 }
1515 
1516 /*
1517  * return the basic magic string for file,st in buf,size
1518  */
1519 
1520 static char*
1521 type(register Magic_t* mp, const char* file, struct stat* st, char* buf, int size)
1522 {
1523 	register char*	s;
1524 	register char*	t;
1525 
1526 	mp->mime = 0;
1527 	if (!S_ISREG(st->st_mode))
1528 	{
1529 		if (S_ISDIR(st->st_mode))
1530 		{
1531 			mp->mime = "x-system/dir";
1532 			return T("directory");
1533 		}
1534 		if (S_ISLNK(st->st_mode))
1535 		{
1536 			mp->mime = "x-system/lnk";
1537 			s = buf;
1538 			s += sfsprintf(s, PATH_MAX, T("symbolic link to "));
1539 			if (pathgetlink(file, s, size - (s - buf)) < 0)
1540 				return T("cannot read symbolic link text");
1541 			return buf;
1542 		}
1543 		if (S_ISBLK(st->st_mode))
1544 		{
1545 			mp->mime = "x-system/blk";
1546 			sfsprintf(buf, PATH_MAX, T("block special (%s)"), fmtdev(st));
1547 			return buf;
1548 		}
1549 		if (S_ISCHR(st->st_mode))
1550 		{
1551 			mp->mime = "x-system/chr";
1552 			sfsprintf(buf, PATH_MAX, T("character special (%s)"), fmtdev(st));
1553 			return buf;
1554 		}
1555 		if (S_ISFIFO(st->st_mode))
1556 		{
1557 			mp->mime = "x-system/fifo";
1558 			return "fifo";
1559 		}
1560 #ifdef S_ISSOCK
1561 		if (S_ISSOCK(st->st_mode))
1562 		{
1563 			mp->mime = "x-system/sock";
1564 			return "socket";
1565 		}
1566 #endif
1567 	}
1568 	if (!(mp->fbmx = st->st_size))
1569 		s = T("empty");
1570 	else if (!mp->fp)
1571 		s = T("cannot read");
1572 	else
1573 	{
1574 		mp->fbsz = sfread(mp->fp, mp->fbuf, sizeof(mp->fbuf) - 1);
1575 		if (mp->fbsz < 0)
1576 			s = fmterror(errno);
1577 		else if (mp->fbsz == 0)
1578 			s = T("empty");
1579 		else
1580 		{
1581 			mp->fbuf[mp->fbsz] = 0;
1582 			mp->xoff = 0;
1583 			mp->xbsz = 0;
1584 			if (!(s = ckmagic(mp, file, buf, st, 0)))
1585 				s = cklang(mp, file, buf, st);
1586 		}
1587 	}
1588 	if (!mp->mime)
1589 		mp->mime = "application/unknown";
1590 	else if ((t = strchr(mp->mime, '%')) && *(t + 1) == 's' && !*(t + 2))
1591 	{
1592 		register char*	b;
1593 		register char*	be;
1594 		register char*	m;
1595 		register char*	me;
1596 
1597 		b = mp->mime;
1598 		me = (m = mp->mime = mp->fbuf) + sizeof(mp->fbuf) - 1;
1599 		while (m < me && b < t)
1600 			*m++ = *b++;
1601 		b = t = s;
1602 		for (;;)
1603 		{
1604 			if (!(be = strchr(t, ' ')))
1605 			{
1606 				be = b + strlen(b);
1607 				break;
1608 			}
1609 			if (*(be - 1) == ',' || strneq(be + 1, "data", 4) || strneq(be + 1, "file", 4))
1610 				break;
1611 			b = t;
1612 			t = be + 1;
1613 		}
1614 		while (m < me && b < be)
1615 			if ((*m++ = *b++) == ' ')
1616 				*(m - 1) = '-';
1617 		*m = 0;
1618 	}
1619 	return s;
1620 }
1621 
1622 /*
1623  * low level for magicload()
1624  */
1625 
1626 static int
1627 load(register Magic_t* mp, char* file, register Sfio_t* fp)
1628 {
1629 	register Entry_t*	ep;
1630 	register char*		p;
1631 	register char*		p2;
1632 	char*			p3;
1633 	char*			next;
1634 	int			n;
1635 	int			lge;
1636 	int			lev;
1637 	int			ent;
1638 	int			old;
1639 	int			cont;
1640 	Info_t*			ip;
1641 	Entry_t*		ret;
1642 	Entry_t*		first;
1643 	Entry_t*		last = 0;
1644 	Entry_t*		fun['z' - 'a' + 1];
1645 
1646 	memzero(fun, sizeof(fun));
1647 	cont = '$';
1648 	ent = 0;
1649 	lev = 0;
1650 	old = 0;
1651 	ret = 0;
1652 	error_info.file = file;
1653 	error_info.line = 0;
1654 	first = ep = vmnewof(mp->vm, 0, Entry_t, 1, 0);
1655 	while (p = sfgetr(fp, '\n', 1))
1656 	{
1657 		error_info.line++;
1658 		for (; isspace(*p); p++);
1659 
1660 		/*
1661 		 * nesting
1662 		 */
1663 
1664 		switch (*p)
1665 		{
1666 		case 0:
1667 		case '#':
1668 			cont = '#';
1669 			continue;
1670 		case '{':
1671 			if (++lev < MAXNEST)
1672 				ep->nest = *p;
1673 			else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
1674 				(*mp->disc->errorf)(mp, mp->disc, 1, "{ ... } operator nesting too deep -- %d max", MAXNEST);
1675 			continue;
1676 		case '}':
1677 			if (!last || lev <= 0)
1678 			{
1679 				if (mp->disc->errorf)
1680 					(*mp->disc->errorf)(mp, mp->disc, 2, "`%c': invalid nesting", *p);
1681 			}
1682 			else if (lev-- == ent)
1683 			{
1684 				ent = 0;
1685 				ep->cont = ':';
1686 				ep->offset = ret->offset;
1687 				ep->nest = ' ';
1688 				ep->type = ' ';
1689 				ep->op = ' ';
1690 				ep->desc = "[RETURN]";
1691 				last = ep;
1692 				ep = ret->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
1693 				ret = 0;
1694 			}
1695 			else
1696 				last->nest = *p;
1697 			continue;
1698 		default:
1699 			if (*(p + 1) == '{' || *(p + 1) == '(' && *p != '+' && *p != '>' && *p != '&' && *p != '|')
1700 			{
1701 				n = *p++;
1702 				if (n >= 'a' && n <= 'z')
1703 					n -= 'a';
1704 				else
1705 				{
1706 					if (mp->disc->errorf)
1707 						(*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n);
1708 					n = 0;
1709 				}
1710 				if (ret && mp->disc->errorf)
1711 					(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a');
1712 				if (*p == '{')
1713 				{
1714 					ent = ++lev;
1715 					ret = ep;
1716 					ep->desc = "[FUNCTION]";
1717 				}
1718 				else
1719 				{
1720 					if (*(p + 1) != ')' && mp->disc->errorf)
1721 						(*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function call argument list", n + 'a');
1722 					ep->desc = "[CALL]";
1723 				}
1724 				ep->cont = cont;
1725 				ep->offset = n;
1726 				ep->nest = ' ';
1727 				ep->type = ' ';
1728 				ep->op = ' ';
1729 				last = ep;
1730 				ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
1731 				if (ret)
1732 					fun[n] = last->value.lab = ep;
1733 				else if (!(last->value.lab = fun[n]) && mp->disc->errorf)
1734 					(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a');
1735 				continue;
1736 			}
1737 			if (!ep->nest)
1738 				ep->nest = (lev > 0 && lev != ent) ? ('0' + lev - !!ent) : ' ';
1739 			break;
1740 		}
1741 
1742 		/*
1743 		 * continuation
1744 		 */
1745 
1746 		cont = '$';
1747 		switch (*p)
1748 		{
1749 		case '>':
1750 			old = 1;
1751 			if (*(p + 1) == *p)
1752 			{
1753 				/*
1754 				 * old style nesting push
1755 				 */
1756 
1757 				p++;
1758 				old = 2;
1759 				if (!lev && last)
1760 				{
1761 					lev = 1;
1762 					last->nest = '{';
1763 					if (last->cont == '>')
1764 						last->cont = '&';
1765 					ep->nest = '1';
1766 				}
1767 			}
1768 			/*FALLTHROUGH*/
1769 		case '+':
1770 		case '&':
1771 		case '|':
1772 			ep->cont = *p++;
1773 			break;
1774 		default:
1775 			if ((mp->flags & MAGIC_VERBOSE) && !isalpha(*p) && mp->disc->errorf)
1776 				(*mp->disc->errorf)(mp, mp->disc, 1, "`%c': invalid line continuation operator", *p);
1777 			/*FALLTHROUGH*/
1778 		case '*':
1779 		case '0': case '1': case '2': case '3': case '4':
1780 		case '5': case '6': case '7': case '8': case '9':
1781 			ep->cont = (lev > 0) ? '&' : '#';
1782 			break;
1783 		}
1784 		switch (old)
1785 		{
1786 		case 1:
1787 			old = 0;
1788 			if (lev)
1789 			{
1790 				/*
1791 				 * old style nesting pop
1792 				 */
1793 
1794 				lev = 0;
1795 				if (last)
1796 					last->nest = '}';
1797 				ep->nest = ' ';
1798 				if (ep->cont == '&')
1799 					ep->cont = '#';
1800 			}
1801 			break;
1802 		case 2:
1803 			old = 1;
1804 			break;
1805 		}
1806 		if (isdigit(*p))
1807 		{
1808 			/*
1809 			 * absolute offset
1810 			 */
1811 
1812 			ep->offset = strton(p, &next, NiL, 0);
1813 			p2 = next;
1814 		}
1815 		else
1816 		{
1817 			for (p2 = p; *p2 && !isspace(*p2); p2++);
1818 			if (!*p2)
1819 			{
1820 				if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
1821 					(*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p);
1822 				continue;
1823 			}
1824 
1825 			/*
1826 			 * offset expression
1827 			 */
1828 
1829 			*p2++ = 0;
1830 			ep->expr = vmstrdup(mp->vm, p);
1831 			if (isalpha(*p))
1832 				ep->offset = (ip = (Info_t*)dtmatch(mp->infotab, p)) ? ip->value : 0;
1833 			else if (*p == '(' && ep->cont == '>')
1834 			{
1835 				/*
1836 				 * convert old style indirection to @
1837 				 */
1838 
1839 				p = ep->expr + 1;
1840 				for (;;)
1841 				{
1842 					switch (*p++)
1843 					{
1844 					case 0:
1845 					case '@':
1846 					case '(':
1847 						break;
1848 					case ')':
1849 						break;
1850 					default:
1851 						continue;
1852 					}
1853 					break;
1854 				}
1855 				if (*--p == ')')
1856 				{
1857 					*p = 0;
1858 					*ep->expr = '@';
1859 				}
1860 			}
1861 		}
1862 		for (; isspace(*p2); p2++);
1863 		for (p = p2; *p2 && !isspace(*p2); p2++);
1864 		if (!*p2)
1865 		{
1866 			if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
1867 				(*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p);
1868 			continue;
1869 		}
1870 		*p2++ = 0;
1871 
1872 		/*
1873 		 * type
1874 		 */
1875 
1876 		if ((*p == 'b' || *p == 'l') && *(p + 1) == 'e')
1877 		{
1878 			ep->swap = ~(*p == 'l' ? 7 : 0);
1879 			p += 2;
1880 		}
1881 		if (*p == 's')
1882 		{
1883 			if (*(p + 1) == 'h')
1884 				ep->type = 'h';
1885 			else
1886 				ep->type = 's';
1887 		}
1888 		else if (*p == 'a')
1889 			ep->type = 's';
1890 		else
1891 			ep->type = *p;
1892 		if (p = strchr(p, '&'))
1893 		{
1894 			/*
1895 			 * old style mask
1896 			 */
1897 
1898 			ep->mask = strton(++p, NiL, NiL, 0);
1899 		}
1900 		for (; isspace(*p2); p2++);
1901 		if (ep->mask)
1902 			*--p2 = '=';
1903 
1904 		/*
1905 		 * comparison operation
1906 		 */
1907 
1908 		p = p2;
1909 		if (p2 = strchr(p, '\t'))
1910 			*p2++ = 0;
1911 		else
1912 		{
1913 			int	qe = 0;
1914 			int	qn = 0;
1915 
1916 			/*
1917 			 * assume balanced {}[]()\\""'' field
1918 			 */
1919 
1920 			for (p2 = p;;)
1921 			{
1922 				switch (n = *p2++)
1923 				{
1924 				case 0:
1925 					break;
1926 				case '{':
1927 					if (!qe)
1928 						qe = '}';
1929 					if (qe == '}')
1930 						qn++;
1931 					continue;
1932 				case '(':
1933 					if (!qe)
1934 						qe = ')';
1935 					if (qe == ')')
1936 						qn++;
1937 					continue;
1938 				case '[':
1939 					if (!qe)
1940 						qe = ']';
1941 					if (qe == ']')
1942 						qn++;
1943 					continue;
1944 				case '}':
1945 				case ')':
1946 				case ']':
1947 					if (qe == n && qn > 0)
1948 						qn--;
1949 					continue;
1950 				case '"':
1951 				case '\'':
1952 					if (!qe)
1953 						qe = n;
1954 					else if (qe == n)
1955 						qe = 0;
1956 					continue;
1957 				case '\\':
1958 					if (*p2)
1959 						p2++;
1960 					continue;
1961 				default:
1962 					if (!qe && isspace(n))
1963 						break;
1964 					continue;
1965 				}
1966 				if (n)
1967 					*(p2 - 1) = 0;
1968 				else
1969 					p2--;
1970 				break;
1971 			}
1972 		}
1973 		lge = 0;
1974 		if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
1975 			ep->op = '=';
1976 		else
1977 		{
1978 			if (*p == '&')
1979 			{
1980 				ep->mask = strton(++p, &next, NiL, 0);
1981 				p = next;
1982 			}
1983 			switch (*p)
1984 			{
1985 			case '=':
1986 			case '>':
1987 			case '<':
1988 			case '*':
1989 				ep->op = *p++;
1990 				if (*p == '=')
1991 				{
1992 					p++;
1993 					switch (ep->op)
1994 					{
1995 					case '>':
1996 						lge = -1;
1997 						break;
1998 					case '<':
1999 						lge = 1;
2000 						break;
2001 					}
2002 				}
2003 				break;
2004 			case '!':
2005 			case '@':
2006 				ep->op = *p++;
2007 				if (*p == '=')
2008 					p++;
2009 				break;
2010 			case 'x':
2011 				p++;
2012 				ep->op = '*';
2013 				break;
2014 			default:
2015 				ep->op = '=';
2016 				if (ep->mask)
2017 					ep->value.num = ep->mask;
2018 				break;
2019 			}
2020 		}
2021 		if (ep->op != '*' && !ep->value.num)
2022 		{
2023 			if (ep->type == 'e')
2024 			{
2025 				if (ep->value.sub = vmnewof(mp->vm, 0, regex_t, 1, 0))
2026 				{
2027 					ep->value.sub->re_disc = &mp->redisc;
2028 					if (!(n = regcomp(ep->value.sub, p, REG_DELIMITED|REG_LENIENT|REG_NULL|REG_DISCIPLINE)))
2029 					{
2030 						p += ep->value.sub->re_npat;
2031 						if (!(n = regsubcomp(ep->value.sub, p, NiL, 0, 0)))
2032 							p += ep->value.sub->re_npat;
2033 					}
2034 					if (n)
2035 					{
2036 						regmessage(mp, ep->value.sub, n);
2037 						ep->value.sub = 0;
2038 					}
2039 					else if (*p && mp->disc->errorf)
2040 						(*mp->disc->errorf)(mp, mp->disc, 1, "invalid characters after substitution: %s", p);
2041 				}
2042 			}
2043 			else if (ep->type == 'm')
2044 			{
2045 				ep->mask = stresc(p) + 1;
2046 				ep->value.str = vmnewof(mp->vm, 0, char, ep->mask + 1, 0);
2047 				memcpy(ep->value.str, p, ep->mask);
2048 				if ((!ep->expr || !ep->offset) && !strmatch(ep->value.str, "\\!\\(*\\)"))
2049 					ep->value.str[ep->mask - 1] = '*';
2050 			}
2051 			else if (ep->type == 's')
2052 			{
2053 				ep->mask = stresc(p);
2054 				ep->value.str = vmnewof(mp->vm, 0, char, ep->mask, 0);
2055 				memcpy(ep->value.str, p, ep->mask);
2056 			}
2057 			else if (*p == '\'')
2058 			{
2059 				stresc(p);
2060 				ep->value.num = *(unsigned char*)(p + 1) + lge;
2061 			}
2062 			else if (strmatch(p, "+([a-z])\\(*\\)"))
2063 			{
2064 				char*	t;
2065 
2066 				t = p;
2067 				ep->type = 'V';
2068 				ep->op = *p;
2069 				while (*p && *p++ != '(');
2070 				switch (ep->op)
2071 				{
2072 				case 'l':
2073 					n = *p++;
2074 					if (n < 'a' || n > 'z')
2075 					{
2076 						if (mp->disc->errorf)
2077 							(*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n);
2078 					}
2079 					else if (!fun[n -= 'a'])
2080 					{
2081 						if (mp->disc->errorf)
2082 							(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a');
2083 					}
2084 					else
2085 					{
2086 						ep->value.loop = vmnewof(mp->vm, 0, Loop_t, 1, 0);
2087 						ep->value.loop->lab = fun[n];
2088 						while (*p && *p++ != ',');
2089 						ep->value.loop->start = strton(p, &t, NiL, 0);
2090 						while (*t && *t++ != ',');
2091 						ep->value.loop->size = strton(t, &t, NiL, 0);
2092 					}
2093 					break;
2094 				case 'm':
2095 				case 'r':
2096 					ep->desc = vmnewof(mp->vm, 0, char, 32, 0);
2097 					ep->mime = vmnewof(mp->vm, 0, char, 32, 0);
2098 					break;
2099 				case 'v':
2100 					break;
2101 				default:
2102 					if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
2103 						(*mp->disc->errorf)(mp, mp->disc, 1, "%-.*s: unknown function", p - t, t);
2104 					break;
2105 				}
2106 			}
2107 			else
2108 			{
2109 				ep->value.num = strton(p, NiL, NiL, 0) + lge;
2110 				if (ep->op == '@')
2111 					ep->value.num = swapget(0, (char*)&ep->value.num, sizeof(ep->value.num));
2112 			}
2113 		}
2114 
2115 		/*
2116 		 * file description
2117 		 */
2118 
2119 		if (p2)
2120 		{
2121 			for (; isspace(*p2); p2++);
2122 			if (p = strchr(p2, '\t'))
2123 			{
2124 				/*
2125 				 * check for message catalog index
2126 				 */
2127 
2128 				*p++ = 0;
2129 				if (isalpha(*p2))
2130 				{
2131 					for (p3 = p2; isalnum(*p3); p3++);
2132 					if (*p3++ == ':')
2133 					{
2134 						for (; isdigit(*p3); p3++);
2135 						if (!*p3)
2136 						{
2137 							for (p2 = p; isspace(*p2); p2++);
2138 							if (p = strchr(p2, '\t'))
2139 								*p++ = 0;
2140 						}
2141 					}
2142 				}
2143 			}
2144 			stresc(p2);
2145 			ep->desc = vmstrdup(mp->vm, p2);
2146 			if (p)
2147 			{
2148 				for (; isspace(*p); p++);
2149 				if (*p)
2150 					ep->mime = vmstrdup(mp->vm, p);
2151 			}
2152 		}
2153 		else
2154 			ep->desc = "";
2155 
2156 		/*
2157 		 * get next entry
2158 		 */
2159 
2160 		last = ep;
2161 		ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
2162 	}
2163 	if (last)
2164 	{
2165 		last->next = 0;
2166 		if (mp->magiclast)
2167 			mp->magiclast->next = first;
2168 		else
2169 			mp->magic = first;
2170 		mp->magiclast = last;
2171 	}
2172 	vmfree(mp->vm, ep);
2173 	if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
2174 	{
2175 		if (lev < 0)
2176 			(*mp->disc->errorf)(mp, mp->disc, 1, "too many } operators");
2177 		else if (lev > 0)
2178 			(*mp->disc->errorf)(mp, mp->disc, 1, "not enough } operators");
2179 		if (ret)
2180 			(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a');
2181 	}
2182 	error_info.file = 0;
2183 	error_info.line = 0;
2184 	return 0;
2185 }
2186 
2187 /*
2188  * load a magic file into mp
2189  */
2190 
2191 int
2192 magicload(register Magic_t* mp, const char* file, unsigned long flags)
2193 {
2194 	register char*		s;
2195 	register char*		e;
2196 	register char*		t;
2197 	int			n;
2198 	int			found;
2199 	int			list;
2200 	Sfio_t*			fp;
2201 
2202 	mp->flags = mp->disc->flags | flags;
2203 	found = 0;
2204 	if (list = !(s = (char*)file) || !*s || (*s == '-' || *s == '.') && !*(s + 1))
2205 	{
2206 		if (!(s = getenv(MAGIC_FILE_ENV)) || !*s)
2207 			s = MAGIC_FILE;
2208 	}
2209 	for (;;)
2210 	{
2211 		if (!list)
2212 			e = 0;
2213 		else if (e = strchr(s, ':'))
2214 		{
2215 			/*
2216 			 * ok, so ~ won't work for the last list element
2217 			 * we do it for MAGIC_FILES_ENV anyway
2218 			 */
2219 
2220 			if ((strneq(s, "~/", n = 2) || strneq(s, "$HOME/", n = 6) || strneq(s, "${HOME}/", n = 8)) && (t = getenv("HOME")))
2221 			{
2222 				sfputr(mp->tmp, t, -1);
2223 				s += n - 1;
2224 			}
2225 			sfwrite(mp->tmp, s, e - s);
2226 			if (!(s = sfstruse(mp->tmp)))
2227 				goto nospace;
2228 		}
2229 		if (!*s || streq(s, "-"))
2230 			s = MAGIC_FILE;
2231 		if (!(fp = sfopen(NiL, s, "r")))
2232 		{
2233 			if (list)
2234 			{
2235 				if (!(t = pathpath(mp->fbuf, s, "", PATH_REGULAR|PATH_READ)) && !strchr(s, '/'))
2236 				{
2237 					strcpy(mp->fbuf, s);
2238 					sfprintf(mp->tmp, "%s/%s", MAGIC_DIR, mp->fbuf);
2239 					if (!(s = sfstruse(mp->tmp)))
2240 						goto nospace;
2241 					if (!(t = pathpath(mp->fbuf, s, "", PATH_REGULAR|PATH_READ)))
2242 						goto next;
2243 				}
2244 				if (!(fp = sfopen(NiL, t, "r")))
2245 					goto next;
2246 			}
2247 			else
2248 			{
2249 				if (mp->disc->errorf)
2250 					(*mp->disc->errorf)(mp, mp->disc, 3, "%s: cannot open magic file", s);
2251 				return -1;
2252 			}
2253 		}
2254 		found = 1;
2255 		n = load(mp, s, fp);
2256 		sfclose(fp);
2257 		if (n && !list)
2258 			return -1;
2259 	next:
2260 		if (!e)
2261 			break;
2262 		s = e + 1;
2263 	}
2264 	if (!found)
2265 	{
2266 		if (mp->flags & MAGIC_VERBOSE)
2267 		{
2268 			if (mp->disc->errorf)
2269 				(*mp->disc->errorf)(mp, mp->disc, 2, "cannot find magic file");
2270 		}
2271 		return -1;
2272 	}
2273 	return 0;
2274  nospace:
2275 	if (mp->disc->errorf)
2276 		(*mp->disc->errorf)(mp, mp->disc, 3, "out of space");
2277 	return -1;
2278 }
2279 
2280 /*
2281  * open a magic session
2282  */
2283 
2284 Magic_t*
2285 magicopen(Magicdisc_t* disc)
2286 {
2287 	register Magic_t*	mp;
2288 	register int		i;
2289 	register int		n;
2290 	register int		f;
2291 	register int		c;
2292 	register Vmalloc_t*	vm;
2293 	unsigned char*		map[CC_MAPS + 1];
2294 
2295 	if (!(vm = vmopen(Vmdcheap, Vmbest, 0)))
2296 		return 0;
2297 	if (!(mp = vmnewof(vm, 0, Magic_t, 1, 0)))
2298 	{
2299 		vmclose(vm);
2300 		return 0;
2301 	}
2302 	mp->id = lib;
2303 	mp->disc = disc;
2304 	mp->vm = vm;
2305 	mp->flags = disc->flags;
2306 	mp->redisc.re_version = REG_VERSION;
2307 	mp->redisc.re_flags = REG_NOFREE;
2308 	mp->redisc.re_errorf = (regerror_t)disc->errorf;
2309 	mp->redisc.re_resizef = (regresize_t)vmgetmem;
2310 	mp->redisc.re_resizehandle = (void*)mp->vm;
2311 	mp->dtdisc.key = offsetof(Info_t, name);
2312 	mp->dtdisc.link = offsetof(Info_t, link);
2313 	if (!(mp->tmp = sfstropen()) || !(mp->infotab = dtnew(mp->vm, &mp->dtdisc, Dthash)))
2314 		goto bad;
2315 	for (n = 0; n < elementsof(info); n++)
2316 		dtinsert(mp->infotab, &info[n]);
2317 	for (i = 0; i < CC_MAPS; i++)
2318 		map[i] = ccmap(i, CC_ASCII);
2319 	mp->x2n = ccmap(CC_ALIEN, CC_NATIVE);
2320 	for (n = 0; n <= UCHAR_MAX; n++)
2321 	{
2322 		f = 0;
2323 		i = CC_MAPS;
2324 		while (--i >= 0)
2325 		{
2326 			c = ccmapchr(map[i], n);
2327 			f = (f << CC_BIT) | CCTYPE(c);
2328 		}
2329 		mp->cctype[n] = f;
2330 	}
2331 	return mp;
2332  bad:
2333 	magicclose(mp);
2334 	return 0;
2335 }
2336 
2337 /*
2338  * close a magicopen() session
2339  */
2340 
2341 int
2342 magicclose(register Magic_t* mp)
2343 {
2344 	if (!mp)
2345 		return -1;
2346 	if (mp->tmp)
2347 		sfstrclose(mp->tmp);
2348 	if (mp->vm)
2349 		vmclose(mp->vm);
2350 	return 0;
2351 }
2352 
2353 /*
2354  * return the magic string for file with optional stat info st
2355  */
2356 
2357 char*
2358 magictype(register Magic_t* mp, Sfio_t* fp, const char* file, register struct stat* st)
2359 {
2360 	off_t	off;
2361 	char*	s;
2362 
2363 	mp->flags = mp->disc->flags;
2364 	mp->mime = 0;
2365 	if (!st)
2366 		s = T("cannot stat");
2367 	else
2368 	{
2369 		if (mp->fp = fp)
2370 			off = sfseek(mp->fp, (off_t)0, SEEK_CUR);
2371 		s = type(mp, file, st, mp->tbuf, sizeof(mp->tbuf));
2372 		if (mp->fp)
2373 			sfseek(mp->fp, off, SEEK_SET);
2374 		if (!(mp->flags & MAGIC_MIME))
2375 		{
2376 			if (S_ISREG(st->st_mode) && (st->st_size > 0) && (st->st_size < 128))
2377 				sfprintf(mp->tmp, "%s ", T("short"));
2378 			sfprintf(mp->tmp, "%s", s);
2379 			if (!mp->fp && (st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)))
2380 				sfprintf(mp->tmp, ", %s", S_ISDIR(st->st_mode) ? T("searchable") : T("executable"));
2381 			if (st->st_mode & S_ISUID)
2382 				sfprintf(mp->tmp, ", setuid=%s", fmtuid(st->st_uid));
2383 			if (st->st_mode & S_ISGID)
2384 				sfprintf(mp->tmp, ", setgid=%s", fmtgid(st->st_gid));
2385 			if (st->st_mode & S_ISVTX)
2386 				sfprintf(mp->tmp, ", sticky");
2387 			if (!(s = sfstruse(mp->tmp)))
2388 				s = T("out of space");
2389 		}
2390 	}
2391 	if (mp->flags & MAGIC_MIME)
2392 		s = mp->mime;
2393 	if (!s)
2394 		s = T("error");
2395 	return s;
2396 }
2397 
2398 /*
2399  * list the magic table in mp on sp
2400  */
2401 
2402 int
2403 magiclist(register Magic_t* mp, register Sfio_t* sp)
2404 {
2405 	register Entry_t*	ep = mp->magic;
2406 	register Entry_t*	rp = 0;
2407 
2408 	mp->flags = mp->disc->flags;
2409 	sfprintf(sp, "cont\toffset\ttype\top\tmask\tvalue\tmime\tdesc\n");
2410 	while (ep)
2411 	{
2412 		sfprintf(sp, "%c %c\t", ep->cont, ep->nest);
2413 		if (ep->expr)
2414 			sfprintf(sp, "%s", ep->expr);
2415 		else
2416 			sfprintf(sp, "%ld", ep->offset);
2417 		sfprintf(sp, "\t%s%c\t%c\t%lo\t", ep->swap == (char)~3 ? "L" : ep->swap == (char)~0 ? "B" : "", ep->type, ep->op, ep->mask);
2418 		switch (ep->type)
2419 		{
2420 		case 'm':
2421 		case 's':
2422 			sfputr(sp, fmtesc(ep->value.str), -1);
2423 			break;
2424 		case 'V':
2425 			switch (ep->op)
2426 			{
2427 			case 'l':
2428 				sfprintf(sp, "loop(%d,%d,%d,%d)", ep->value.loop->start, ep->value.loop->size, ep->value.loop->count, ep->value.loop->offset);
2429 				break;
2430 			case 'v':
2431 				sfprintf(sp, "vcodex()");
2432 				break;
2433 			default:
2434 				sfprintf(sp, "%p", ep->value.str);
2435 				break;
2436 			}
2437 			break;
2438 		default:
2439 			sfprintf(sp, "%lo", ep->value.num);
2440 			break;
2441 		}
2442 		sfprintf(sp, "\t%s\t%s\n", ep->mime ? ep->mime : "", fmtesc(ep->desc));
2443 		if (ep->cont == '$' && !ep->value.lab->mask)
2444 		{
2445 			rp = ep;
2446 			ep = ep->value.lab;
2447 		}
2448 		else
2449 		{
2450 			if (ep->cont == ':')
2451 			{
2452 				ep = rp;
2453 				ep->value.lab->mask = 1;
2454 			}
2455 			ep = ep->next;
2456 		}
2457 	}
2458 	return 0;
2459 }
2460