xref: /titanic_41/usr/src/lib/libast/common/misc/magic.c (revision 9ec394dbf343c1f23c6e13c39df427f238e5a369)
1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *           Copyright (c) 1985-2007 AT&T Knowledge Ventures            *
5 *                      and is licensed under the                       *
6 *                  Common Public License, Version 1.0                  *
7 *                      by AT&T Knowledge Ventures                      *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *            http://www.opensource.org/licenses/cpl1.0.txt             *
11 *         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                  David Korn <dgk@research.att.com>                   *
19 *                   Phong Vo <kpv@research.att.com>                    *
20 *                                                                      *
21 ***********************************************************************/
22 #pragma prototyped
23 /*
24  * Glenn Fowler
25  * AT&T Research
26  *
27  * library interface to file
28  *
29  * the sum of the hacks {s5,v10,planix} is _____ than the parts
30  */
31 
/*
 * id: version stamp string; nothing in the visible part of this file
 * references it (presumably it exists to be found in the object file by
 * what(1)/ident(1) style tools via the "@(#)" and "$Id: ... $" markers
 * -- confirm).
 */
32 static const char id[] = "\n@(#)$Id: magic library (AT&T Research) 2007-01-08 $\0\n";
33 
/*
 * lib: library id handed to ERROR_translate() by the T() macro
 */
34 static const char lib[] = "libast:magic";
35 
36 #include <ast.h>
37 #include <ctype.h>
38 #include <ccode.h>
39 #include <dt.h>
40 #include <modex.h>
41 #include <error.h>
42 #include <regex.h>
43 #include <swap.h>
44 
/*
 * T(m): pass message m through ERROR_translate() with this library's id;
 * an empty string is returned as is without calling the translator.
 * m is fully parenthesized so any pointer expression may be passed, but
 * it is still evaluated more than once and must be side-effect free.
 */
#define T(m)		(*(m)?ERROR_translate(NiL,NiL,lib,(m)):(m))

/*
 * match(s,p): strgrpmatch() of s against pattern p with NiL/0 for the
 * sub-match arguments and the STR_LEFT|STR_RIGHT|STR_ICASE flags
 * (presumably: anchored at both ends, case insensitive -- confirm
 * against the strgrpmatch() documentation).
 */
#define match(s,p)	strgrpmatch(s,p,NiL,0,STR_LEFT|STR_RIGHT|STR_ICASE)

#define MAXNEST		10		/* { ... } nesting limit	*/
#define MINITEM		4		/* magic buffer rounding	*/
51 
/*
 * Info_t: one keyword table entry.  The static tables dict[] and info[]
 * initialize only name and value positionally, so those two members must
 * stay first and in this order; link is left zero-initialized there.
 */
52 typedef struct				/* identifier dictionary entry	*/
53 {
54 	const char	name[16];	/* identifier name		*/
55 	int		value;		/* identifier value		*/
56 	Dtlink_t	link;		/* dictionary link		*/
57 } Info_t;
58 
/*
 * Edit_t: singly linked list node holding a compiled regex.
 * NOTE(review): not used by any function in the visible part of the
 * file; its consumers are presumably further down -- confirm.
 */
59 typedef struct Edit			/* edit substitution		*/
60 {
61 	struct Edit*	next;		/* next in list			*/
62 	regex_t*	from;		/* from pattern			*/
63 } Edit_t;
64 
/* forward declaration: Loop_t points at Entry before Entry is defined */
65 struct Entry;
66 
/*
 * Loop_t: state for an entry whose op is 'l' (see ckmagic()).
 * On the first visit count is set from the fetched number, the caller's
 * base offset is saved in offset and the base offset becomes start; each
 * later visit decrements count and advances the base offset by size until
 * count reaches 0, at which point the saved offset is restored.  While the
 * loop is active, lab is entered through the ckmagic() return stack.
 */
67 typedef struct				/* loop info			*/
68 {
69 	struct Entry*	lab;		/* call this function		*/
70 	int		start;		/* start here			*/
71 	int		size;		/* increment by this amount	*/
72 	int		count;		/* dynamic loop count		*/
73 	int		offset;		/* dynamic offset		*/
74 } Loop_t;
75 
/*
 * Entry_t: one parsed magic table entry; entries are chained through next
 * and walked in order by ckmagic().
 *
 * As used by ckmagic():
 *	expr	when set, offset selects how the position/value is obtained:
 *		0 evaluates expr with strexpr(), an INFO_* code takes the
 *		value from the stat data or file name instead of file data
 *	cont	'#' starts a fresh test (returning what has been collected
 *		so far if anything matched), '$' calls value.lab, ':'
 *		returns from a call, '&' clears the current nesting level's
 *		match state when the entry fails; '|' and other values
 *		depend on that match state
 *	nest	'{' pushes and '}' pops a nesting level (at most MAXNEST)
 *	type	selects how the datum is fetched (b/h/l/q numbers, e/E
 *		regex, s/m/M/S strings, ...)
 *	op	comparison or action applied to the fetched number
 *	swap	when non-zero it is combined with the detected byte order
 *		(~swap ^ mp->swap) for the swapget() call
 * The active member of value depends on type/op/cont.
 */
76 typedef struct Entry			/* magic file entry		*/
77 {
78 	struct Entry*	next;		/* next in list			*/
79 	char*		expr;		/* offset expression		*/
80 	union
81 	{
82 	unsigned long	num;
83 	char*		str;
84 	struct Entry*	lab;
85 	regex_t*	sub;
86 	Loop_t*		loop;
87 	}		value;		/* comparison value		*/
88 	char*		desc;		/* file description		*/
89 	char*		mime;		/* file mime type		*/
90 	unsigned long	offset;		/* offset in bytes		*/
91 	unsigned long	mask;		/* mask before compare		*/
92 	char		cont;		/* continuation operation	*/
93 	char		type;		/* datum type			*/
94 	char		op;		/* comparison operation		*/
95 	char		nest;		/* { or } nesting operation	*/
96 	char		swap;		/* forced swap order		*/
97 } Entry_t;
98 
/*
 * character code classification
 *
 * Each code map gets CC_BIT bits of class flags inside a Cctype_t;
 * cklang() consumes them map by map with flags >>= CC_BIT, so Cctype_t
 * must hold CC_MAPS*CC_BIT bits: unsigned short when that fits in
 * CHAR_BIT*2 bits, unsigned long otherwise.
 */
99 #define CC_BIT		5
100 
101 #if (CC_MAPS*CC_BIT) <= (CHAR_BIT*2)
102 typedef unsigned short Cctype_t;
103 #else
104 typedef unsigned long Cctype_t;
105 #endif
106 
/* per map class flags; all five fit in CC_BIT bits */
107 #define CC_text		0x01
108 #define CC_control	0x02
109 #define CC_latin	0x04
110 #define CC_binary	0x08
111 #define CC_utf_8	0x10
112 
113 #define CC_notext	CC_text		/* CC_text is flipped before checking */
114 
115 #define CC_MASK		(CC_binary|CC_latin|CC_control|CC_text)
116 
/*
 * CCTYPE(c): class of character code c
 *	c > 0240			CC_binary
 *	0200 <= c <= 0240		CC_latin
 *	c < 040 other than 007, 011,
 *	  012, 013 and 015		CC_control
 *	anything else			CC_text
 * c is evaluated several times, so it must be side-effect free.
 */
117 #define CCTYPE(c)	(((c)>0240)?CC_binary:((c)>=0200)?CC_latin:((c)<040&&(c)!=007&&(c)!=011&&(c)!=012&&(c)!=013&&(c)!=015)?CC_control:CC_text)
118 
/*
 * identifier classes: the value member of dict[] entries and the index
 * into the identifier[] counters of the magic handle
 */
119 #define ID_NONE		0
120 #define ID_ASM		1
121 #define ID_C		2
122 #define ID_COBOL	3
123 #define ID_COPYBOOK	4
124 #define ID_CPLUSPLUS	5
125 #define ID_FORTRAN	6
126 #define ID_HTML		7
127 #define ID_INCL1	8
128 #define ID_INCL2	9
129 #define ID_INCL3	10
130 #define ID_MAM1		11
131 #define ID_MAM2		12
132 #define ID_MAM3		13
133 #define ID_NOTEXT	14
134 #define ID_PL1		15
135 #define ID_YACC		16
136 
/* largest ID_* value; identifier[] is sized ID_MAX + 1 */
137 #define ID_MAX		ID_YACC
138 
/*
 * info keyword codes: the value member of info[] entries; ckmagic()
 * switches on these (through the entry offset) to pick a stat field or
 * the file name; 0 is not used because it selects expression evaluation
 */
139 #define INFO_atime	1
140 #define INFO_blocks	2
141 #define INFO_ctime	3
142 #define INFO_fstype	4
143 #define INFO_gid	5
144 #define INFO_mode	6
145 #define INFO_mtime	7
146 #define INFO_name	8
147 #define INFO_nlink	9
148 #define INFO_size	10
149 #define INFO_uid	11
150 
/*
 * _MAGIC_PRIVATE_: member declarations defined before <magic.h> is
 * included (presumably expanded inside the Magic_t definition there so
 * that only this file sees the private part of the handle -- confirm
 * against magic.h).
 */
151 #define _MAGIC_PRIVATE_ \
152 	Magicdisc_t*	disc;			/* discipline		*/ \
153 	Vmalloc_t*	vm;			/* vmalloc region	*/ \
154 	Entry_t*	magic;			/* parsed magic table	*/ \
155 	Entry_t*	magiclast;		/* last entry in magic	*/ \
156 	char*		mime;			/* MIME type		*/ \
157 	unsigned char*	x2n;			/* CC_ALIEN=>CC_NATIVE	*/ \
158 	char		fbuf[SF_BUFSIZE + 1];	/* file data		*/ \
159 	char		xbuf[SF_BUFSIZE + 1];	/* indirect file data	*/ \
160 	char		nbuf[256];		/* !CC_NATIVE data	*/ \
161 	char		mbuf[64];		/* mime string		*/ \
162 	char		sbuf[64];		/* type suffix string	*/ \
163 	char		tbuf[2 * PATH_MAX];	/* type string		*/ \
164 	Cctype_t	cctype[UCHAR_MAX + 1];	/* char code types	*/ \
165 	unsigned int	count[UCHAR_MAX + 1];	/* char frequency count	*/ \
166 	unsigned int	multi[UCHAR_MAX + 1];	/* multi char count	*/ \
167 	int		keep[MAXNEST];		/* ckmagic nest stack	*/ \
168 	char*		cap[MAXNEST];		/* ckmagic mime stack	*/ \
169 	char*		msg[MAXNEST];		/* ckmagic text stack	*/ \
170 	Entry_t*	ret[MAXNEST];		/* ckmagic return stack	*/ \
171 	int		fbsz;			/* fbuf size		*/ \
172 	int		fbmx;			/* fbuf max size	*/ \
173 	int		xbsz;			/* xbuf size		*/ \
174 	int		swap;			/* swap() operation	*/ \
175 	unsigned long	flags;			/* disc+open flags	*/ \
176 	long		xoff;			/* xbuf offset		*/ \
177 	int		identifier[ID_MAX + 1];	/* Info_t identifier	*/ \
178 	Sfio_t*		fp;			/* fbuf fp		*/ \
179 	Sfio_t*		tmp;			/* tmp string		*/ \
180 	regdisc_t	redisc;			/* regex discipline	*/ \
181 	Dtdisc_t	dtdisc;			/* dict discipline	*/ \
182 	Dt_t*		idtab;			/* identifier dict	*/ \
183 	Dt_t*		infotab;		/* info keyword dict	*/
184 
185 #include <magic.h>
186 
/*
 * dict: identifiers that hint at the language of a text file.
 * cklang() inserts every entry into mp->idtab the first time it needs
 * the table and, for each identifier found in the file data, increments
 * mp->identifier[value] of the matching entry.  The table is not const
 * because each entry embeds its own dictionary link.  Lookups are by
 * exact name, so upper and lower case spellings are listed separately.
 */
187 static Info_t		dict[] =		/* keyword dictionary	*/
188 {
189 	{ 	"COMMON",	ID_FORTRAN	},
190 	{ 	"COMPUTE",	ID_COBOL	},
191 	{ 	"COMP",		ID_COPYBOOK	},
192 	{ 	"COMPUTATIONAL",ID_COPYBOOK	},
193 	{ 	"DCL",		ID_PL1		},
194 	{ 	"DEFINED",	ID_PL1		},
195 	{ 	"DIMENSION",	ID_FORTRAN	},
196 	{ 	"DIVISION",	ID_COBOL	},
197 	{ 	"FILLER",	ID_COPYBOOK	},
198 	{ 	"FIXED",	ID_PL1		},
199 	{ 	"FUNCTION",	ID_FORTRAN	},
200 	{ 	"HTML",		ID_HTML		},
201 	{ 	"INTEGER",	ID_FORTRAN	},
202 	{ 	"MAIN",		ID_PL1		},
203 	{ 	"OPTIONS",	ID_PL1		},
204 	{ 	"PERFORM",	ID_COBOL	},
205 	{ 	"PIC",		ID_COPYBOOK	},
206 	{ 	"REAL",		ID_FORTRAN	},
207 	{ 	"REDEFINES",	ID_COPYBOOK	},
208 	{ 	"S9",		ID_COPYBOOK	},
209 	{ 	"SECTION",	ID_COBOL	},
210 	{ 	"SELECT",	ID_COBOL	},
211 	{ 	"SUBROUTINE",	ID_FORTRAN	},
212 	{ 	"TEXT",		ID_ASM		},
213 	{ 	"VALUE",	ID_COPYBOOK	},
214 	{ 	"attr",		ID_MAM3		},
215 	{ 	"binary",	ID_YACC		},
216 	{ 	"block",	ID_FORTRAN	},
217 	{ 	"bss",		ID_ASM		},
218 	{ 	"byte",		ID_ASM		},
219 	{ 	"char",		ID_C		},
220 	{ 	"class",	ID_CPLUSPLUS	},
221 	{ 	"clr",		ID_NOTEXT	},
222 	{ 	"comm",		ID_ASM		},
223 	{ 	"common",	ID_FORTRAN	},
224 	{ 	"data",		ID_ASM		},
225 	{ 	"dimension",	ID_FORTRAN	},
226 	{ 	"done",		ID_MAM2		},
227 	{ 	"double",	ID_C		},
228 	{ 	"even",		ID_ASM		},
229 	{ 	"exec",		ID_MAM3		},
230 	{ 	"extern",	ID_C		},
231 	{ 	"float",	ID_C		},
232 	{ 	"function",	ID_FORTRAN	},
233 	{ 	"globl",	ID_ASM		},
234 	{ 	"h",		ID_INCL3	},
235 	{ 	"html",		ID_HTML		},
236 	{ 	"include",	ID_INCL1	},
237 	{ 	"int",		ID_C		},
238 	{ 	"integer",	ID_FORTRAN	},
239 	{ 	"jmp",		ID_NOTEXT	},
240 	{ 	"left",		ID_YACC		},
241 	{ 	"libc",		ID_INCL2	},
242 	{ 	"long",		ID_C		},
243 	{ 	"make",		ID_MAM1		},
244 	{ 	"mov",		ID_NOTEXT	},
245 	{ 	"private",	ID_CPLUSPLUS	},
246 	{ 	"public",	ID_CPLUSPLUS	},
247 	{ 	"real",		ID_FORTRAN	},
248 	{ 	"register",	ID_C		},
249 	{ 	"right",	ID_YACC		},
250 	{ 	"sfio",		ID_INCL2	},
251 	{ 	"static",	ID_C		},
252 	{ 	"stdio",	ID_INCL2	},
253 	{ 	"struct",	ID_C		},
254 	{ 	"subroutine",	ID_FORTRAN	},
255 	{ 	"sys",		ID_NOTEXT	},
256 	{ 	"term",		ID_YACC		},
257 	{ 	"text",		ID_ASM		},
258 	{ 	"tst",		ID_NOTEXT	},
259 	{ 	"type",		ID_YACC		},
260 	{ 	"typedef",	ID_C		},
261 	{ 	"u",		ID_INCL2	},
262 	{ 	"union",	ID_YACC		},
263 	{ 	"void",		ID_C		},
264 };
265 
/*
 * info: file attribute keywords and their INFO_* codes.  ckmagic()
 * switches on these codes (stored in an entry's offset when the entry has
 * an expression) to test stat data or the file name instead of file
 * contents.
 * NOTE(review): the code that loads this table (presumably into
 * mp->infotab) is outside the visible part of the file -- confirm.
 */
266 static Info_t		info[] =
267 {
268 	{	"atime",	INFO_atime		},
269 	{	"blocks",	INFO_blocks		},
270 	{	"ctime",	INFO_ctime		},
271 	{	"fstype",	INFO_fstype		},
272 	{	"gid",		INFO_gid		},
273 	{	"mode",		INFO_mode		},
274 	{	"mtime",	INFO_mtime		},
275 	{	"name",		INFO_name		},
276 	{	"nlink",	INFO_nlink		},
277 	{	"size",		INFO_size		},
278 	{	"uid",		INFO_uid		},
279 };
280 
281 /*
282  * return pointer to data at offset off and size siz
283  */
284 
285 static char*
286 getdata(register Magic_t* mp, register long off, register int siz)
287 {
288 	register long	n;
289 
290 	if (off < 0)
291 		return 0;
292 	if (off + siz <= mp->fbsz)
293 		return mp->fbuf + off;
294 	if (off < mp->xoff || off + siz > mp->xoff + mp->xbsz)
295 	{
296 		if (off + siz > mp->fbmx)
297 			return 0;
298 		n = (off / (SF_BUFSIZE / 2)) * (SF_BUFSIZE / 2);
299 		if (sfseek(mp->fp, n, SEEK_SET) != n)
300 			return 0;
301 		if ((mp->xbsz = sfread(mp->fp, mp->xbuf, sizeof(mp->xbuf) - 1)) < 0)
302 		{
303 			mp->xoff = 0;
304 			mp->xbsz = 0;
305 			return 0;
306 		}
307 		mp->xbuf[mp->xbsz] = 0;
308 		mp->xoff = n;
309 		if (off + siz > mp->xoff + mp->xbsz)
310 			return 0;
311 	}
312 	return mp->xbuf + off - mp->xoff;
313 }
314 
315 /*
316  * @... evaluator for strexpr()
317  */
318 
319 static long
320 indirect(const char* cs, char** e, void* handle)
321 {
322 	register char*		s = (char*)cs;
323 	register Magic_t*	mp = (Magic_t*)handle;
324 	register long		n = 0;
325 	register char*		p;
326 
327 	if (s)
328 	{
329 		if (*s == '@')
330 		{
331 			n = *++s == '(' ? strexpr(s, e, indirect, mp) : strtol(s, e, 0);
332 			switch (*(s = *e))
333 			{
334 			case 'b':
335 			case 'B':
336 				s++;
337 				if (p = getdata(mp, n, 1))
338 					n = *(unsigned char*)p;
339 				else
340 					s = (char*)cs;
341 				break;
342 			case 'h':
343 			case 'H':
344 				s++;
345 				if (p = getdata(mp, n, 2))
346 					n = swapget(mp->swap, p, 2);
347 				else
348 					s = (char*)cs;
349 				break;
350 			case 'q':
351 			case 'Q':
352 				s++;
353 				if (p = getdata(mp, n, 8))
354 					n = swapget(mp->swap, p, 8);
355 				else
356 					s = (char*)cs;
357 				break;
358 			default:
359 				if (isalnum(*s))
360 					s++;
361 				if (p = getdata(mp, n, 4))
362 					n = swapget(mp->swap, p, 4);
363 				else
364 					s = (char*)cs;
365 				break;
366 			}
367 		}
368 		*e = s;
369 	}
370 	else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
371 		(*mp->disc->errorf)(mp, mp->disc, 2, "%s in indirect expression", *e);
372 	return n;
373 }
374 
375 /*
376  * emit regex error message
377  */
378 
379 static void
380 regmessage(Magic_t* mp, regex_t* re, int code)
381 {
382 	char	buf[128];
383 
384 	if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
385 	{
386 		regerror(code, re, buf, sizeof(buf));
387 		(*mp->disc->errorf)(mp, mp->disc, 3, "regex: %s", buf);
388 	}
389 }
390 
391 /*
392  * decompose vcodex(3) method composition
393  */
394 
395 static char*
396 vcdecomp(char* b, char* e, unsigned char* m, unsigned char* x)
397 {
398 	unsigned char*	map;
399 	int		c;
400 	int		n;
401 	int		i;
402 
403 	map = CCMAP(CC_ASCII, CC_NATIVE);
404 	i = 1;
405 	for (;;)
406 	{
407 		if (i)
408 			i = 0;
409 		else
410 			*b++ = '^';
411 		while (b < e && m < x && (c = *m++))
412 		{
413 			if (map)
414 				c = map[c];
415 			*b++ = c;
416 		}
417 		if (b >= e)
418 			break;
419 		n = 0;
420 		while (m < x)
421 		{
422 			n = (n<<7) | (*m & 0x7f);
423 			if (!(*m++ & 0x80))
424 				break;
425 		}
426 		if (n >= (x - m))
427 			break;
428 		m += n;
429 	}
430 	return b;
431 }
432 
433 /*
434  * check for magic table match in buf
435  */
436 
437 static char*
438 ckmagic(register Magic_t* mp, const char* file, char* buf, struct stat* st, unsigned long off)
439 {
440 	register Entry_t*	ep;
441 	register char*		p;
442 	register char*		b;
443 	register int		level = 0;
444 	int			call = -1;
445 	int			c;
446 	char*			q;
447 	char*			t;
448 	char*			base = 0;
449 	unsigned long		num;
450 	unsigned long		mask;
451 	regmatch_t		matches[10];
452 
453 	mp->swap = 0;
454 	b = mp->msg[0] = buf;
455 	mp->mime = mp->cap[0] = 0;
456 	mp->keep[0] = 0;
457 	for (ep = mp->magic; ep; ep = ep->next)
458 	{
459 	fun:
460 		if (ep->nest == '{')
461 		{
462 			if (++level >= MAXNEST)
463 			{
464 				call = -1;
465 				level = 0;
466 				mp->keep[0] = 0;
467 				b = mp->msg[0];
468 				mp->mime = mp->cap[0];
469 				continue;
470 			}
471 			mp->keep[level] = mp->keep[level - 1] != 0;
472 			mp->msg[level] = b;
473 			mp->cap[level] = mp->mime;
474 		}
475 		switch (ep->cont)
476 		{
477 		case '#':
478 			if (mp->keep[level] && b > buf)
479 			{
480 				*b = 0;
481 				return buf;
482 			}
483 			mp->swap = 0;
484 			b = mp->msg[0] = buf;
485 			mp->mime = mp->cap[0] = 0;
486 			if (ep->type == ' ')
487 				continue;
488 			break;
489 		case '$':
490 			if (mp->keep[level] && call < (MAXNEST - 1))
491 			{
492 				mp->ret[++call] = ep;
493 				ep = ep->value.lab;
494 				goto fun;
495 			}
496 			continue;
497 		case ':':
498 			ep = mp->ret[call--];
499 			if (ep->op == 'l')
500 				goto fun;
501 			continue;
502 		case '|':
503 			if (mp->keep[level] > 1)
504 				goto checknest;
505 			/*FALLTHROUGH*/
506 		default:
507 			if (!mp->keep[level])
508 			{
509 				b = mp->msg[level];
510 				mp->mime = mp->cap[level];
511 				goto checknest;
512 			}
513 			break;
514 		}
515 		if (!ep->expr)
516 			num = ep->offset + off;
517 		else
518 			switch (ep->offset)
519 			{
520 			case 0:
521 				num = strexpr(ep->expr, NiL, indirect, mp) + off;
522 				break;
523 			case INFO_atime:
524 				num = st->st_atime;
525 				ep->type = 'D';
526 				break;
527 			case INFO_blocks:
528 				num = iblocks(st);
529 				ep->type = 'N';
530 				break;
531 			case INFO_ctime:
532 				num = st->st_ctime;
533 				ep->type = 'D';
534 				break;
535 			case INFO_fstype:
536 				p = fmtfs(st);
537 				ep->type = toupper(ep->type);
538 				break;
539 			case INFO_gid:
540 				if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
541 				{
542 					p = fmtgid(st->st_gid);
543 					ep->type = toupper(ep->type);
544 				}
545 				else
546 				{
547 					num = st->st_gid;
548 					ep->type = 'N';
549 				}
550 				break;
551 			case INFO_mode:
552 				if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
553 				{
554 					p = fmtmode(st->st_mode, 0);
555 					ep->type = toupper(ep->type);
556 				}
557 				else
558 				{
559 					num = modex(st->st_mode);
560 					ep->type = 'N';
561 				}
562 				break;
563 			case INFO_mtime:
564 				num = st->st_ctime;
565 				ep->type = 'D';
566 				break;
567 			case INFO_name:
568 				if (!base)
569 				{
570 					if (base = strrchr(file, '/'))
571 						base++;
572 					else
573 						base = (char*)file;
574 				}
575 				p = base;
576 				ep->type = toupper(ep->type);
577 				break;
578 			case INFO_nlink:
579 				num = st->st_nlink;
580 				ep->type = 'N';
581 				break;
582 			case INFO_size:
583 				num = st->st_size;
584 				ep->type = 'N';
585 				break;
586 			case INFO_uid:
587 				if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
588 				{
589 					p = fmtuid(st->st_uid);
590 					ep->type = toupper(ep->type);
591 				}
592 				else
593 				{
594 					num = st->st_uid;
595 					ep->type = 'N';
596 				}
597 				break;
598 			}
599 		switch (ep->type)
600 		{
601 
602 		case 'b':
603 			if (!(p = getdata(mp, num, 1)))
604 				goto next;
605 			num = *(unsigned char*)p;
606 			break;
607 
608 		case 'h':
609 			if (!(p = getdata(mp, num, 2)))
610 				goto next;
611 			num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 2);
612 			break;
613 
614 		case 'd':
615 		case 'l':
616 		case 'v':
617 			if (!(p = getdata(mp, num, 4)))
618 				goto next;
619 			num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 4);
620 			break;
621 
622 		case 'q':
623 			if (!(p = getdata(mp, num, 8)))
624 				goto next;
625 			num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 8);
626 			break;
627 
628 		case 'e':
629 			if (!(p = getdata(mp, num, 0)))
630 				goto next;
631 			/*FALLTHROUGH*/
632 		case 'E':
633 			if (!ep->value.sub)
634 				goto next;
635 			if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches)))
636 			{
637 				c = mp->fbsz;
638 				if (c >= sizeof(mp->nbuf))
639 					c = sizeof(mp->nbuf) - 1;
640 				p = (char*)memcpy(mp->nbuf, p, c);
641 				p[c] = 0;
642 				ccmapstr(mp->x2n, p, c);
643 				if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches)))
644 				{
645 					if (c != REG_NOMATCH)
646 						regmessage(mp, ep->value.sub, c);
647 					goto next;
648 				}
649 			}
650 			p = ep->value.sub->re_sub->re_buf;
651 			q = T(ep->desc);
652 			t = *q ? q : p;
653 			if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *t && *t != ',' && *t != '.' && *t != '\b')
654 				*b++ = ' ';
655 			b += sfsprintf(b, PATH_MAX - (b - buf), *q ? q : "%s", p + (*p == '\b'));
656 			if (ep->mime)
657 				mp->mime = ep->mime;
658 			goto checknest;
659 
660 		case 's':
661 			if (!(p = getdata(mp, num, ep->mask)))
662 				goto next;
663 			goto checkstr;
664 		case 'm':
665 			if (!(p = getdata(mp, num, 0)))
666 				goto next;
667 			/*FALLTHROUGH*/
668 		case 'M':
669 		case 'S':
670 		checkstr:
671 			for (;;)
672 			{
673 				if (*ep->value.str == '*' && !*(ep->value.str + 1) && isprint(*p))
674 					break;
675 				if ((ep->type == 'm' || ep->type == 'M') ? strmatch(p, ep->value.str) : !memcmp(p, ep->value.str, ep->mask))
676 					break;
677 				if (p == mp->nbuf || ep->mask >= sizeof(mp->nbuf))
678 					goto next;
679 				p = (char*)memcpy(mp->nbuf, p, ep->mask);
680 				p[ep->mask] = 0;
681 				ccmapstr(mp->x2n, p, ep->mask);
682 			}
683 			q = T(ep->desc);
684 			if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b')
685 				*b++ = ' ';
686 			for (t = p; (c = *t) >= 0 && c <= 0177 && isprint(c) && c != '\n'; t++);
687 			*t = 0;
688 			b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), p);
689 			*t = c;
690 			if (ep->mime)
691 				mp->mime = ep->mime;
692 			goto checknest;
693 
694 		}
695 		if (mask = ep->mask)
696 			num &= mask;
697 		switch (ep->op)
698 		{
699 
700 		case '=':
701 		case '@':
702 			if (num == ep->value.num)
703 				break;
704 			if (ep->cont != '#')
705 				goto next;
706 			if (!mask)
707 				mask = ~mask;
708 			if (ep->type == 'h')
709 			{
710 				if ((num = swapget(mp->swap = 1, p, 2) & mask) == ep->value.num)
711 				{
712 					if (!(mp->swap & (mp->swap + 1)))
713 						mp->swap = 7;
714 					goto swapped;
715 				}
716 			}
717 			else if (ep->type == 'l')
718 			{
719 				for (c = 1; c < 4; c++)
720 					if ((num = swapget(mp->swap = c, p, 4) & mask) == ep->value.num)
721 					{
722 						if (!(mp->swap & (mp->swap + 1)))
723 							mp->swap = 7;
724 						goto swapped;
725 					}
726 			}
727 			else if (ep->type == 'q')
728 			{
729 				for (c = 1; c < 8; c++)
730 					if ((num = swapget(mp->swap = c, p, 8) & mask) == ep->value.num)
731 						goto swapped;
732 			}
733 			goto next;
734 
735 		case '!':
736 			if (num != ep->value.num)
737 				break;
738 			goto next;
739 
740 		case '^':
741 			if (num ^ ep->value.num)
742 				break;
743 			goto next;
744 
745 		case '>':
746 			if (num > ep->value.num)
747 				break;
748 			goto next;
749 
750 		case '<':
751 			if (num < ep->value.num)
752 				break;
753 			goto next;
754 
755 		case 'l':
756 			if (num > 0 && mp->keep[level] && call < (MAXNEST - 1))
757 			{
758 				if (!ep->value.loop->count)
759 				{
760 					ep->value.loop->count = num;
761 					ep->value.loop->offset = off;
762 					off = ep->value.loop->start;
763 				}
764 				else if (!--ep->value.loop->count)
765 				{
766 					off = ep->value.loop->offset;
767 					goto next;
768 				}
769 				else
770 					off += ep->value.loop->size;
771 				mp->ret[++call] = ep;
772 				ep = ep->value.loop->lab;
773 				goto fun;
774 			}
775 			goto next;
776 
777 		case 'm':
778 			c = mp->swap;
779 			t = ckmagic(mp, file, b + (b > buf), st, num);
780 			mp->swap = c;
781 			if (!t)
782 				goto next;
783 			if (b > buf)
784 				*b = ' ';
785 			b += strlen(b);
786 			break;
787 
788 		case 'r':
789 #if _UWIN
790 		{
791 			char*			e;
792 			Sfio_t*			rp;
793 			Sfio_t*			gp;
794 
795 			if (!(t = strrchr(file, '.')))
796 				goto next;
797 			sfprintf(mp->tmp, "/reg/classes_root/%s", t);
798 			if (!(t = sfstruse(mp->tmp)) || !(rp = sfopen(NiL, t, "r")))
799 				goto next;
800 			*ep->desc = 0;
801 			*ep->mime = 0;
802 			gp = 0;
803 			while (t = sfgetr(rp, '\n', 1))
804 			{
805 				if (strneq(t, "Content Type=", 13))
806 				{
807 					ep->mime = vmnewof(mp->vm, ep->mime, char, sfvalue(rp), 0);
808 					strcpy(ep->mime, t + 13);
809 					if (gp)
810 						break;
811 				}
812 				else
813 				{
814 					sfprintf(mp->tmp, "/reg/classes_root/%s", t);
815 					if ((e = sfstruse(mp->tmp)) && (gp = sfopen(NiL, e, "r")))
816 					{
817 						ep->desc = vmnewof(mp->vm, ep->desc, char, strlen(t), 1);
818 						strcpy(ep->desc, t);
819 						if (*ep->mime)
820 							break;
821 					}
822 				}
823 			}
824 			sfclose(rp);
825 			if (!gp)
826 				goto next;
827 			if (!*ep->mime)
828 			{
829 				t = T(ep->desc);
830 				if (!strncasecmp(t, "microsoft", 9))
831 					t += 9;
832 				while (isspace(*t))
833 					t++;
834 				e = "application/x-ms-";
835 				ep->mime = vmnewof(mp->vm, ep->mime, char, strlen(t), strlen(e));
836 				e = strcopy(ep->mime, e);
837 				while ((c = *t++) && c != '.' && c != ' ')
838 					*e++ = isupper(c) ? tolower(c) : c;
839 				*e = 0;
840 			}
841 			while (t = sfgetr(gp, '\n', 1))
842 				if (*t && !streq(t, "\"\""))
843 				{
844 					ep->desc = vmnewof(mp->vm, ep->desc, char, sfvalue(gp), 0);
845 					strcpy(ep->desc, t);
846 					break;
847 				}
848 			sfclose(gp);
849 			if (!*ep->desc)
850 				goto next;
851 			if (!t)
852 				for (t = T(ep->desc); *t; t++)
853 					if (*t == '.')
854 						*t = ' ';
855 			if (!mp->keep[level])
856 				mp->keep[level] = 2;
857 			mp->mime = ep->mime;
858 			break;
859 		}
860 #else
861 			if (ep->cont == '#' && !mp->keep[level])
862 				mp->keep[level] = 1;
863 			goto next;
864 #endif
865 
866 		case 'v':
867 			if (!(p = getdata(mp, num, 4)))
868 				goto next;
869 			c = 0;
870 			do
871 			{
872 				num++;
873 				c = (c<<7) | (*p & 0x7f);
874 			} while (*p++ & 0x80);
875 			if (!(p = getdata(mp, num, c)))
876 				goto next;
877 			if (mp->keep[level]++ && b > buf && *(b - 1) != ' ')
878 			{
879 				*b++ = ',';
880 				*b++ = ' ';
881 			}
882 			b = vcdecomp(b, buf + PATH_MAX, (unsigned char*)p, (unsigned char*)p + c);
883 			goto checknest;
884 
885 		}
886 	swapped:
887 		q = T(ep->desc);
888 		if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b')
889 			*b++ = ' ';
890 		if (ep->type == 'd' || ep->type == 'D')
891 			b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), fmttime("%?%l", (time_t)num));
892 		else if (ep->type == 'v')
893 			b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), fmtversion(num));
894 		else
895 			b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), num);
896 		if (ep->mime && *ep->mime)
897 			mp->mime = ep->mime;
898 	checknest:
899 		if (ep->nest == '}')
900 		{
901 			if (!mp->keep[level])
902 			{
903 				b = mp->msg[level];
904 				mp->mime = mp->cap[level];
905 			}
906 			else if (level > 0)
907 				mp->keep[level - 1] = mp->keep[level];
908 			if (--level < 0)
909 			{
910 				level = 0;
911 				mp->keep[0] = 0;
912 			}
913 		}
914 		continue;
915 	next:
916 		if (ep->cont == '&')
917 			mp->keep[level] = 0;
918 		goto checknest;
919 	}
920 	if (mp->keep[level] && b > buf)
921 	{
922 		*b = 0;
923 		return buf;
924 	}
925 	return 0;
926 }
927 
928 /*
929  * check english language stats
930  */
931 
932 static int
933 ckenglish(register Magic_t* mp, int pun, int badpun)
934 {
935 	register char*	s;
936 	register int	vowl = 0;
937 	register int	freq = 0;
938 	register int	rare = 0;
939 
940 	if (5 * badpun > pun)
941 		return 0;
942 	if (2 * mp->count[';'] > mp->count['E'] + mp->count['e'])
943 		return 0;
944 	if ((mp->count['>'] + mp->count['<'] + mp->count['/']) > mp->count['E'] + mp->count['e'])
945 		return 0;
946 	for (s = "aeiou"; *s; s++)
947 		vowl += mp->count[toupper(*s)] + mp->count[*s];
948 	for (s = "etaion"; *s; s++)
949 		freq += mp->count[toupper(*s)] + mp->count[*s];
950 	for (s = "vjkqxz"; *s; s++)
951 		rare += mp->count[toupper(*s)] + mp->count[*s];
952 	return 5 * vowl >= mp->fbsz - mp->count[' '] && freq >= 10 * rare;
953 }
954 
955 /*
956  * check programming language stats
957  */
958 
959 static char*
960 cklang(register Magic_t* mp, const char* file, char* buf, struct stat* st)
961 {
962 	register int		c;
963 	register unsigned char*	b;
964 	register unsigned char*	e;
965 	register int		q;
966 	register char*		s;
967 	char*			t;
968 	char*			base;
969 	char*			suff;
970 	char*			t1;
971 	char*			t2;
972 	char*			t3;
973 	int			n;
974 	int			badpun;
975 	int			code;
976 	int			pun;
977 	Cctype_t		flags;
978 	Info_t*			ip;
979 
980 	b = (unsigned char*)mp->fbuf;
981 	e = b + mp->fbsz;
982 	memzero(mp->count, sizeof(mp->count));
983 	memzero(mp->multi, sizeof(mp->multi));
984 	memzero(mp->identifier, sizeof(mp->identifier));
985 
986 	/*
987 	 * check character coding
988 	 */
989 
990 	flags = 0;
991 	while (b < e)
992 		flags |= mp->cctype[*b++];
993 	b = (unsigned char*)mp->fbuf;
994 	code = 0;
995 	q = CC_ASCII;
996 	n = CC_MASK;
997 	for (c = 0; c < CC_MAPS; c++)
998 	{
999 		flags ^= CC_text;
1000 		if ((flags & CC_MASK) < n)
1001 		{
1002 			n = flags & CC_MASK;
1003 			q = c;
1004 		}
1005 		flags >>= CC_BIT;
1006 	}
1007 	flags = n;
1008 	if (!(flags & (CC_binary|CC_notext)))
1009 	{
1010 		if (q != CC_NATIVE)
1011 		{
1012 			code = q;
1013 			ccmaps(mp->fbuf, mp->fbsz, q, CC_NATIVE);
1014 		}
1015 		if (b[0] == '#' && b[1] == '!')
1016 		{
1017 			for (b += 2; b < e && isspace(*b); b++);
1018 			for (s = (char*)b; b < e && isprint(*b); b++);
1019 			c = *b;
1020 			*b = 0;
1021 			if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) || match(s, "/*bin*/*") || !access(s, F_OK))
1022 			{
1023 				if (t = strrchr(s, '/'))
1024 					s = t + 1;
1025 				for (t = s; *t; t++)
1026 					if (isspace(*t))
1027 					{
1028 						*t = 0;
1029 						break;
1030 					}
1031 				sfsprintf(mp->mbuf, sizeof(mp->mbuf), "application/x-%s", *s ? s : "sh");
1032 				mp->mime = mp->mbuf;
1033 				if (match(s, "*sh"))
1034 				{
1035 					t1 = T("command");
1036 					if (streq(s, "sh"))
1037 						*s = 0;
1038 					else
1039 					{
1040 						*b++ = ' ';
1041 						*b = 0;
1042 					}
1043 				}
1044 				else
1045 				{
1046 					t1 = T("interpreter");
1047 					*b++ = ' ';
1048 					*b = 0;
1049 				}
1050 				sfsprintf(mp->sbuf, sizeof(mp->sbuf), T("%s%s script"), s, t1);
1051 				s = mp->sbuf;
1052 				goto qualify;
1053 			}
1054 			*b = c;
1055 			b = (unsigned char*)mp->fbuf;
1056 		}
1057 		badpun = 0;
1058 		pun = 0;
1059 		q = 0;
1060 		s = 0;
1061 		t = 0;
1062 		while (b < e)
1063 		{
1064 			c = *b++;
1065 			mp->count[c]++;
1066 			if (c == q && (q != '*' || *b == '/' && b++))
1067 			{
1068 				mp->multi[q]++;
1069 				q = 0;
1070 			}
1071 			else if (c == '\\')
1072 			{
1073 				s = 0;
1074 				b++;
1075 			}
1076 			else if (!q)
1077 			{
1078 				if (isalpha(c) || c == '_')
1079 				{
1080 					if (!s)
1081 						s = (char*)b - 1;
1082 				}
1083 				else if (!isdigit(c))
1084 				{
1085 					if (s)
1086 					{
1087 						if (s > mp->fbuf)
1088 							switch (*(s - 1))
1089 							{
1090 							case ':':
1091 								if (*b == ':')
1092 									mp->multi[':']++;
1093 								break;
1094 							case '.':
1095 								if (((char*)b - s) == 3 && (s == (mp->fbuf + 1) || *(s - 2) == '\n'))
1096 									mp->multi['.']++;
1097 								break;
1098 							case '\n':
1099 							case '\\':
1100 								if (*b == '{')
1101 									t = (char*)b + 1;
1102 								break;
1103 							case '{':
1104 								if (s == t && *b == '}')
1105 									mp->multi['X']++;
1106 								break;
1107 							}
1108 							if (!mp->idtab)
1109 							{
1110 								if (mp->idtab = dtnew(mp->vm, &mp->dtdisc, Dthash))
1111 									for (q = 0; q < elementsof(dict); q++)
1112 										dtinsert(mp->idtab, &dict[q]);
1113 								else if (mp->disc->errorf)
1114 									(*mp->disc->errorf)(mp, mp->disc, 3, "out of space");
1115 								q = 0;
1116 							}
1117 							if (mp->idtab)
1118 							{
1119 								*(b - 1) = 0;
1120 								if (ip = (Info_t*)dtmatch(mp->idtab, s))
1121 									mp->identifier[ip->value]++;
1122 								*(b - 1) = c;
1123 							}
1124 							s = 0;
1125 						}
1126 					switch (c)
1127 					{
1128 					case '\t':
1129 						if (b == (unsigned char*)(mp->fbuf + 1) || *(b - 2) == '\n')
1130 							mp->multi['\t']++;
1131 						break;
1132 					case '"':
1133 					case '\'':
1134 						q = c;
1135 						break;
1136 					case '/':
1137 						if (*b == '*')
1138 							q = *b++;
1139 						else if (*b == '/')
1140 							q = '\n';
1141 						break;
1142 					case '$':
1143 						if (*b == '(' && *(b + 1) != ' ')
1144 							mp->multi['$']++;
1145 						break;
1146 					case '{':
1147 					case '}':
1148 					case '[':
1149 					case ']':
1150 					case '(':
1151 						mp->multi[c]++;
1152 						break;
1153 					case ')':
1154 						mp->multi[c]++;
1155 						goto punctuation;
1156 					case ':':
1157 						if (*b == ':' && isspace(*(b + 1)) && b > (unsigned char*)(mp->fbuf + 1) && isspace(*(b - 2)))
1158 							mp->multi[':']++;
1159 						goto punctuation;
1160 					case '.':
1161 					case ',':
1162 					case '%':
1163 					case ';':
1164 					case '?':
1165 					punctuation:
1166 						pun++;
1167 						if (*b != ' ' && *b != '\n')
1168 							badpun++;
1169 						break;
1170 					}
1171 				}
1172 			}
1173 		}
1174 	}
1175 	else
1176 		while (b < e)
1177 			mp->count[*b++]++;
1178 	base = (t1 = strrchr(file, '/')) ? t1 + 1 : (char*)file;
1179 	suff = (t1 = strrchr(base, '.')) ? t1 + 1 : "";
1180 	if (!flags)
1181 	{
1182 		if (match(suff, "*sh|bat|cmd"))
1183 			goto id_sh;
1184 		if (match(base, "*@(mkfile)"))
1185 			goto id_mk;
1186 		if (match(base, "*@(makefile|.mk)"))
1187 			goto id_make;
1188 		if (match(base, "*@(mamfile|.mam)"))
1189 			goto id_mam;
1190 		if (match(suff, "[cly]?(pp|xx|++)|cc|ll|yy"))
1191 			goto id_c;
1192 		if (match(suff, "f"))
1193 			goto id_fortran;
1194 		if (match(suff, "htm+(l)"))
1195 			goto id_html;
1196 		if (match(suff, "cpy"))
1197 			goto id_copybook;
1198 		if (match(suff, "cob|cbl|cb2"))
1199 			goto id_cobol;
1200 		if (match(suff, "pl[1i]"))
1201 			goto id_pl1;
1202 		if (match(suff, "tex"))
1203 			goto id_tex;
1204 		if (match(suff, "asm|s"))
1205 			goto id_asm;
1206 		if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) && (!suff || suff != strchr(suff, '.')))
1207 		{
1208 		id_sh:
1209 			s = T("command script");
1210 			mp->mime = "application/sh";
1211 			goto qualify;
1212 		}
1213 		if (strmatch(mp->fbuf, "From * [0-9][0-9]:[0-9][0-9]:[0-9][0-9] *"))
1214 		{
1215 			s = T("mail message");
1216 			mp->mime = "message/rfc822";
1217 			goto qualify;
1218 		}
1219 		if (match(base, "*@(mkfile)"))
1220 		{
1221 		id_mk:
1222 			s = "mkfile";
1223 			mp->mime = "application/mk";
1224 			goto qualify;
1225 		}
1226 		if (match(base, "*@(makefile|.mk)") || mp->multi['\t'] >= mp->count[':'] && (mp->multi['$'] > 0 || mp->multi[':'] > 0))
1227 		{
1228 		id_make:
1229 			s = "makefile";
1230 			mp->mime = "application/make";
1231 			goto qualify;
1232 		}
1233 		if (mp->multi['.'] >= 3)
1234 		{
1235 			s = T("nroff input");
1236 			mp->mime = "application/x-troff";
1237 			goto qualify;
1238 		}
1239 		if (mp->multi['X'] >= 3)
1240 		{
1241 			s = T("TeX input");
1242 			mp->mime = "application/x-tex";
1243 			goto qualify;
1244 		}
1245 		if (mp->fbsz < SF_BUFSIZE &&
1246 		    (mp->multi['('] == mp->multi[')'] &&
1247 		     mp->multi['{'] == mp->multi['}'] &&
1248 		     mp->multi['['] == mp->multi[']']) ||
1249 		    mp->fbsz >= SF_BUFSIZE &&
1250 		    (mp->multi['('] >= mp->multi[')'] &&
1251 		     mp->multi['{'] >= mp->multi['}'] &&
1252 		     mp->multi['['] >= mp->multi[']']))
1253 		{
1254 			c = mp->identifier[ID_INCL1];
1255 			if (c >= 2 && mp->identifier[ID_INCL2] >= c && mp->identifier[ID_INCL3] >= c && mp->count['.'] >= c ||
1256 			    mp->identifier[ID_C] >= 5 && mp->count[';'] >= 5 ||
1257 			    mp->count['='] >= 20 && mp->count[';'] >= 20)
1258 			{
1259 			id_c:
1260 				t1 = "";
1261 				t2 = "c ";
1262 				t3 = T("program");
1263 				switch (*suff)
1264 				{
1265 				case 'c':
1266 				case 'C':
1267 					mp->mime = "application/x-cc";
1268 					break;
1269 				case 'l':
1270 				case 'L':
1271 					t1 = "lex ";
1272 					mp->mime = "application/x-lex";
1273 					break;
1274 				default:
1275 					t3 = T("header");
1276 					if (mp->identifier[ID_YACC] < 5 || mp->count['%'] < 5)
1277 					{
1278 						mp->mime = "application/x-cc";
1279 						break;
1280 					}
1281 					/*FALLTHROUGH*/
1282 				case 'y':
1283 				case 'Y':
1284 					t1 = "yacc ";
1285 					mp->mime = "application/x-yacc";
1286 					break;
1287 				}
1288 				if (mp->identifier[ID_CPLUSPLUS] >= 3)
1289 				{
1290 					t2 = "c++ ";
1291 					mp->mime = "application/x-c++";
1292 				}
1293 				sfsprintf(mp->sbuf, sizeof(mp->sbuf), "%s%s%s", t1, t2, t3);
1294 				s = mp->sbuf;
1295 				goto qualify;
1296 			}
1297 		}
1298 		if (mp->identifier[ID_MAM1] >= 2 && mp->identifier[ID_MAM3] >= 2 &&
1299 		    (mp->fbsz < SF_BUFSIZE && mp->identifier[ID_MAM1] == mp->identifier[ID_MAM2] ||
1300 		     mp->fbsz >= SF_BUFSIZE && mp->identifier[ID_MAM1] >= mp->identifier[ID_MAM2]))
1301 		{
1302 		id_mam:
1303 			s = T("mam program");
1304 			mp->mime = "application/x-mam";
1305 			goto qualify;
1306 		}
1307 		if (mp->identifier[ID_FORTRAN] >= 8)
1308 		{
1309 		id_fortran:
1310 			s = T("fortran program");
1311 			mp->mime = "application/x-fortran";
1312 			goto qualify;
1313 		}
1314 		if (mp->identifier[ID_HTML] > 0 && mp->count['<'] >= 8 && (c = mp->count['<'] - mp->count['>']) >= -2 && c <= 2)
1315 		{
1316 		id_html:
1317 			s = T("html input");
1318 			mp->mime = "text/html";
1319 			goto qualify;
1320 		}
1321 		if (mp->identifier[ID_COPYBOOK] > 0 && mp->identifier[ID_COBOL] == 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
1322 		{
1323 		id_copybook:
1324 			s = T("cobol copybook");
1325 			mp->mime = "application/x-cobol";
1326 			goto qualify;
1327 		}
1328 		if (mp->identifier[ID_COBOL] > 0 && mp->identifier[ID_COPYBOOK] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
1329 		{
1330 		id_cobol:
1331 			s = T("cobol program");
1332 			mp->mime = "application/x-cobol";
1333 			goto qualify;
1334 		}
1335 		if (mp->identifier[ID_PL1] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2)
1336 		{
1337 		id_pl1:
1338 			s = T("pl1 program");
1339 			mp->mime = "application/x-pl1";
1340 			goto qualify;
1341 		}
1342 		if (mp->count['{'] >= 6 && (c = mp->count['{'] - mp->count['}']) >= -2 && c <= 2 && mp->count['\\'] >= mp->count['{'])
1343 		{
1344 		id_tex:
1345 			s = T("TeX input");
1346 			mp->mime = "text/tex";
1347 			goto qualify;
1348 		}
1349 		if (mp->identifier[ID_ASM] >= 4)
1350 		{
1351 		id_asm:
1352 			s = T("as program");
1353 			mp->mime = "application/x-as";
1354 			goto qualify;
1355 		}
1356 		if (ckenglish(mp, pun, badpun))
1357 		{
1358 			s = T("english text");
1359 			mp->mime = "text/plain";
1360 			goto qualify;
1361 		}
1362 	}
1363 	else if (streq(base, "core"))
1364 	{
1365 		mp->mime = "x-system/core";
1366 		return T("core dump");
1367 	}
1368 	if (flags & (CC_binary|CC_notext))
1369 	{
1370 		b = (unsigned char*)mp->fbuf;
1371 		e = b + mp->fbsz;
1372 		n = 0;
1373 		for (;;)
1374 		{
1375 			c = *b++;
1376 			q = 0;
1377 			while (c & 0x80)
1378 			{
1379 				c <<= 1;
1380 				q++;
1381 			}
1382 			switch (q)
1383 			{
1384 			case 4:
1385 				if (b < e && (*b++ & 0xc0) != 0x80)
1386 					break;
1387 			case 3:
1388 				if (b < e && (*b++ & 0xc0) != 0x80)
1389 					break;
1390 			case 2:
1391 				if (b < e && (*b++ & 0xc0) != 0x80)
1392 					break;
1393 				n = 1;
1394 			case 0:
1395 				if (b >= e)
1396 				{
1397 					if (n)
1398 					{
1399 						flags &= ~(CC_binary|CC_notext);
1400 						flags |= CC_utf_8;
1401 					}
1402 					break;
1403 				}
1404 				continue;
1405 			}
1406 			break;
1407 		}
1408 	}
1409 	if (flags & (CC_binary|CC_notext))
1410 	{
1411 		unsigned long	d = 0;
1412 
1413 		if ((q = mp->fbsz / UCHAR_MAX) >= 2)
1414 		{
1415 			/*
1416 			 * compression/encryption via standard deviation
1417 			 */
1418 
1419 
1420 			for (c = 0; c < UCHAR_MAX; c++)
1421 			{
1422 				pun = mp->count[c] - q;
1423 				d += pun * pun;
1424 			}
1425 			d /= mp->fbsz;
1426 		}
1427 		if (d <= 0)
1428 			s = T("binary");
1429 		else if (d < 4)
1430 			s = T("encrypted");
1431 		else if (d < 16)
1432 			s = T("packed");
1433 		else if (d < 64)
1434 			s = T("compressed");
1435 		else if (d < 256)
1436 			s = T("delta");
1437 		else
1438 			s = T("data");
1439 		mp->mime = "application/octet-stream";
1440 		return s;
1441 	}
1442 	mp->mime = "text/plain";
1443 	if (flags & CC_utf_8)
1444 		s = (flags & CC_control) ? T("utf-8 text with control characters") : T("utf-8 text");
1445 	else if (flags & CC_latin)
1446 		s = (flags & CC_control) ? T("latin text with control characters") : T("latin text");
1447 	else
1448 		s = (flags & CC_control) ? T("text with control characters") : T("text");
1449  qualify:
1450 	if (!flags && mp->count['\n'] >= mp->count['\r'] && mp->count['\n'] <= (mp->count['\r'] + 1) && mp->count['\r'])
1451 	{
1452 		t = "dos ";
1453 		mp->mime = "text/dos";
1454 	}
1455 	else
1456 		t = "";
1457 	if (code)
1458 	{
1459 		if (code == CC_ASCII)
1460 			sfsprintf(buf, PATH_MAX, "ascii %s%s", t, s);
1461 		else
1462 		{
1463 			sfsprintf(buf, PATH_MAX, "ebcdic%d %s%s", code - 1, t, s);
1464 			mp->mime = "text/ebcdic";
1465 		}
1466 		s = buf;
1467 	}
1468 	else if (*t)
1469 	{
1470 		sfsprintf(buf, PATH_MAX, "%s%s", t, s);
1471 		s = buf;
1472 	}
1473 	return s;
1474 }
1475 
1476 /*
1477  * return the basic magic string for file,st in buf,size
1478  */
1479 
1480 static char*
1481 type(register Magic_t* mp, const char* file, struct stat* st, char* buf, int size)
1482 {
1483 	register char*	s;
1484 	register char*	t;
1485 
1486 	mp->mime = 0;
1487 	if (!S_ISREG(st->st_mode))
1488 	{
1489 		if (S_ISDIR(st->st_mode))
1490 		{
1491 			mp->mime = "x-system/dir";
1492 			return T("directory");
1493 		}
1494 		if (S_ISLNK(st->st_mode))
1495 		{
1496 			mp->mime = "x-system/lnk";
1497 			s = buf;
1498 			s += sfsprintf(s, PATH_MAX, T("symbolic link to "));
1499 			if (pathgetlink(file, s, size - (s - buf)) < 0)
1500 				return T("cannot read symbolic link text");
1501 			return buf;
1502 		}
1503 		if (S_ISBLK(st->st_mode))
1504 		{
1505 			mp->mime = "x-system/blk";
1506 			sfsprintf(buf, PATH_MAX, T("block special (%s)"), fmtdev(st));
1507 			return buf;
1508 		}
1509 		if (S_ISCHR(st->st_mode))
1510 		{
1511 			mp->mime = "x-system/chr";
1512 			sfsprintf(buf, PATH_MAX, T("character special (%s)"), fmtdev(st));
1513 			return buf;
1514 		}
1515 		if (S_ISFIFO(st->st_mode))
1516 		{
1517 			mp->mime = "x-system/fifo";
1518 			return "fifo";
1519 		}
1520 #ifdef S_ISSOCK
1521 		if (S_ISSOCK(st->st_mode))
1522 		{
1523 			mp->mime = "x-system/sock";
1524 			return "socket";
1525 		}
1526 #endif
1527 	}
1528 	if (!(mp->fbmx = st->st_size))
1529 		s = T("empty");
1530 	else if (!mp->fp)
1531 		s = T("cannot read");
1532 	else
1533 	{
1534 		mp->fbsz = sfread(mp->fp, mp->fbuf, sizeof(mp->fbuf) - 1);
1535 		if (mp->fbsz < 0)
1536 			s = fmterror(errno);
1537 		else if (mp->fbsz == 0)
1538 			s = T("empty");
1539 		else
1540 		{
1541 			mp->fbuf[mp->fbsz] = 0;
1542 			mp->xoff = 0;
1543 			mp->xbsz = 0;
1544 			if (!(s = ckmagic(mp, file, buf, st, 0)))
1545 				s = cklang(mp, file, buf, st);
1546 		}
1547 	}
1548 	if (!mp->mime)
1549 		mp->mime = "application/unknown";
1550 	else if ((t = strchr(mp->mime, '%')) && *(t + 1) == 's' && !*(t + 2))
1551 	{
1552 		register char*	b;
1553 		register char*	be;
1554 		register char*	m;
1555 		register char*	me;
1556 
1557 		b = mp->mime;
1558 		me = (m = mp->mime = mp->fbuf) + sizeof(mp->fbuf) - 1;
1559 		while (m < me && b < t)
1560 			*m++ = *b++;
1561 		b = t = s;
1562 		for (;;)
1563 		{
1564 			if (!(be = strchr(t, ' ')))
1565 			{
1566 				be = b + strlen(b);
1567 				break;
1568 			}
1569 			if (*(be - 1) == ',' || strneq(be + 1, "data", 4) || strneq(be + 1, "file", 4))
1570 				break;
1571 			b = t;
1572 			t = be + 1;
1573 		}
1574 		while (m < me && b < be)
1575 			if ((*m++ = *b++) == ' ')
1576 				*(m - 1) = '-';
1577 		*m = 0;
1578 	}
1579 	return s;
1580 }
1581 
1582 /*
1583  * low level for magicload()
1584  */
1585 
1586 static int
1587 load(register Magic_t* mp, char* file, register Sfio_t* fp)
1588 {
1589 	register Entry_t*	ep;
1590 	register char*		p;
1591 	register char*		p2;
1592 	char*			p3;
1593 	char*			next;
1594 	int			n;
1595 	int			lge;
1596 	int			lev;
1597 	int			ent;
1598 	int			old;
1599 	int			cont;
1600 	Info_t*			ip;
1601 	Entry_t*		ret;
1602 	Entry_t*		first;
1603 	Entry_t*		last = 0;
1604 	Entry_t*		fun['z' - 'a' + 1];
1605 
1606 	memzero(fun, sizeof(fun));
1607 	cont = '$';
1608 	ent = 0;
1609 	lev = 0;
1610 	old = 0;
1611 	ret = 0;
1612 	error_info.file = file;
1613 	error_info.line = 0;
1614 	first = ep = vmnewof(mp->vm, 0, Entry_t, 1, 0);
1615 	while (p = sfgetr(fp, '\n', 1))
1616 	{
1617 		error_info.line++;
1618 		for (; isspace(*p); p++);
1619 
1620 		/*
1621 		 * nesting
1622 		 */
1623 
1624 		switch (*p)
1625 		{
1626 		case 0:
1627 		case '#':
1628 			cont = '#';
1629 			continue;
1630 		case '{':
1631 			if (++lev < MAXNEST)
1632 				ep->nest = *p;
1633 			else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
1634 				(*mp->disc->errorf)(mp, mp->disc, 1, "{ ... } operator nesting too deep -- %d max", MAXNEST);
1635 			continue;
1636 		case '}':
1637 			if (!last || lev <= 0)
1638 			{
1639 				if (mp->disc->errorf)
1640 					(*mp->disc->errorf)(mp, mp->disc, 2, "`%c': invalid nesting", *p);
1641 			}
1642 			else if (lev-- == ent)
1643 			{
1644 				ent = 0;
1645 				ep->cont = ':';
1646 				ep->offset = ret->offset;
1647 				ep->nest = ' ';
1648 				ep->type = ' ';
1649 				ep->op = ' ';
1650 				ep->desc = "[RETURN]";
1651 				last = ep;
1652 				ep = ret->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
1653 				ret = 0;
1654 			}
1655 			else
1656 				last->nest = *p;
1657 			continue;
1658 		default:
1659 			if (*(p + 1) == '{' || *(p + 1) == '(' && *p != '+' && *p != '>' && *p != '&' && *p != '|')
1660 			{
1661 				n = *p++;
1662 				if (n >= 'a' && n <= 'z')
1663 					n -= 'a';
1664 				else
1665 				{
1666 					if (mp->disc->errorf)
1667 						(*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n);
1668 					n = 0;
1669 				}
1670 				if (ret && mp->disc->errorf)
1671 					(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a');
1672 				if (*p == '{')
1673 				{
1674 					ent = ++lev;
1675 					ret = ep;
1676 					ep->desc = "[FUNCTION]";
1677 				}
1678 				else
1679 				{
1680 					if (*(p + 1) != ')' && mp->disc->errorf)
1681 						(*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function call argument list", n + 'a');
1682 					ep->desc = "[CALL]";
1683 				}
1684 				ep->cont = cont;
1685 				ep->offset = n;
1686 				ep->nest = ' ';
1687 				ep->type = ' ';
1688 				ep->op = ' ';
1689 				last = ep;
1690 				ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
1691 				if (ret)
1692 					fun[n] = last->value.lab = ep;
1693 				else if (!(last->value.lab = fun[n]) && mp->disc->errorf)
1694 					(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a');
1695 				continue;
1696 			}
1697 			if (!ep->nest)
1698 				ep->nest = (lev > 0 && lev != ent) ? ('0' + lev - !!ent) : ' ';
1699 			break;
1700 		}
1701 
1702 		/*
1703 		 * continuation
1704 		 */
1705 
1706 		cont = '$';
1707 		switch (*p)
1708 		{
1709 		case '>':
1710 			old = 1;
1711 			if (*(p + 1) == *p)
1712 			{
1713 				/*
1714 				 * old style nesting push
1715 				 */
1716 
1717 				p++;
1718 				old = 2;
1719 				if (!lev && last)
1720 				{
1721 					lev = 1;
1722 					last->nest = '{';
1723 					if (last->cont == '>')
1724 						last->cont = '&';
1725 					ep->nest = '1';
1726 				}
1727 			}
1728 			/*FALLTHROUGH*/
1729 		case '+':
1730 		case '&':
1731 		case '|':
1732 			ep->cont = *p++;
1733 			break;
1734 		default:
1735 			if ((mp->flags & MAGIC_VERBOSE) && !isalpha(*p) && mp->disc->errorf)
1736 				(*mp->disc->errorf)(mp, mp->disc, 1, "`%c': invalid line continuation operator", *p);
1737 			/*FALLTHROUGH*/
1738 		case '*':
1739 		case '0': case '1': case '2': case '3': case '4':
1740 		case '5': case '6': case '7': case '8': case '9':
1741 			ep->cont = (lev > 0) ? '&' : '#';
1742 			break;
1743 		}
1744 		switch (old)
1745 		{
1746 		case 1:
1747 			old = 0;
1748 			if (lev)
1749 			{
1750 				/*
1751 				 * old style nesting pop
1752 				 */
1753 
1754 				lev = 0;
1755 				if (last)
1756 					last->nest = '}';
1757 				ep->nest = ' ';
1758 				if (ep->cont == '&')
1759 					ep->cont = '#';
1760 			}
1761 			break;
1762 		case 2:
1763 			old = 1;
1764 			break;
1765 		}
1766 		if (isdigit(*p))
1767 		{
1768 			/*
1769 			 * absolute offset
1770 			 */
1771 
1772 			ep->offset = strton(p, &next, NiL, 0);
1773 			p2 = next;
1774 		}
1775 		else
1776 		{
1777 			for (p2 = p; *p2 && !isspace(*p2); p2++);
1778 			if (!*p2)
1779 			{
1780 				if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
1781 					(*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p);
1782 				continue;
1783 			}
1784 
1785 			/*
1786 			 * offset expression
1787 			 */
1788 
1789 			*p2++ = 0;
1790 			ep->expr = vmstrdup(mp->vm, p);
1791 			if (isalpha(*p))
1792 				ep->offset = (ip = (Info_t*)dtmatch(mp->infotab, p)) ? ip->value : 0;
1793 			else if (*p == '(' && ep->cont == '>')
1794 			{
1795 				/*
1796 				 * convert old style indirection to @
1797 				 */
1798 
1799 				p = ep->expr + 1;
1800 				for (;;)
1801 				{
1802 					switch (*p++)
1803 					{
1804 					case 0:
1805 					case '@':
1806 					case '(':
1807 						break;
1808 					case ')':
1809 						break;
1810 					default:
1811 						continue;
1812 					}
1813 					break;
1814 				}
1815 				if (*--p == ')')
1816 				{
1817 					*p = 0;
1818 					*ep->expr = '@';
1819 				}
1820 			}
1821 		}
1822 		for (; isspace(*p2); p2++);
1823 		for (p = p2; *p2 && !isspace(*p2); p2++);
1824 		if (!*p2)
1825 		{
1826 			if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
1827 				(*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p);
1828 			continue;
1829 		}
1830 		*p2++ = 0;
1831 
1832 		/*
1833 		 * type
1834 		 */
1835 
1836 		if ((*p == 'b' || *p == 'l') && *(p + 1) == 'e')
1837 		{
1838 			ep->swap = ~(*p == 'l' ? 7 : 0);
1839 			p += 2;
1840 		}
1841 		if (*p == 's')
1842 		{
1843 			if (*(p + 1) == 'h')
1844 				ep->type = 'h';
1845 			else
1846 				ep->type = 's';
1847 		}
1848 		else if (*p == 'a')
1849 			ep->type = 's';
1850 		else
1851 			ep->type = *p;
1852 		if (p = strchr(p, '&'))
1853 		{
1854 			/*
1855 			 * old style mask
1856 			 */
1857 
1858 			ep->mask = strton(++p, NiL, NiL, 0);
1859 		}
1860 		for (; isspace(*p2); p2++);
1861 		if (ep->mask)
1862 			*--p2 = '=';
1863 
1864 		/*
1865 		 * comparison operation
1866 		 */
1867 
1868 		p = p2;
1869 		if (p2 = strchr(p, '\t'))
1870 			*p2++ = 0;
1871 		else
1872 		{
1873 			int	qe = 0;
1874 			int	qn = 0;
1875 
1876 			/*
1877 			 * assume balanced {}[]()\\""'' field
1878 			 */
1879 
1880 			for (p2 = p;;)
1881 			{
1882 				switch (n = *p2++)
1883 				{
1884 				case 0:
1885 					break;
1886 				case '{':
1887 					if (!qe)
1888 						qe = '}';
1889 					if (qe == '}')
1890 						qn++;
1891 					continue;
1892 				case '(':
1893 					if (!qe)
1894 						qe = ')';
1895 					if (qe == ')')
1896 						qn++;
1897 					continue;
1898 				case '[':
1899 					if (!qe)
1900 						qe = ']';
1901 					if (qe == ']')
1902 						qn++;
1903 					continue;
1904 				case '}':
1905 				case ')':
1906 				case ']':
1907 					if (qe == n && qn > 0)
1908 						qn--;
1909 					continue;
1910 				case '"':
1911 				case '\'':
1912 					if (!qe)
1913 						qe = n;
1914 					else if (qe == n)
1915 						qe = 0;
1916 					continue;
1917 				case '\\':
1918 					if (*p2)
1919 						p2++;
1920 					continue;
1921 				default:
1922 					if (!qe && isspace(n))
1923 						break;
1924 					continue;
1925 				}
1926 				if (n)
1927 					*(p2 - 1) = 0;
1928 				else
1929 					p2--;
1930 				break;
1931 			}
1932 		}
1933 		lge = 0;
1934 		if (ep->type == 'e' || ep->type == 'm' || ep->type == 's')
1935 			ep->op = '=';
1936 		else
1937 		{
1938 			if (*p == '&')
1939 			{
1940 				ep->mask = strton(++p, &next, NiL, 0);
1941 				p = next;
1942 			}
1943 			switch (*p)
1944 			{
1945 			case '=':
1946 			case '>':
1947 			case '<':
1948 			case '*':
1949 				ep->op = *p++;
1950 				if (*p == '=')
1951 				{
1952 					p++;
1953 					switch (ep->op)
1954 					{
1955 					case '>':
1956 						lge = -1;
1957 						break;
1958 					case '<':
1959 						lge = 1;
1960 						break;
1961 					}
1962 				}
1963 				break;
1964 			case '!':
1965 			case '@':
1966 				ep->op = *p++;
1967 				if (*p == '=')
1968 					p++;
1969 				break;
1970 			case 'x':
1971 				p++;
1972 				ep->op = '*';
1973 				break;
1974 			default:
1975 				ep->op = '=';
1976 				if (ep->mask)
1977 					ep->value.num = ep->mask;
1978 				break;
1979 			}
1980 		}
1981 		if (ep->op != '*' && !ep->value.num)
1982 		{
1983 			if (ep->type == 'e')
1984 			{
1985 				if (ep->value.sub = vmnewof(mp->vm, 0, regex_t, 1, 0))
1986 				{
1987 					ep->value.sub->re_disc = &mp->redisc;
1988 					if (!(n = regcomp(ep->value.sub, p, REG_DELIMITED|REG_LENIENT|REG_NULL|REG_DISCIPLINE)))
1989 					{
1990 						p += ep->value.sub->re_npat;
1991 						if (!(n = regsubcomp(ep->value.sub, p, NiL, 0, 0)))
1992 							p += ep->value.sub->re_npat;
1993 					}
1994 					if (n)
1995 					{
1996 						regmessage(mp, ep->value.sub, n);
1997 						ep->value.sub = 0;
1998 					}
1999 					else if (*p && mp->disc->errorf)
2000 						(*mp->disc->errorf)(mp, mp->disc, 1, "invalid characters after substitution: %s", p);
2001 				}
2002 			}
2003 			else if (ep->type == 'm')
2004 			{
2005 				ep->mask = stresc(p) + 1;
2006 				ep->value.str = vmnewof(mp->vm, 0, char, ep->mask + 1, 0);
2007 				memcpy(ep->value.str, p, ep->mask);
2008 				if ((!ep->expr || !ep->offset) && !strmatch(ep->value.str, "\\!\\(*\\)"))
2009 					ep->value.str[ep->mask - 1] = '*';
2010 			}
2011 			else if (ep->type == 's')
2012 			{
2013 				ep->mask = stresc(p);
2014 				ep->value.str = vmnewof(mp->vm, 0, char, ep->mask, 0);
2015 				memcpy(ep->value.str, p, ep->mask);
2016 			}
2017 			else if (*p == '\'')
2018 			{
2019 				stresc(p);
2020 				ep->value.num = *(unsigned char*)(p + 1) + lge;
2021 			}
2022 			else if (strmatch(p, "+([a-z])\\(*\\)"))
2023 			{
2024 				char*	t;
2025 
2026 				t = p;
2027 				ep->type = 'V';
2028 				ep->op = *p;
2029 				while (*p && *p++ != '(');
2030 				switch (ep->op)
2031 				{
2032 				case 'l':
2033 					n = *p++;
2034 					if (n < 'a' || n > 'z')
2035 					{
2036 						if (mp->disc->errorf)
2037 							(*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n);
2038 					}
2039 					else if (!fun[n -= 'a'])
2040 					{
2041 						if (mp->disc->errorf)
2042 							(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a');
2043 					}
2044 					else
2045 					{
2046 						ep->value.loop = vmnewof(mp->vm, 0, Loop_t, 1, 0);
2047 						ep->value.loop->lab = fun[n];
2048 						while (*p && *p++ != ',');
2049 						ep->value.loop->start = strton(p, &t, NiL, 0);
2050 						while (*t && *t++ != ',');
2051 						ep->value.loop->size = strton(t, &t, NiL, 0);
2052 					}
2053 					break;
2054 				case 'm':
2055 				case 'r':
2056 					ep->desc = vmnewof(mp->vm, 0, char, 32, 0);
2057 					ep->mime = vmnewof(mp->vm, 0, char, 32, 0);
2058 					break;
2059 				case 'v':
2060 					break;
2061 				default:
2062 					if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
2063 						(*mp->disc->errorf)(mp, mp->disc, 1, "%-.*s: unknown function", p - t, t);
2064 					break;
2065 				}
2066 			}
2067 			else
2068 			{
2069 				ep->value.num = strton(p, NiL, NiL, 0) + lge;
2070 				if (ep->op == '@')
2071 					ep->value.num = swapget(0, (char*)&ep->value.num, sizeof(ep->value.num));
2072 			}
2073 		}
2074 
2075 		/*
2076 		 * file description
2077 		 */
2078 
2079 		if (p2)
2080 		{
2081 			for (; isspace(*p2); p2++);
2082 			if (p = strchr(p2, '\t'))
2083 			{
2084 				/*
2085 				 * check for message catalog index
2086 				 */
2087 
2088 				*p++ = 0;
2089 				if (isalpha(*p2))
2090 				{
2091 					for (p3 = p2; isalnum(*p3); p3++);
2092 					if (*p3++ == ':')
2093 					{
2094 						for (; isdigit(*p3); p3++);
2095 						if (!*p3)
2096 						{
2097 							for (p2 = p; isspace(*p2); p2++);
2098 							if (p = strchr(p2, '\t'))
2099 								*p++ = 0;
2100 						}
2101 					}
2102 				}
2103 			}
2104 			stresc(p2);
2105 			ep->desc = vmstrdup(mp->vm, p2);
2106 			if (p)
2107 			{
2108 				for (; isspace(*p); p++);
2109 				if (*p)
2110 					ep->mime = vmstrdup(mp->vm, p);
2111 			}
2112 		}
2113 		else
2114 			ep->desc = "";
2115 
2116 		/*
2117 		 * get next entry
2118 		 */
2119 
2120 		last = ep;
2121 		ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0);
2122 	}
2123 	if (last)
2124 	{
2125 		last->next = 0;
2126 		if (mp->magiclast)
2127 			mp->magiclast->next = first;
2128 		else
2129 			mp->magic = first;
2130 		mp->magiclast = last;
2131 	}
2132 	vmfree(mp->vm, ep);
2133 	if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf)
2134 	{
2135 		if (lev < 0)
2136 			(*mp->disc->errorf)(mp, mp->disc, 1, "too many } operators");
2137 		else if (lev > 0)
2138 			(*mp->disc->errorf)(mp, mp->disc, 1, "not enough } operators");
2139 		if (ret)
2140 			(*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a');
2141 	}
2142 	error_info.file = 0;
2143 	error_info.line = 0;
2144 	return 0;
2145 }
2146 
2147 /*
2148  * load a magic file into mp
2149  */
2150 
2151 int
2152 magicload(register Magic_t* mp, const char* file, unsigned long flags)
2153 {
2154 	register char*		s;
2155 	register char*		e;
2156 	register char*		t;
2157 	int			n;
2158 	int			found;
2159 	int			list;
2160 	Sfio_t*			fp;
2161 
2162 	mp->flags = mp->disc->flags | flags;
2163 	found = 0;
2164 	if (list = !(s = (char*)file) || !*s || (*s == '-' || *s == '.') && !*(s + 1))
2165 	{
2166 		if (!(s = getenv(MAGIC_FILE_ENV)) || !*s)
2167 			s = MAGIC_FILE;
2168 	}
2169 	for (;;)
2170 	{
2171 		if (!list)
2172 			e = 0;
2173 		else if (e = strchr(s, ':'))
2174 		{
2175 			/*
2176 			 * ok, so ~ won't work for the last list element
2177 			 * we do it for MAGIC_FILES_ENV anyway
2178 			 */
2179 
2180 			if ((strneq(s, "~/", n = 2) || strneq(s, "$HOME/", n = 6) || strneq(s, "${HOME}/", n = 8)) && (t = getenv("HOME")))
2181 			{
2182 				sfputr(mp->tmp, t, -1);
2183 				s += n - 1;
2184 			}
2185 			sfwrite(mp->tmp, s, e - s);
2186 			if (!(s = sfstruse(mp->tmp)))
2187 				goto nospace;
2188 		}
2189 		if (!*s || streq(s, "-"))
2190 			s = MAGIC_FILE;
2191 		if (!(fp = sfopen(NiL, s, "r")))
2192 		{
2193 			if (list)
2194 			{
2195 				if (!(t = pathpath(mp->fbuf, s, "", PATH_REGULAR|PATH_READ)) && !strchr(s, '/'))
2196 				{
2197 					strcpy(mp->fbuf, s);
2198 					sfprintf(mp->tmp, "%s/%s", MAGIC_DIR, mp->fbuf);
2199 					if (!(s = sfstruse(mp->tmp)))
2200 						goto nospace;
2201 					if (!(t = pathpath(mp->fbuf, s, "", PATH_REGULAR|PATH_READ)))
2202 						goto next;
2203 				}
2204 				if (!(fp = sfopen(NiL, t, "r")))
2205 					goto next;
2206 			}
2207 			else
2208 			{
2209 				if (mp->disc->errorf)
2210 					(*mp->disc->errorf)(mp, mp->disc, 3, "%s: cannot open magic file", s);
2211 				return -1;
2212 			}
2213 		}
2214 		found = 1;
2215 		n = load(mp, s, fp);
2216 		sfclose(fp);
2217 		if (n && !list)
2218 			return -1;
2219 	next:
2220 		if (!e)
2221 			break;
2222 		s = e + 1;
2223 	}
2224 	if (!found)
2225 	{
2226 		if (mp->flags & MAGIC_VERBOSE)
2227 		{
2228 			if (mp->disc->errorf)
2229 				(*mp->disc->errorf)(mp, mp->disc, 2, "cannot find magic file");
2230 		}
2231 		return -1;
2232 	}
2233 	return 0;
2234  nospace:
2235 	if (mp->disc->errorf)
2236 		(*mp->disc->errorf)(mp, mp->disc, 3, "out of space");
2237 	return -1;
2238 }
2239 
2240 /*
2241  * open a magic session
2242  */
2243 
2244 Magic_t*
2245 magicopen(Magicdisc_t* disc)
2246 {
2247 	register Magic_t*	mp;
2248 	register int		i;
2249 	register int		n;
2250 	register int		f;
2251 	register int		c;
2252 	register Vmalloc_t*	vm;
2253 	unsigned char*		map[CC_MAPS + 1];
2254 
2255 	if (!(vm = vmopen(Vmdcheap, Vmbest, 0)))
2256 		return 0;
2257 	if (!(mp = vmnewof(vm, 0, Magic_t, 1, 0)))
2258 	{
2259 		vmclose(vm);
2260 		return 0;
2261 	}
2262 	mp->id = lib;
2263 	mp->disc = disc;
2264 	mp->vm = vm;
2265 	mp->flags = disc->flags;
2266 	mp->redisc.re_version = REG_VERSION;
2267 	mp->redisc.re_flags = REG_NOFREE;
2268 	mp->redisc.re_errorf = (regerror_t)disc->errorf;
2269 	mp->redisc.re_resizef = (regresize_t)vmgetmem;
2270 	mp->redisc.re_resizehandle = (void*)mp->vm;
2271 	mp->dtdisc.key = offsetof(Info_t, name);
2272 	mp->dtdisc.link = offsetof(Info_t, link);
2273 	if (!(mp->tmp = sfstropen()) || !(mp->infotab = dtnew(mp->vm, &mp->dtdisc, Dthash)))
2274 		goto bad;
2275 	for (n = 0; n < elementsof(info); n++)
2276 		dtinsert(mp->infotab, &info[n]);
2277 	for (i = 0; i < CC_MAPS; i++)
2278 		map[i] = ccmap(i, CC_ASCII);
2279 	mp->x2n = ccmap(CC_ALIEN, CC_NATIVE);
2280 	for (n = 0; n <= UCHAR_MAX; n++)
2281 	{
2282 		f = 0;
2283 		i = CC_MAPS;
2284 		while (--i >= 0)
2285 		{
2286 			c = ccmapchr(map[i], n);
2287 			f = (f << CC_BIT) | CCTYPE(c);
2288 		}
2289 		mp->cctype[n] = f;
2290 	}
2291 	return mp;
2292  bad:
2293 	magicclose(mp);
2294 	return 0;
2295 }
2296 
2297 /*
2298  * close a magicopen() session
2299  */
2300 
2301 int
2302 magicclose(register Magic_t* mp)
2303 {
2304 	if (!mp)
2305 		return -1;
2306 	if (mp->tmp)
2307 		sfstrclose(mp->tmp);
2308 	if (mp->vm)
2309 		vmclose(mp->vm);
2310 	return 0;
2311 }
2312 
2313 /*
2314  * return the magic string for file with optional stat info st
2315  */
2316 
2317 char*
2318 magictype(register Magic_t* mp, Sfio_t* fp, const char* file, register struct stat* st)
2319 {
2320 	off_t	off;
2321 	char*	s;
2322 
2323 	mp->flags = mp->disc->flags;
2324 	mp->mime = 0;
2325 	if (!st)
2326 		s = T("cannot stat");
2327 	else
2328 	{
2329 		if (mp->fp = fp)
2330 			off = sfseek(mp->fp, (off_t)0, SEEK_CUR);
2331 		s = type(mp, file, st, mp->tbuf, sizeof(mp->tbuf));
2332 		if (mp->fp)
2333 			sfseek(mp->fp, off, SEEK_SET);
2334 		if (!(mp->flags & MAGIC_MIME))
2335 		{
2336 			if (S_ISREG(st->st_mode) && (st->st_size > 0) && (st->st_size < 128))
2337 				sfprintf(mp->tmp, "%s ", T("short"));
2338 			sfprintf(mp->tmp, "%s", s);
2339 			if (!mp->fp && (st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)))
2340 				sfprintf(mp->tmp, ", %s", S_ISDIR(st->st_mode) ? T("searchable") : T("executable"));
2341 			if (st->st_mode & S_ISUID)
2342 				sfprintf(mp->tmp, ", setuid=%s", fmtuid(st->st_uid));
2343 			if (st->st_mode & S_ISGID)
2344 				sfprintf(mp->tmp, ", setgid=%s", fmtgid(st->st_gid));
2345 			if (st->st_mode & S_ISVTX)
2346 				sfprintf(mp->tmp, ", sticky");
2347 			if (!(s = sfstruse(mp->tmp)))
2348 				s = T("out of space");
2349 		}
2350 	}
2351 	if (mp->flags & MAGIC_MIME)
2352 		s = mp->mime;
2353 	if (!s)
2354 		s = T("error");
2355 	return s;
2356 }
2357 
2358 /*
2359  * list the magic table in mp on sp
2360  */
2361 
2362 int
2363 magiclist(register Magic_t* mp, register Sfio_t* sp)
2364 {
2365 	register Entry_t*	ep = mp->magic;
2366 	register Entry_t*	rp = 0;
2367 
2368 	mp->flags = mp->disc->flags;
2369 	sfprintf(sp, "cont\toffset\ttype\top\tmask\tvalue\tmime\tdesc\n");
2370 	while (ep)
2371 	{
2372 		sfprintf(sp, "%c %c\t", ep->cont, ep->nest);
2373 		if (ep->expr)
2374 			sfprintf(sp, "%s", ep->expr);
2375 		else
2376 			sfprintf(sp, "%ld", ep->offset);
2377 		sfprintf(sp, "\t%s%c\t%c\t%lo\t", ep->swap == (char)~3 ? "L" : ep->swap == (char)~0 ? "B" : "", ep->type, ep->op, ep->mask);
2378 		switch (ep->type)
2379 		{
2380 		case 'm':
2381 		case 's':
2382 			sfputr(sp, fmtesc(ep->value.str), -1);
2383 			break;
2384 		case 'V':
2385 			switch (ep->op)
2386 			{
2387 			case 'l':
2388 				sfprintf(sp, "loop(%d,%d,%d,%d)", ep->value.loop->start, ep->value.loop->size, ep->value.loop->count, ep->value.loop->offset);
2389 				break;
2390 			case 'v':
2391 				sfprintf(sp, "vcodex()");
2392 				break;
2393 			default:
2394 				sfprintf(sp, "%p", ep->value.str);
2395 				break;
2396 			}
2397 			break;
2398 		default:
2399 			sfprintf(sp, "%lo", ep->value.num);
2400 			break;
2401 		}
2402 		sfprintf(sp, "\t%s\t%s\n", ep->mime ? ep->mime : "", fmtesc(ep->desc));
2403 		if (ep->cont == '$' && !ep->value.lab->mask)
2404 		{
2405 			rp = ep;
2406 			ep = ep->value.lab;
2407 		}
2408 		else
2409 		{
2410 			if (ep->cont == ':')
2411 			{
2412 				ep = rp;
2413 				ep->value.lab->mask = 1;
2414 			}
2415 			ep = ep->next;
2416 		}
2417 	}
2418 	return 0;
2419 }
2420