xref: /titanic_44/usr/src/lib/libast/common/misc/mime.c (revision 2b4a78020b9c38d1b95e2f3fefa6d6e4be382d1f)
1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *           Copyright (c) 1985-2007 AT&T Knowledge Ventures            *
5 *                      and is licensed under the                       *
6 *                  Common Public License, Version 1.0                  *
7 *                      by AT&T Knowledge Ventures                      *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *            http://www.opensource.org/licenses/cpl1.0.txt             *
11 *         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                  David Korn <dgk@research.att.com>                   *
19 *                   Phong Vo <kpv@research.att.com>                    *
20 *                                                                      *
21 ***********************************************************************/
22 #pragma prototyped
23 
24 /*
25  * Glenn Fowler
26  * AT&T Research
27  *
28  * mime/mailcap support library
29  */
30 
/*
 * identification string carrying the @(#) and $Id$ markers
 * not referenced by the code in this file
 */
static const char id[] = "\n@(#)$Id: mime library (AT&T Research) 2002-10-29 $\0\n";

/*
 * library name stored in Mime_t.id by mimeopen()
 */
static const char lib[] = "libast:mime";
34 
35 #include "mimelib.h"
36 
/*
 * one name/value attribute of a capability, kept on a singly linked list
 */
typedef struct Att_s Att_t;

struct Att_s
{
	struct Att_s*	next;	/* next attribute, 0 at the end of the list	*/
	char*		name;	/* attribute name				*/
	char*		value;	/* attribute value				*/
};
43 
44 typedef struct Cap_s
45 {
46 	struct Cap_s*	next;
47 	unsigned long	flags;
48 	Att_t		att;
49 	char*		test;
50 	char		data[1];
51 } Cap_t;
52 
53 typedef struct
54 {
55 	Dtlink_t	link;
56 	Cap_t*		cap;
57 	Cap_t*		pac;
58 	char		name[1];
59 } Ent_t;
60 
/*
 * counted string: size chars starting at data, not NUL terminated
 */
typedef struct
{
	char*		data;	/* first char		*/
	int		size;	/* number of chars	*/
} String_t;
66 
67 typedef struct
68 {
69 	char*		next;
70 	String_t	name;
71 	String_t	value;
72 } Parse_t;
73 
74 typedef struct
75 {
76 	const char*	pattern;
77 	int		prefix;
78 	Sfio_t*		fp;
79 	int		hit;
80 } Walk_t;
81 
/*
 * convert c to lower case
 *
 * c may be a sign extended plain char, so it is narrowed to unsigned char
 * before the <ctype.h> calls, which are undefined for negative arguments
 * other than EOF
 * an upper case letter is returned in lower case, anything else is
 * returned unchanged
 */

static int
lower(register int c)
{
	register int	u = (unsigned char)c;

	return isupper(u) ? tolower(u) : c;
}
91 
92 /*
93  * Ent_t case insensitive comparf
94  */
95 
96 static int
97 order(Dt_t* dt, void* a, void* b, Dtdisc_t* disc)
98 {
99 	return strcasecmp(a, b);
100 }
101 
102 /*
103  * Cap_t free
104  */
105 
106 static void
107 dropcap(register Cap_t* cap)
108 {
109 	register Att_t*	att;
110 
111 	while (att = cap->att.next)
112 	{
113 		cap->att.next = att->next;
114 		free(att);
115 	}
116 	free(cap);
117 }
118 
119 /*
120  * Ent_t freef
121  */
122 
123 static void
124 drop(Dt_t* dt, void* object, Dtdisc_t* disc)
125 {
126 	register Ent_t*	ent = (Ent_t*)object;
127 	register Cap_t*	cap;
128 
129 	while (cap = ent->cap)
130 	{
131 		ent->cap = cap->next;
132 		dropcap(cap);
133 	}
134 	free(ent);
135 }
136 
137 /*
138  * add mime type entry in s to mp
139  */
140 
141 int
142 mimeset(Mime_t* mp, register char* s, unsigned long flags)
143 {
144 	register Ent_t*	ent;
145 	register Cap_t*	cap;
146 	register Att_t*	att;
147 	register char*	t;
148 	register char*	v;
149 	register char*	k;
150 	char*		x;
151 	Att_t*		tta;
152 	int		q;
153 
154 	for (; isspace(*s); s++);
155 	if (*s && *s != '#')
156 	{
157 		cap = 0;
158 		for (v = s; *v && *v != ';'; v++)
159 			if (isspace(*v) || *v == '/' && *(v + 1) == '*')
160 				*v = 0;
161 		if (*v)
162 		{
163 			*v++ = 0;
164 			do
165 			{
166 				for (; isspace(*v); v++);
167 				if (cap)
168 				{
169 					for (t = v; *t && !isspace(*t) && *t != '='; t++);
170 					for (k = t; isspace(*t); t++);
171 					if (!*t || *t == '=' || *t == ';')
172 					{
173 						if (*t)
174 							while (isspace(*++t));
175 						*k = 0;
176 						k = v;
177 						v = t;
178 					}
179 					else
180 						k = 0;
181 				}
182 				if (*v == '"')
183 					q = *v++;
184 				else
185 					q = 0;
186 				for (t = v; *t; t++)
187 					if (*t == '\\')
188 					{
189 						switch (*(t + 1))
190 						{
191 						case 0:
192 						case '\\':
193 						case '%':
194 							*t = *(t + 1);
195 							break;
196 						default:
197 							*t = ' ';
198 							break;
199 						}
200 						if (!*++t)
201 							break;
202 					}
203 					else if (*t == q)
204 					{
205 						*t = ' ';
206 						q = 0;
207 					}
208 					else if (*t == ';' && !q)
209 					{
210 						*t = ' ';
211 						break;
212 					}
213 				for (; t > v && isspace(*(t - 1)); t--);
214 				if (t <= v && (!cap || !k))
215 					break;
216 				if (!cap)
217 				{
218 					if (!(cap = newof(0, Cap_t, 1, strlen(v) + 1)))
219 						return -1;
220 					if (*t)
221 						*t++ = 0;
222 					tta = &cap->att;
223 					tta->name = "default";
224 					x = strcopy(tta->value = cap->data, v) + 1;
225 				}
226 				else if (k)
227 				{
228 					if (*t)
229 						*t++ = 0;
230 					if (!(att = newof(0, Att_t, 1, 0)))
231 						return -1;
232 					x = strcopy(att->name = x, k) + 1;
233 					x = strcopy(att->value = x, v) + 1;
234 					tta = tta->next = att;
235 					if (!strcasecmp(k, "test"))
236 						cap->test = att->value;
237 				}
238 			} while (*(v = t));
239 		}
240 		ent = (Ent_t*)dtmatch(mp->cap, s);
241 		if (cap)
242 		{
243 			if (ent)
244 			{
245 				register Cap_t*	dup;
246 				register Cap_t*	pud;
247 
248 				for (pud = 0, dup = ent->cap; dup; pud = dup, dup = dup->next)
249 					if (!cap->test && !dup->test || cap->test && dup->test && streq(cap->test, dup->test))
250 					{
251 						if (flags & MIME_REPLACE)
252 						{
253 							if (pud)
254 								pud->next = cap;
255 							else
256 								ent->cap = cap;
257 							if (!(cap->next = dup->next))
258 								ent->pac = cap;
259 							cap = dup;
260 						}
261 						dropcap(cap);
262 						return 0;
263 					}
264 				ent->pac = ent->pac->next = cap;
265 			}
266 			else if (!(ent = newof(0, Ent_t, 1, strlen(s) + 1)))
267 				return -1;
268 			else
269 			{
270 				strcpy(ent->name, s);
271 				ent->cap = ent->pac = cap;
272 				dtinsert(mp->cap, ent);
273 			}
274 		}
275 		else if (ent && (flags & MIME_REPLACE))
276 			dtdelete(mp->cap, ent);
277 	}
278 	return 0;
279 }
280 
281 /*
282  * load mime type files into mp
283  */
284 
285 int
286 mimeload(Mime_t* mp, const char* file, unsigned long flags)
287 {
288 	register char*	s;
289 	register char*	t;
290 	register char*	e;
291 	register int	n;
292 	Sfio_t*		fp;
293 
294 	if (!(s = (char*)file))
295 	{
296 		flags |= MIME_LIST;
297 		if (!(s = getenv(MIME_FILES_ENV)))
298 			s = MIME_FILES;
299 	}
300 	for (;;)
301 	{
302 		if (!(flags & MIME_LIST))
303 			e = 0;
304 		else if (e = strchr(s, ':'))
305 		{
306 			/*
307 			 * ok, so ~ won't work for the last list element
308 			 * we do it for MIME_FILES_ENV anyway
309 			 */
310 
311 			if ((strneq(s, "~/", n = 2) || strneq(s, "$HOME/", n = 6) || strneq(s, "${HOME}/", n = 8)) && (t = getenv("HOME")))
312 			{
313 				sfputr(mp->buf, t, -1);
314 				s += n - 1;
315 			}
316 			sfwrite(mp->buf, s, e - s);
317 			if (!(s = sfstruse(mp->buf)))
318 				return -1;
319 		}
320 		if (fp = tokline(s, SF_READ, NiL))
321 		{
322 			while (t = sfgetr(fp, '\n', 1))
323 				if (mimeset(mp, t, flags))
324 					break;
325 			sfclose(fp);
326 		}
327 		else if (!(flags & MIME_LIST))
328 			return -1;
329 		if (!e)
330 			break;
331 		s = e + 1;
332 	}
333 	return 0;
334 }
335 
336 /*
337  * mimelist walker
338  */
339 
340 static int
341 list(Dt_t* dt, void* object, void* context)
342 {
343 	register Walk_t*	wp = (Walk_t*)context;
344 	register Ent_t*		ent = (Ent_t*)object;
345 	register Cap_t*		cap;
346 	register Att_t*		att;
347 
348 	if (!wp->pattern || !strncasecmp(ent->name, wp->pattern, wp->prefix) && (!ent->name[wp->prefix] || ent->name[wp->prefix] == '/'))
349 	{
350 		wp->hit++;
351 		for (cap = ent->cap; cap; cap = cap->next)
352 		{
353 			sfprintf(wp->fp, "%s", ent->name);
354 			for (att = &cap->att; att; att = att->next)
355 			{
356 				sfprintf(wp->fp, "\n\t");
357 				if (att != &cap->att)
358 				{
359 					sfprintf(wp->fp, "%s", att->name);
360 					if (*att->value)
361 						sfprintf(wp->fp, " = ");
362 				}
363 				sfputr(wp->fp, att->value, -1);
364 			}
365 			sfprintf(wp->fp, "\n");
366 		}
367 	}
368 	return 0;
369 }
370 
371 /*
372  * find entry matching type
373  * if exact match fails then left and right x- and right version number
374  * permutations are attempted
375  */
376 
377 static Ent_t*
378 find(Mime_t* mp, const char* type)
379 {
380 	register char*	lp;
381 	register char*	rp;
382 	register char*	rb;
383 	register char*	rv;
384 	register int	rc;
385 	register int	i;
386 	char*		s;
387 	Ent_t*		ent;
388 	char		buf[256];
389 
390 	static const char*	prefix[] = { "", "", "x-", "x-", "" };
391 
392 	if ((ent = (Ent_t*)dtmatch(mp->cap, type)) ||
393 	    !(rp = strchr(lp = (char*)type, '/')) ||
394 	    strlen(lp) >= sizeof(buf))
395 		return ent;
396 	strcpy(buf, type);
397 	rp = buf + (rp - lp);
398 	*rp++ = 0;
399 	if (*rp == 'x' && *(rp + 1) == '-')
400 		rp += 2;
401 	lp = buf;
402 	if (*lp == 'x' && *(lp + 1) == '-')
403 		lp += 2;
404 	rb = rp;
405 	for (rv = rp + strlen(rp); rv > rp && (isdigit(*(rv - 1)) || *(rv - 1) == '.'); rv--);
406 	rc = *rv;
407 	do
408 	{
409 		rp = rb;
410 		do
411 		{
412 			for (i = 0; i < elementsof(prefix) - 1; i++)
413 			{
414 				sfprintf(mp->buf, "%s%s/%s%s", prefix[i], lp, prefix[i + 1], rp);
415 				if (!(s = sfstruse(mp->buf)))
416 					return 0;
417 				if (ent = (Ent_t*)dtmatch(mp->cap, s))
418 					return ent;
419 				if (rc)
420 				{
421 					*rv = 0;
422 					sfprintf(mp->buf, "%s%s/%s%s", prefix[i], lp, prefix[i + 1], rp);
423 					if (!(s = sfstruse(mp->buf)))
424 						return 0;
425 					if (ent = (Ent_t*)dtmatch(mp->cap, s))
426 						return ent;
427 					*rv = rc;
428 				}
429 			}
430 			while (*rp && *rp++ != '-');
431 		} while (*rp);
432 		while (*lp && *lp++ != '-');
433 	} while (*lp);
434 	return (Ent_t*)dtmatch(mp->cap, buf);
435 }
436 
437 /*
438  * list mime <type,data> for pat on fp
439  */
440 
441 int
442 mimelist(Mime_t* mp, Sfio_t* fp, register const char* pattern)
443 {
444 	Ent_t*	ent;
445 	Walk_t	ws;
446 
447 	ws.fp = fp;
448 	ws.hit = 0;
449 	ws.prefix = 0;
450 	if (ws.pattern = pattern)
451 	{
452 		while (*pattern && *pattern++ != '/');
453 		if (!*pattern || *pattern == '*' && !*(pattern + 1))
454 			ws.prefix = pattern - ws.pattern;
455 		else if (ent = find(mp, ws.pattern))
456 		{
457 			ws.pattern = 0;
458 			list(mp->cap, ent, &ws);
459 			return ws.hit;
460 		}
461 	}
462 	dtwalk(mp->cap, list, &ws);
463 	return ws.hit;
464 }
465 
466 /*
467  * get next arg in pp
468  * 0 returned if no more args
469  */
470 
471 static int
472 arg(register Parse_t* pp, int first)
473 {
474 	register char*	s;
475 	register int	c;
476 	register int	q;
477 	int		x;
478 
479 	for (s = pp->next; isspace(*s) && *s != '\n'; s++);
480 	if (!*s || *s == '\n')
481 	{
482 		pp->next = s;
483 		return 0;
484 	}
485 	pp->name.data = s;
486 	pp->value.data = 0;
487 	q = 0;
488 	x = 0;
489 	while ((c = *s++) && c != ';' && c != '\n')
490 	{
491 		if (c == '"')
492 		{
493 			q = 1;
494 			if (pp->value.data)
495 			{
496 				pp->value.data = s;
497 				if (x)
498 					x = -1;
499 				else
500 					x = 1;
501 			}
502 			else if (!x && pp->name.data == (s - 1))
503 			{
504 				x = 1;
505 				pp->name.data = s;
506 			}
507 			do
508 			{
509 				if (!(c = *s++) || c == '\n')
510 				{
511 					s--;
512 					break;
513 				}
514 			} while (c != '"');
515 			if (first < 0 || x > 0)
516 			{
517 				c = ';';
518 				break;
519 			}
520  		}
521 		else if (c == '=' && !first)
522 		{
523 			first = 1;
524 			pp->name.size = s - pp->name.data - 1;
525 			pp->value.data = s;
526 		}
527 		else if (first >= 0 && isspace(c))
528 			break;
529 	}
530 	pp->next = s - (c != ';');
531 	if (first >= 0 || !q)
532 		for (s--; s > pp->name.data && isspace(*(s - 1)); s--);
533 	if (pp->value.data)
534 		pp->value.size = s - pp->value.data - (q && first < 0);
535 	else
536 	{
537 		pp->value.size = 0;
538 		pp->name.size = s - pp->name.data - (q && first < 0);
539 	}
540 	if (first >= 0 && pp->name.size > 0 && pp->name.data[pp->name.size - 1] == ':')
541 		return 0;
542 	return pp->name.size > 0;
543 }
544 
545 /*
546  * low level for mimeview()
547  */
548 
549 static char*
550 expand(Mime_t* mp, register char* s, const char* name, const char* type, const char* opts)
551 {
552 	register char*	t;
553 	register int	c;
554 	Parse_t		pp;
555 
556 	mp->disc->flags |= MIME_PIPE;
557 	for (;;)
558 	{
559 		switch (c = *s++)
560 		{
561 		case 0:
562 		case '\n':
563 			break;
564 		case '%':
565 			switch (c = *s++)
566 			{
567 			case 's':
568 				sfputr(mp->buf, (char*)name, -1);
569 				mp->disc->flags &= ~MIME_PIPE;
570 				continue;
571 			case 't':
572 				sfputr(mp->buf, (char*)type, -1);
573 				continue;
574 			case '{':
575 				for (t = s; *s && *s != '}'; s++);
576 				if (*s && (c = s++ - t) && (pp.next = (char*)opts))
577 					while (arg(&pp, 0))
578 						if (pp.name.size == c && !strncasecmp(pp.name.data, t, c))
579 						{
580 							if (pp.value.size)
581 								sfwrite(mp->buf, pp.value.data, pp.value.size);
582 							break;
583 						}
584 				continue;
585 			}
586 			/*FALLTHROUGH*/
587 		default:
588 			sfputc(mp->buf, c);
589 			continue;
590 		}
591 		break;
592 	}
593 	return sfstruse(mp->buf);
594 }
595 
596 /*
597  * return expanded command/path/value for <view,name,type,opts>
598  * return value valid until next mime*() call
599  */
600 
601 char*
602 mimeview(Mime_t* mp, const char* view, const char* name, const char* type, const char* opts)
603 {
604 	register Ent_t*	ent;
605 	register Cap_t*	cap;
606 	register Att_t*	att;
607 	register char*	s;
608 	int		c;
609 
610 	if (ent = find(mp, type))
611 	{
612 		cap = ent->cap;
613 		if (!view || strcasecmp(view, "test"))
614 			while (s = cap->test)
615 			{
616 				if (s = expand(mp, s, name, type, opts))
617 				{
618 					Parse_t	a1;
619 					Parse_t	a2;
620 					Parse_t	a3;
621 					Parse_t	a4;
622 
623 					/*
624 					 * try to do a few common cases here
625 					 * mailcap consistency is a winning
626 					 * strategy
627 					 */
628 
629 					a1.next = s;
630 					if (arg(&a1, -1))
631 					{
632 						if ((c = *a1.name.data == '!') && --a1.name.size <= 0 && !arg(&a1, -1))
633 							goto lose;
634 						if (a1.name.size == 6 && strneq(a1.name.data, "strcmp", 6) || a1.name.size == 10 && strneq(a1.name.data, "strcasecmp", 10))
635 						{
636 							a2.next = a1.next;
637 							if (!arg(&a2, -1))
638 								goto lose;
639 							a3.next = a2.next;
640 							if (!arg(&a3, -1))
641 								goto lose;
642 							if (a2.name.size != a3.name.size)
643 								c ^= 0;
644 							else c ^= (a1.name.size == 6 ? strncmp : strncasecmp)(a2.name.data, a3.name.data, a2.name.size) == 0;
645 							if (c)
646 								break;
647 							goto skip;
648 						}
649 						else if (a1.name.size == 4 && strneq(a1.name.data, "test", 4))
650 						{
651 							if (!arg(&a1, -1))
652 								goto lose;
653 							a2.next = a1.next;
654 							if (!arg(&a2, -1) || a2.name.size > 2 || a2.name.size == 1 && *a2.name.data != '=' || a2.name.size == 2 && (!strneq(a1.name.data, "!=", 2) || !strneq(a2.name.data, "==", 2)))
655 								goto lose;
656 							a3.next = a2.next;
657 							if (!arg(&a3, -1))
658 								goto lose;
659 							if (*a3.name.data == '`' && *(a3.name.data + a3.name.size - 1) == '`')
660 							{
661 								a4 = a3;
662 								a3 = a1;
663 								a1 = a4;
664 							}
665 							if (*a1.name.data == '`' && *(a1.name.data + a1.name.size - 1) == '`')
666 							{
667 								a1.next = a1.name.data + 1;
668 								if (!arg(&a1, -1) || a1.name.size != 4 || !strneq(a1.name.data, "echo", 4) || !arg(&a1, -1))
669 									goto lose;
670 								a4.next = a1.next;
671 								if (!arg(&a4, 1) || a4.name.size < 21 || !strneq(a4.name.data, "| tr '[A-Z]' '[a-z]'`", 21))
672 									goto lose;
673 							}
674 							else
675 								a4.name.size = 0;
676 							c = *a2.name.data == '!';
677 							if (a1.name.size != a3.name.size)
678 								c ^= 0;
679 							else c ^= (a4.name.size ? strncasecmp : strncmp)(a1.name.data, a3.name.data, a1.name.size) == 0;
680 							if (c)
681 								break;
682 							goto skip;
683 						}
684 					}
685 				lose:
686 					if (!system(s))
687 						break;
688 				}
689 			skip:
690 				if (!(cap = cap->next))
691 					return 0;
692 			}
693 		att = &cap->att;
694 		if (view && *view && !streq(view, "-"))
695 			while (strcasecmp(view, att->name))
696 				if (!(att = att->next))
697 					return 0;
698 		return expand(mp, att->value, name, type, opts);
699 	}
700 	return 0;
701 }
702 
/*
 * lower case identifier prefix strcmp
 * if e!=0 then it will point to the next char after the match
 *
 * v is the identifier to match and s the text that may start with it
 * the chars of v are compared ignoring case while they are letters or
 * digits, or are _ - / that also appear at the same place in s
 * 0 returned if all of those match and s does not continue with an
 * identifier char (letter, digit, _ or -), otherwise the difference
 * of the first chars that differ
 */

int
mimecmp(register const char* s, register const char* v, char** e)
{
	register int	n;

	/*
	 * the <ctype.h> arguments are narrowed to unsigned char because
	 * the calls are undefined for negative plain char values
	 */

	while (isalnum((unsigned char)*v) || *v == *s && (*v == '_' || *v == '-' || *v == '/'))
		if (n = lower(*s++) - lower(*v++))
			return n;
	if (!isalnum((unsigned char)*s) && *s != '_' && *s != '-')
	{
		if (e)
			*e = (char*)s;
		return 0;
	}
	return lower(*s) - lower(*v);
}
724 
725 /*
726  * parse mime headers in strsearch(tab,num,siz) from s
727  * return >0 if mime header consumed
728  */
729 
730 int
731 mimehead(Mime_t* mp, void* tab, size_t num, size_t siz, register char* s)
732 {
733 	register void*	p;
734 	char*		e;
735 	Parse_t		pp;
736 	Mimevalue_f	set;
737 
738 	set = mp->disc->valuef;
739 	if (!strncasecmp(s, "original-", 9))
740 		s += 9;
741 	if (!strncasecmp(s, "content-", 8))
742 	{
743 		s += 8;
744 		if ((p = strsearch(tab, num, siz, (Strcmp_f)mimecmp, s, &e)) && *e == ':')
745 		{
746 			pp.next = e + 1;
747 			if (arg(&pp, 1))
748 			{
749 				if ((*set)(mp, p, pp.name.data, pp.name.size, mp->disc))
750 					return 0;
751 				while (arg(&pp, 0))
752 					if (pp.value.size &&
753 					    (p = strsearch(tab, num, siz, (Strcmp_f)mimecmp, pp.name.data, &e)) &&
754 					    (*set)(mp, p, pp.value.data, pp.value.size, mp->disc))
755 						return 0;
756 				return 1;
757 			}
758 		}
759 		else if (strchr(s, ':'))
760 			return 1;
761 	}
762 	return !strncasecmp(s, "x-", 2);
763 }
764 
765 /*
766  * open a mime library handle
767  */
768 
769 Mime_t*
770 mimeopen(Mimedisc_t* disc)
771 {
772 	register Mime_t*	mp;
773 
774 	if (!(mp = newof(0, Mime_t, 1, 0)))
775 		return 0;
776 	mp->id = lib;
777 	mp->disc = disc;
778 	mp->dict.key = offsetof(Ent_t, name);
779 	mp->dict.comparf = order;
780 	mp->dict.freef = drop;
781 	if (!(mp->buf = sfstropen()) || !(mp->cap = dtopen(&mp->dict, Dtorder)))
782 	{
783 		mimeclose(mp);
784 		return 0;
785 	}
786 	return mp;
787 }
788 
789 /*
790  * close a mimeopen() handle
791  */
792 
793 int
794 mimeclose(Mime_t* mp)
795 {
796 	if (mp)
797 	{
798 		if (mp->buf)
799 			sfclose(mp->buf);
800 		if (mp->cap)
801 			dtclose(mp->cap);
802 		if (mp->freef)
803 			(*mp->freef)(mp);
804 		free(mp);
805 	}
806 	return 0;
807 }
808