1 /*********************************************************************** 2 * * 3 * This software is part of the ast package * 4 * Copyright (c) 1985-2011 AT&T Intellectual Property * 5 * and is licensed under the * 6 * Eclipse Public License, Version 1.0 * 7 * by AT&T Intellectual Property * 8 * * 9 * A copy of the License is available at * 10 * http://www.eclipse.org/org/documents/epl-v10.html * 11 * (with md5 checksum b35adb5213ca9657e911e9befb180842) * 12 * * 13 * Information and Software Systems Research * 14 * AT&T Research * 15 * Florham Park NJ * 16 * * 17 * Glenn Fowler <gsf@research.att.com> * 18 * David Korn <dgk@research.att.com> * 19 * Phong Vo <kpv@research.att.com> * 20 * * 21 ***********************************************************************/ 22 #pragma prototyped 23 /* 24 * Glenn Fowler 25 * AT&T Research 26 * 27 * library interface to file 28 * 29 * the sum of the hacks {s5,v10,planix} is _____ than the parts 30 */ 31 32 static const char id[] = "\n@(#)$Id: magic library (AT&T Research) 2011-03-09 $\0\n"; 33 34 static const char lib[] = "libast:magic"; 35 36 #include <ast.h> 37 #include <ctype.h> 38 #include <ccode.h> 39 #include <dt.h> 40 #include <modex.h> 41 #include <error.h> 42 #include <regex.h> 43 #include <swap.h> 44 45 #define T(m) (*m?ERROR_translate(NiL,NiL,lib,m):m) 46 47 #define match(s,p) strgrpmatch(s,p,NiL,0,STR_LEFT|STR_RIGHT|STR_ICASE) 48 49 #define MAXNEST 10 /* { ... 
} nesting limit */ 50 #define MINITEM 4 /* magic buffer rounding */ 51 52 typedef struct /* identifier dictionary entry */ 53 { 54 const char name[16]; /* identifier name */ 55 int value; /* identifier value */ 56 Dtlink_t link; /* dictionary link */ 57 } Info_t; 58 59 typedef struct Edit /* edit substitution */ 60 { 61 struct Edit* next; /* next in list */ 62 regex_t* from; /* from pattern */ 63 } Edit_t; 64 65 struct Entry; 66 67 typedef struct /* loop info */ 68 { 69 struct Entry* lab; /* call this function */ 70 int start; /* start here */ 71 int size; /* increment by this amount */ 72 int count; /* dynamic loop count */ 73 int offset; /* dynamic offset */ 74 } Loop_t; 75 76 typedef struct Entry /* magic file entry */ 77 { 78 struct Entry* next; /* next in list */ 79 char* expr; /* offset expression */ 80 union 81 { 82 unsigned long num; 83 char* str; 84 struct Entry* lab; 85 regex_t* sub; 86 Loop_t* loop; 87 } value; /* comparison value */ 88 char* desc; /* file description */ 89 char* mime; /* file mime type */ 90 unsigned long offset; /* offset in bytes */ 91 unsigned long mask; /* mask before compare */ 92 char cont; /* continuation operation */ 93 char type; /* datum type */ 94 char op; /* comparison operation */ 95 char nest; /* { or } nesting operation */ 96 char swap; /* forced swap order */ 97 } Entry_t; 98 99 #define CC_BIT 5 100 101 #if (CC_MAPS*CC_BIT) <= (CHAR_BIT*2) 102 typedef unsigned short Cctype_t; 103 #else 104 typedef unsigned long Cctype_t; 105 #endif 106 107 #define CC_text 0x01 108 #define CC_control 0x02 109 #define CC_latin 0x04 110 #define CC_binary 0x08 111 #define CC_utf_8 0x10 112 113 #define CC_notext CC_text /* CC_text is flipped before checking */ 114 115 #define CC_MASK (CC_binary|CC_latin|CC_control|CC_text) 116 117 #define CCTYPE(c) (((c)>0240)?CC_binary:((c)>=0200)?CC_latin:((c)<040&&(c)!=007&&(c)!=011&&(c)!=012&&(c)!=013&&(c)!=015)?CC_control:CC_text) 118 119 #define ID_NONE 0 120 #define ID_ASM 1 121 #define ID_C 2 122 
#define ID_COBOL 3 123 #define ID_COPYBOOK 4 124 #define ID_CPLUSPLUS 5 125 #define ID_FORTRAN 6 126 #define ID_HTML 7 127 #define ID_INCL1 8 128 #define ID_INCL2 9 129 #define ID_INCL3 10 130 #define ID_MAM1 11 131 #define ID_MAM2 12 132 #define ID_MAM3 13 133 #define ID_NOTEXT 14 134 #define ID_PL1 15 135 #define ID_YACC 16 136 137 #define ID_MAX ID_YACC 138 139 #define INFO_atime 1 140 #define INFO_blocks 2 141 #define INFO_ctime 3 142 #define INFO_fstype 4 143 #define INFO_gid 5 144 #define INFO_mode 6 145 #define INFO_mtime 7 146 #define INFO_name 8 147 #define INFO_nlink 9 148 #define INFO_size 10 149 #define INFO_uid 11 150 151 #define _MAGIC_PRIVATE_ \ 152 Magicdisc_t* disc; /* discipline */ \ 153 Vmalloc_t* vm; /* vmalloc region */ \ 154 Entry_t* magic; /* parsed magic table */ \ 155 Entry_t* magiclast; /* last entry in magic */ \ 156 char* mime; /* MIME type */ \ 157 unsigned char* x2n; /* CC_ALIEN=>CC_NATIVE */ \ 158 char fbuf[SF_BUFSIZE + 1]; /* file data */ \ 159 char xbuf[SF_BUFSIZE + 1]; /* indirect file data */ \ 160 char nbuf[256]; /* !CC_NATIVE data */ \ 161 char mbuf[64]; /* mime string */ \ 162 char sbuf[64]; /* type suffix string */ \ 163 char tbuf[2 * PATH_MAX]; /* type string */ \ 164 Cctype_t cctype[UCHAR_MAX + 1]; /* char code types */ \ 165 unsigned int count[UCHAR_MAX + 1]; /* char frequency count */ \ 166 unsigned int multi[UCHAR_MAX + 1]; /* multi char count */ \ 167 int keep[MAXNEST]; /* ckmagic nest stack */ \ 168 char* cap[MAXNEST]; /* ckmagic mime stack */ \ 169 char* msg[MAXNEST]; /* ckmagic text stack */ \ 170 Entry_t* ret[MAXNEST]; /* ckmagic return stack */ \ 171 int fbsz; /* fbuf size */ \ 172 int fbmx; /* fbuf max size */ \ 173 int xbsz; /* xbuf size */ \ 174 int swap; /* swap() operation */ \ 175 unsigned long flags; /* disc+open flags */ \ 176 long xoff; /* xbuf offset */ \ 177 int identifier[ID_MAX + 1]; /* Info_t identifier */ \ 178 Sfio_t* fp; /* fbuf fp */ \ 179 Sfio_t* tmp; /* tmp string */ \ 180 regdisc_t redisc; /* 
regex discipline */ \ 181 Dtdisc_t dtdisc; /* dict discipline */ \ 182 Dt_t* idtab; /* identifier dict */ \ 183 Dt_t* infotab; /* info keyword dict */ 184 185 #include <magic.h> 186 187 static Info_t dict[] = /* keyword dictionary: identifier name => ID_* class tallied in mp->identifier[] by cklang() */ 188 { 189 { "COMMON", ID_FORTRAN }, 190 { "COMPUTE", ID_COBOL }, 191 { "COMP", ID_COPYBOOK }, 192 { "COMPUTATIONAL",ID_COPYBOOK }, 193 { "DCL", ID_PL1 }, 194 { "DEFINED", ID_PL1 }, 195 { "DIMENSION", ID_FORTRAN }, 196 { "DIVISION", ID_COBOL }, 197 { "FILLER", ID_COPYBOOK }, 198 { "FIXED", ID_PL1 }, 199 { "FUNCTION", ID_FORTRAN }, 200 { "HTML", ID_HTML }, 201 { "INTEGER", ID_FORTRAN }, 202 { "MAIN", ID_PL1 }, 203 { "OPTIONS", ID_PL1 }, 204 { "PERFORM", ID_COBOL }, 205 { "PIC", ID_COPYBOOK }, 206 { "REAL", ID_FORTRAN }, 207 { "REDEFINES", ID_COPYBOOK }, 208 { "S9", ID_COPYBOOK }, 209 { "SECTION", ID_COBOL }, 210 { "SELECT", ID_COBOL }, 211 { "SUBROUTINE", ID_FORTRAN }, 212 { "TEXT", ID_ASM }, 213 { "VALUE", ID_COPYBOOK }, 214 { "attr", ID_MAM3 }, 215 { "binary", ID_YACC }, 216 { "block", ID_FORTRAN }, 217 { "bss", ID_ASM }, 218 { "byte", ID_ASM }, 219 { "char", ID_C }, 220 { "class", ID_CPLUSPLUS }, 221 { "clr", ID_NOTEXT }, 222 { "comm", ID_ASM }, 223 { "common", ID_FORTRAN }, 224 { "data", ID_ASM }, 225 { "dimension", ID_FORTRAN }, 226 { "done", ID_MAM2 }, 227 { "double", ID_C }, 228 { "even", ID_ASM }, 229 { "exec", ID_MAM3 }, 230 { "extern", ID_C }, 231 { "float", ID_C }, 232 { "function", ID_FORTRAN }, 233 { "globl", ID_ASM }, 234 { "h", ID_INCL3 }, 235 { "html", ID_HTML }, 236 { "include", ID_INCL1 }, 237 { "int", ID_C }, 238 { "integer", ID_FORTRAN }, 239 { "jmp", ID_NOTEXT }, 240 { "left", ID_YACC }, 241 { "libc", ID_INCL2 }, 242 { "long", ID_C }, 243 { "make", ID_MAM1 }, 244 { "mov", ID_NOTEXT }, 245 { "private", ID_CPLUSPLUS }, 246 { "public", ID_CPLUSPLUS }, 247 { "real", ID_FORTRAN }, 248 { "register", ID_C }, 249 { "right", ID_YACC }, 250 { "sfio", ID_INCL2 }, 251 { "static", ID_C }, 252 { "stdio", ID_INCL2 }, 253 { 
"struct", ID_C }, 254 { "subroutine", ID_FORTRAN }, 255 { "sys", ID_NOTEXT }, 256 { "term", ID_YACC }, 257 { "text", ID_ASM }, 258 { "tst", ID_NOTEXT }, 259 { "type", ID_YACC }, 260 { "typedef", ID_C }, 261 { "u", ID_INCL2 }, 262 { "union", ID_YACC }, 263 { "void", ID_C }, 264 }; 265 266 static Info_t info[] = /* info keyword dictionary: keyword name => INFO_* selector switched on in ckmagic() */ 267 { 268 { "atime", INFO_atime }, 269 { "blocks", INFO_blocks }, 270 { "ctime", INFO_ctime }, 271 { "fstype", INFO_fstype }, 272 { "gid", INFO_gid }, 273 { "mode", INFO_mode }, 274 { "mtime", INFO_mtime }, 275 { "name", INFO_name }, 276 { "nlink", INFO_nlink }, 277 { "size", INFO_size }, 278 { "uid", INFO_uid }, 279 }; 280 281 /* 282 * return pointer to data at offset off and size siz, or 0 when off is negative, the range runs past mp->fbmx, or the seek/read on mp->fp fails; ranges that fit in fbuf are returned directly, all others are served from the xbuf window, which is refilled on demand 283 */ 284 285 static char* 286 getdata(register Magic_t* mp, register long off, register int siz) 287 { 288 register long n; 289 290 if (off < 0) 291 return 0; 292 if (off + siz <= mp->fbsz) /* already in the primary buffer */ 293 return mp->fbuf + off; 294 if (off < mp->xoff || off + siz > mp->xoff + mp->xbsz) /* outside the current xbuf window: refill it */ 295 { 296 if (off + siz > mp->fbmx) 297 return 0; 298 n = (off / (SF_BUFSIZE / 2)) * (SF_BUFSIZE / 2); /* window start: off rounded down to a multiple of SF_BUFSIZE/2 */ 299 if (sfseek(mp->fp, n, SEEK_SET) != n) 300 return 0; 301 if ((mp->xbsz = sfread(mp->fp, mp->xbuf, sizeof(mp->xbuf) - 1)) < 0) 302 { 303 mp->xoff = 0; /* read failed: mark the window empty */ 304 mp->xbsz = 0; 305 return 0; 306 } 307 mp->xbuf[mp->xbsz] = 0; /* keep the window NUL terminated */ 308 mp->xoff = n; 309 if (off + siz > mp->xoff + mp->xbsz) /* short read: range still not covered */ 310 return 0; 311 } 312 return mp->xbuf + off - mp->xoff; 313 } 314 315 /* 316 * @... evaluator for strexpr(): handle is the Magic_t; an operand @offset[suffix] yields the file data found at offset via getdata(), where offset is a number or a parenthesized expression; *e is set just past the operand, or back to cs when the data is unavailable; an operand that does not start with @ yields 0 with *e = cs 317 */ 318 319 static long 320 indirect(const char* cs, char** e, void* handle) 321 { 322 register char* s = (char*)cs; 323 register Magic_t* mp = (Magic_t*)handle; 324 register long n = 0; 325 register char* p; 326 327 if (s) 328 { 329 if (*s == '@') 330 { 331 n = *++s == '(' ? 
strexpr(s, e, indirect, mp) : strtol(s, e, 0); 332 switch (*(s = *e)) /* optional size suffix following the offset */ 333 { 334 case 'b': 335 case 'B': /* 1 byte */ 336 s++; 337 if (p = getdata(mp, n, 1)) 338 n = *(unsigned char*)p; 339 else 340 s = (char*)cs; /* no data: leave *e at the start of the operand */ 341 break; 342 case 'h': 343 case 'H': /* 2 bytes, decoded by swapget() with mp->swap */ 344 s++; 345 if (p = getdata(mp, n, 2)) 346 n = swapget(mp->swap, p, 2); 347 else 348 s = (char*)cs; 349 break; 350 case 'q': 351 case 'Q': /* 8 bytes, decoded by swapget() with mp->swap */ 352 s++; 353 if (p = getdata(mp, n, 8)) 354 n = swapget(mp->swap, p, 8); 355 else 356 s = (char*)cs; 357 break; 358 default: /* 4 bytes; any other alphanumeric suffix character is consumed */ 359 if (isalnum(*s)) 360 s++; 361 if (p = getdata(mp, n, 4)) 362 n = swapget(mp->swap, p, 4); 363 else 364 s = (char*)cs; 365 break; 366 } 367 } 368 *e = s; 369 } 370 else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) /* cs is null: *e is printed as the message text (presumably strexpr()'s error report -- confirm) */ 371 (*mp->disc->errorf)(mp, mp->disc, 2, "%s in indirect expression", *e); 372 return n; 373 } 374 375 /* 376 * emit regex error message: formats code for re with regerror() and reports it through mp->disc->errorf at level 3; silent unless MAGIC_VERBOSE is set in mp->flags and errorf is non-null 377 */ 378 379 static void 380 regmessage(Magic_t* mp, regex_t* re, int code) 381 { 382 char buf[128]; 383 384 if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) 385 { 386 regerror(code, re, buf, sizeof(buf)); 387 (*mp->disc->errorf)(mp, mp->disc, 3, "regex: %s", buf); 388 } 389 } 390 391 /* 392 * decompose vcodex(3) method composition: append the method names encoded in [m,x) to the text at b (limit e), with '^' between methods, and return the new end of text; each method is followed by a byte count stored 7 bits per byte (0x80 set means another byte follows) and that many bytes are skipped; an index byte followed by a 0 byte is the obsolete encoding, mapped to a name by the switch below and flagged once with "old, " 393 */ 394 395 static char* 396 vcdecomp(char* b, char* e, unsigned char* m, unsigned char* x) 397 { 398 unsigned char* map; 399 const char* o; 400 int c; 401 int n; 402 int i; 403 int a; 404 405 map = CCMAP(CC_ASCII, CC_NATIVE); 406 a = 0; /* set once "old, " has been emitted */ 407 i = 1; /* first method: no '^' separator */ 408 for (;;) 409 { 410 if (i) 411 i = 0; 412 else 413 *b++ = '^'; 414 if (m < (x - 1) && !*(m + 1)) 415 { 416 /* 417 * obsolete indices 418 */ 419 420 if (!a) 421 { 422 a = 1; 423 o = "old, "; 424 while (b < e && (c = *o++)) 425 *b++ = c; 426 } 427 switch (*m) 428 { 429 case 0: o = "delta"; break; 430 case 1: o = "huffman"; break; 431 case 2: o = "huffgroup"; break; 432 case 3: o = "arith"; break; 433 case 4: o = "bwt"; break; 434 case 5: o = "rle"; break; 435 case 6: o = "mtf"; break; 436 case 7: o = "transpose"; break; 437 case 8: o 
= "table"; break; 438 case 9: o = "huffpart"; break; 439 case 50: o = "map"; break; 440 case 100: o = "recfm"; break; 441 case 101: o = "ss7"; break; 442 default: o = "UNKNOWN"; break; 443 } 444 m += 2; 445 while (b < e && (c = *o++)) 446 *b++ = c; 447 } 448 else 449 while (b < e && m < x && (c = *m++)) 450 { 451 if (map) 452 c = map[c]; 453 *b++ = c; 454 } 455 if (b >= e) 456 break; 457 n = 0; 458 while (m < x) 459 { 460 n = (n<<7) | (*m & 0x7f); 461 if (!(*m++ & 0x80)) 462 break; 463 } 464 if (n >= (x - m)) 465 break; 466 m += n; 467 } 468 return b; 469 } 470 471 /* 472 * check for magic table match in buf 473 */ 474 475 static char* 476 ckmagic(register Magic_t* mp, const char* file, char* buf, char* end, struct stat* st, unsigned long off) 477 { 478 register Entry_t* ep; 479 register char* p; 480 register char* b; 481 register int level = 0; 482 int call = -1; 483 int all = 0; 484 int c; 485 int str; 486 char* q; 487 char* t; 488 char* cur; 489 char* base = 0; 490 unsigned long num; 491 unsigned long mask; 492 regmatch_t matches[10]; 493 494 mp->swap = 0; 495 b = mp->msg[0] = cur = buf; 496 mp->mime = mp->cap[0] = 0; 497 mp->keep[0] = 0; 498 for (ep = mp->magic; ep; ep = ep->next) 499 { 500 fun: 501 if (ep->nest == '{') 502 { 503 if (++level >= MAXNEST) 504 { 505 call = -1; 506 level = 0; 507 mp->keep[0] = 0; 508 b = mp->msg[0]; 509 mp->mime = mp->cap[0]; 510 continue; 511 } 512 mp->keep[level] = mp->keep[level - 1] != 0; 513 mp->msg[level] = b; 514 mp->cap[level] = mp->mime; 515 } 516 switch (ep->cont) 517 { 518 case '#': 519 if (mp->keep[level] && b > cur) 520 { 521 if ((mp->flags & MAGIC_ALL) && b < (end - 3)) 522 { 523 all = 1; 524 *b++ = '\n'; 525 cur = b; 526 continue; 527 } 528 *b = 0; 529 return buf; 530 } 531 mp->swap = 0; 532 b = mp->msg[0] = cur; 533 mp->mime = mp->cap[0] = 0; 534 if (ep->type == ' ') 535 continue; 536 break; 537 case '$': 538 if (mp->keep[level] && call < (MAXNEST - 1)) 539 { 540 mp->ret[++call] = ep; 541 ep = ep->value.lab; 542 
goto fun; 543 } 544 continue; 545 case ':': /* return: resume at the entry saved in mp->ret[] by the matching '$' call or 'l' loop */ 546 ep = mp->ret[call--]; 547 if (ep->op == 'l') 548 goto fun; 549 continue; 550 case '|': 551 if (mp->keep[level] > 1) 552 goto checknest; 553 /*FALLTHROUGH*/ 554 default: 555 if (!mp->keep[level]) 556 { 557 b = mp->msg[level]; 558 mp->mime = mp->cap[level]; 559 goto checknest; 560 } 561 break; 562 } 563 p = ""; 564 num = 0; 565 if (!ep->expr) /* plain numeric offset, relative to off */ 566 num = ep->offset + off; 567 else /* offset 0: evaluate the expression; otherwise offset is an INFO_* selector and the value comes from st or the file name */ 568 switch (ep->offset) 569 { 570 case 0: 571 num = strexpr(ep->expr, NiL, indirect, mp) + off; 572 break; 573 case INFO_atime: 574 num = st->st_atime; 575 ep->type = 'D'; 576 break; 577 case INFO_blocks: 578 num = iblocks(st); 579 ep->type = 'N'; 580 break; 581 case INFO_ctime: 582 num = st->st_ctime; 583 ep->type = 'D'; 584 break; 585 case INFO_fstype: 586 p = fmtfs(st); 587 ep->type = toupper(ep->type); 588 break; 589 case INFO_gid: 590 if (ep->type == 'e' || ep->type == 'm' || ep->type == 's') 591 { 592 p = fmtgid(st->st_gid); 593 ep->type = toupper(ep->type); 594 } 595 else 596 { 597 num = st->st_gid; 598 ep->type = 'N'; 599 } 600 break; 601 case INFO_mode: 602 if (ep->type == 'e' || ep->type == 'm' || ep->type == 's') 603 { 604 p = fmtmode(st->st_mode, 0); 605 ep->type = toupper(ep->type); 606 } 607 else 608 { 609 num = modex(st->st_mode); 610 ep->type = 'N'; 611 } 612 break; 613 case INFO_mtime: /* data modification time: st_mtime, not the status change time st_ctime that INFO_ctime reports */ 614 num = st->st_mtime; 615 ep->type = 'D'; 616 break; 617 case INFO_name: /* base name of file, computed once per call */ 618 if (!base) 619 { 620 if (base = strrchr(file, '/')) 621 base++; 622 else 623 base = (char*)file; 624 } 625 p = base; 626 ep->type = toupper(ep->type); 627 break; 628 case INFO_nlink: 629 num = st->st_nlink; 630 ep->type = 'N'; 631 break; 632 case INFO_size: 633 num = st->st_size; 634 ep->type = 'N'; 635 break; 636 case INFO_uid: 637 if (ep->type == 'e' || ep->type == 'm' || ep->type == 's') 638 { 639 p = fmtuid(st->st_uid); 640 ep->type = toupper(ep->type); 641 } 642 else 643 { 644 num = st->st_uid; 645 ep->type = 'N'; 646 } 647 break; 648 } 649 switch (ep->type) 650 { 
651 652 case 'b': 653 if (!(p = getdata(mp, num, 1))) 654 goto next; 655 num = *(unsigned char*)p; 656 break; 657 658 case 'h': 659 if (!(p = getdata(mp, num, 2))) 660 goto next; 661 num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 2); 662 break; 663 664 case 'd': 665 case 'l': 666 case 'v': 667 if (!(p = getdata(mp, num, 4))) 668 goto next; 669 num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 4); 670 break; 671 672 case 'q': 673 if (!(p = getdata(mp, num, 8))) 674 goto next; 675 num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 8); 676 break; 677 678 case 'e': 679 if (!(p = getdata(mp, num, 0))) 680 goto next; 681 /*FALLTHROUGH*/ 682 case 'E': 683 if (!ep->value.sub) 684 goto next; 685 if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches))) 686 { 687 c = mp->fbsz; 688 if (c >= sizeof(mp->nbuf)) 689 c = sizeof(mp->nbuf) - 1; 690 p = (char*)memcpy(mp->nbuf, p, c); 691 p[c] = 0; 692 ccmapstr(mp->x2n, p, c); 693 if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches))) 694 { 695 if (c != REG_NOMATCH) 696 regmessage(mp, ep->value.sub, c); 697 goto next; 698 } 699 } 700 p = ep->value.sub->re_sub->re_buf; 701 q = T(ep->desc); 702 t = *q ? q : p; 703 if (mp->keep[level]++ && b > cur && b < end && *(b - 1) != ' ' && *t && *t != ',' && *t != '.' && *t != '\b') 704 *b++ = ' '; 705 b += sfsprintf(b, end - b, *q ? q : "%s", p + (*p == '\b')); 706 if (ep->mime) 707 mp->mime = ep->mime; 708 goto checknest; 709 710 case 's': 711 if (!(p = getdata(mp, num, ep->mask))) 712 goto next; 713 goto checkstr; 714 case 'm': 715 if (!(p = getdata(mp, num, 0))) 716 goto next; 717 /*FALLTHROUGH*/ 718 case 'M': 719 case 'S': 720 checkstr: 721 for (;;) 722 { 723 if (*ep->value.str == '*' && !*(ep->value.str + 1) && isprint(*p)) 724 break; 725 if ((ep->type == 'm' || ep->type == 'M') ? 
strmatch(p, ep->value.str) : !memcmp(p, ep->value.str, ep->mask)) 726 break; 727 if (p == mp->nbuf || ep->mask >= sizeof(mp->nbuf)) 728 goto next; 729 p = (char*)memcpy(mp->nbuf, p, ep->mask); 730 p[ep->mask] = 0; 731 ccmapstr(mp->x2n, p, ep->mask); 732 } 733 q = T(ep->desc); 734 if (mp->keep[level]++ && b > cur && b < end && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b') 735 *b++ = ' '; 736 for (t = p; (c = *t) >= 0 && c <= 0177 && isprint(c) && c != '\n'; t++); 737 *t = 0; 738 b += sfsprintf(b, end - b, q + (*q == '\b'), p); 739 *t = c; 740 if (ep->mime) 741 mp->mime = ep->mime; 742 goto checknest; 743 744 } 745 if (mask = ep->mask) 746 num &= mask; 747 switch (ep->op) 748 { 749 750 case '=': 751 case '@': 752 if (num == ep->value.num) 753 break; 754 if (ep->cont != '#') 755 goto next; 756 if (!mask) 757 mask = ~mask; 758 if (ep->type == 'h') 759 { 760 if ((num = swapget(mp->swap = 1, p, 2) & mask) == ep->value.num) 761 { 762 if (!(mp->swap & (mp->swap + 1))) 763 mp->swap = 7; 764 goto swapped; 765 } 766 } 767 else if (ep->type == 'l') 768 { 769 for (c = 1; c < 4; c++) 770 if ((num = swapget(mp->swap = c, p, 4) & mask) == ep->value.num) 771 { 772 if (!(mp->swap & (mp->swap + 1))) 773 mp->swap = 7; 774 goto swapped; 775 } 776 } 777 else if (ep->type == 'q') 778 { 779 for (c = 1; c < 8; c++) 780 if ((num = swapget(mp->swap = c, p, 8) & mask) == ep->value.num) 781 goto swapped; 782 } 783 goto next; 784 785 case '!': 786 if (num != ep->value.num) 787 break; 788 goto next; 789 790 case '^': 791 if (num ^ ep->value.num) 792 break; 793 goto next; 794 795 case '>': 796 if (num > ep->value.num) 797 break; 798 goto next; 799 800 case '<': 801 if (num < ep->value.num) 802 break; 803 goto next; 804 805 case 'l': 806 if (num > 0 && mp->keep[level] && call < (MAXNEST - 1)) 807 { 808 if (!ep->value.loop->count) 809 { 810 ep->value.loop->count = num; 811 ep->value.loop->offset = off; 812 off = ep->value.loop->start; 813 } 814 else if (!--ep->value.loop->count) 
815 { 816 off = ep->value.loop->offset; 817 goto next; 818 } 819 else 820 off += ep->value.loop->size; 821 mp->ret[++call] = ep; 822 ep = ep->value.loop->lab; 823 goto fun; 824 } 825 goto next; 826 827 case 'm': 828 c = mp->swap; 829 t = ckmagic(mp, file, b + (b > cur), end, st, num); 830 mp->swap = c; 831 if (t) 832 { 833 if (b > cur && b < end) 834 *b = ' '; 835 b += strlen(b); 836 } 837 else if (ep->cont == '&') 838 goto next; 839 break; 840 841 case 'r': 842 #if _UWIN 843 { 844 char* e; 845 Sfio_t* rp; 846 Sfio_t* gp; 847 848 if (!(t = strrchr(file, '.'))) 849 goto next; 850 sfprintf(mp->tmp, "/reg/classes_root/%s", t); 851 if (!(t = sfstruse(mp->tmp)) || !(rp = sfopen(NiL, t, "r"))) 852 goto next; 853 *ep->desc = 0; 854 *ep->mime = 0; 855 gp = 0; 856 while (t = sfgetr(rp, '\n', 1)) 857 { 858 if (strneq(t, "Content Type=", 13)) 859 { 860 ep->mime = vmnewof(mp->vm, ep->mime, char, sfvalue(rp), 0); 861 strcpy(ep->mime, t + 13); 862 if (gp) 863 break; 864 } 865 else 866 { 867 sfprintf(mp->tmp, "/reg/classes_root/%s", t); 868 if ((e = sfstruse(mp->tmp)) && (gp = sfopen(NiL, e, "r"))) 869 { 870 ep->desc = vmnewof(mp->vm, ep->desc, char, strlen(t), 1); 871 strcpy(ep->desc, t); 872 if (*ep->mime) 873 break; 874 } 875 } 876 } 877 sfclose(rp); 878 if (!gp) 879 goto next; 880 if (!*ep->mime) 881 { 882 t = T(ep->desc); 883 if (!strncasecmp(t, "microsoft", 9)) 884 t += 9; 885 while (isspace(*t)) 886 t++; 887 e = "application/x-ms-"; 888 ep->mime = vmnewof(mp->vm, ep->mime, char, strlen(t), strlen(e)); 889 e = strcopy(ep->mime, e); 890 while ((c = *t++) && c != '.' && c != ' ') 891 *e++ = isupper(c) ? 
tolower(c) : c; 892 *e = 0; 893 } 894 while (t = sfgetr(gp, '\n', 1)) 895 if (*t && !streq(t, "\"\"")) 896 { 897 ep->desc = vmnewof(mp->vm, ep->desc, char, sfvalue(gp), 0); 898 strcpy(ep->desc, t); 899 break; 900 } 901 sfclose(gp); 902 if (!*ep->desc) 903 goto next; 904 if (!t) 905 for (t = T(ep->desc); *t; t++) 906 if (*t == '.') 907 *t = ' '; 908 if (!mp->keep[level]) 909 mp->keep[level] = 2; 910 mp->mime = ep->mime; 911 break; 912 } 913 #else 914 if (ep->cont == '#' && !mp->keep[level]) 915 mp->keep[level] = 1; 916 goto next; 917 #endif 918 919 case 'v': 920 if (!(p = getdata(mp, num, 4))) 921 goto next; 922 c = 0; 923 do 924 { 925 num++; 926 c = (c<<7) | (*p & 0x7f); 927 } while (*p++ & 0x80); 928 if (!(p = getdata(mp, num, c))) 929 goto next; 930 if (mp->keep[level]++ && b > cur && b < (end - 1) && *(b - 1) != ' ') 931 { 932 *b++ = ','; 933 *b++ = ' '; 934 } 935 b = vcdecomp(b, cur + PATH_MAX, (unsigned char*)p, (unsigned char*)p + c); 936 goto checknest; 937 938 } 939 swapped: 940 q = T(ep->desc); 941 if (mp->keep[level]++ && b > cur && b < end && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b') 942 *b++ = ' '; 943 if (*q == '\b') 944 q++; 945 str = 0; 946 for (t = q; *t; t++) 947 if (*t == '%' && (c = *(t + 1))) 948 { 949 if (c == '%') 950 t++; 951 else 952 while (c && c != '%') 953 { 954 if (c == 's') 955 { 956 str = 1; 957 break; 958 } 959 else if (c == 'c' || c == 'd' || c == 'i' || c == 'u' || c == 'x' || c == 'X') 960 goto format; 961 t++; 962 c = *(t + 1); 963 } 964 } 965 format: 966 if (!str) 967 b += sfsprintf(b, end - b, q, num, num == 1 ? "" : "s", 0, 0, 0, 0, 0, 0); 968 else if (ep->type == 'd' || ep->type == 'D') 969 b += sfsprintf(b, end - b, q, fmttime("%?%QL", (time_t)num), 0, 0, 0, 0, 0, 0, 0); 970 else if (ep->type == 'v') 971 b += sfsprintf(b, end - b, q, fmtversion(num), 0, 0, 0, 0, 0, 0, 0); 972 else 973 b += sfsprintf(b, end - b, q, fmtnum(num, 0), num == 1 ? 
"" : "s", 0, 0, 0, 0, 0, 0); 974 if (ep->mime && *ep->mime) 975 mp->mime = ep->mime; 976 checknest: 977 if (ep->nest == '}') 978 { 979 if (!mp->keep[level]) 980 { 981 b = mp->msg[level]; 982 mp->mime = mp->cap[level]; 983 } 984 else if (level > 0) 985 mp->keep[level - 1] = mp->keep[level]; 986 if (--level < 0) 987 { 988 level = 0; 989 mp->keep[0] = 0; 990 } 991 } 992 continue; 993 next: 994 if (ep->cont == '&') 995 mp->keep[level] = 0; 996 goto checknest; 997 } 998 if (all && b-- || mp->keep[level] && b > cur) 999 { 1000 *b = 0; 1001 return buf; 1002 } 1003 return 0; 1004 } 1005 1006 /* 1007 * check english language stats 1008 */ 1009 1010 static int 1011 ckenglish(register Magic_t* mp, int pun, int badpun) 1012 { 1013 register char* s; 1014 register int vowl = 0; 1015 register int freq = 0; 1016 register int rare = 0; 1017 1018 if (5 * badpun > pun) 1019 return 0; 1020 if (2 * mp->count[';'] > mp->count['E'] + mp->count['e']) 1021 return 0; 1022 if ((mp->count['>'] + mp->count['<'] + mp->count['/']) > mp->count['E'] + mp->count['e']) 1023 return 0; 1024 for (s = "aeiou"; *s; s++) 1025 vowl += mp->count[toupper(*s)] + mp->count[*s]; 1026 for (s = "etaion"; *s; s++) 1027 freq += mp->count[toupper(*s)] + mp->count[*s]; 1028 for (s = "vjkqxz"; *s; s++) 1029 rare += mp->count[toupper(*s)] + mp->count[*s]; 1030 return 5 * vowl >= mp->fbsz - mp->count[' '] && freq >= 10 * rare; 1031 } 1032 1033 /* 1034 * check programming language stats 1035 */ 1036 1037 static char* 1038 cklang(register Magic_t* mp, const char* file, char* buf, char* end, struct stat* st) 1039 { 1040 register int c; 1041 register unsigned char* b; 1042 register unsigned char* e; 1043 register int q; 1044 register char* s; 1045 char* t; 1046 char* base; 1047 char* suff; 1048 char* t1; 1049 char* t2; 1050 char* t3; 1051 int n; 1052 int badpun; 1053 int code; 1054 int pun; 1055 Cctype_t flags; 1056 Info_t* ip; 1057 1058 b = (unsigned char*)mp->fbuf; 1059 e = b + mp->fbsz; 1060 memzero(mp->count, 
sizeof(mp->count)); 1061 memzero(mp->multi, sizeof(mp->multi)); 1062 memzero(mp->identifier, sizeof(mp->identifier)); 1063 1064 /* 1065 * check character coding 1066 */ 1067 1068 flags = 0; 1069 while (b < e) 1070 flags |= mp->cctype[*b++]; 1071 b = (unsigned char*)mp->fbuf; 1072 code = 0; 1073 q = CC_ASCII; 1074 n = CC_MASK; 1075 for (c = 0; c < CC_MAPS; c++) 1076 { 1077 flags ^= CC_text; 1078 if ((flags & CC_MASK) < n) 1079 { 1080 n = flags & CC_MASK; 1081 q = c; 1082 } 1083 flags >>= CC_BIT; 1084 } 1085 flags = n; 1086 if (!(flags & (CC_binary|CC_notext))) 1087 { 1088 if (q != CC_NATIVE) 1089 { 1090 code = q; 1091 ccmaps(mp->fbuf, mp->fbsz, q, CC_NATIVE); 1092 } 1093 if (b[0] == '#' && b[1] == '!') 1094 { 1095 for (b += 2; b < e && isspace(*b); b++); 1096 for (s = (char*)b; b < e && isprint(*b); b++); 1097 c = *b; 1098 *b = 0; 1099 if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) || match(s, "/*bin*/*") || !access(s, F_OK)) 1100 { 1101 if (t = strrchr(s, '/')) 1102 s = t + 1; 1103 for (t = s; *t; t++) 1104 if (isspace(*t)) 1105 { 1106 *t = 0; 1107 break; 1108 } 1109 sfsprintf(mp->mbuf, sizeof(mp->mbuf), "application/x-%s", *s ? 
s : "sh"); 1110 mp->mime = mp->mbuf; 1111 if (match(s, "*sh")) 1112 { 1113 t1 = T("command"); 1114 if (streq(s, "sh")) 1115 *s = 0; 1116 else 1117 { 1118 *b++ = ' '; 1119 *b = 0; 1120 } 1121 } 1122 else 1123 { 1124 t1 = T("interpreter"); 1125 *b++ = ' '; 1126 *b = 0; 1127 } 1128 sfsprintf(mp->sbuf, sizeof(mp->sbuf), T("%s%s script"), s, t1); 1129 s = mp->sbuf; 1130 goto qualify; 1131 } 1132 *b = c; 1133 b = (unsigned char*)mp->fbuf; 1134 } 1135 badpun = 0; 1136 pun = 0; 1137 q = 0; 1138 s = 0; 1139 t = 0; 1140 while (b < e) 1141 { 1142 c = *b++; 1143 mp->count[c]++; 1144 if (c == q && (q != '*' || *b == '/' && b++)) 1145 { 1146 mp->multi[q]++; 1147 q = 0; 1148 } 1149 else if (c == '\\') 1150 { 1151 s = 0; 1152 b++; 1153 } 1154 else if (!q) 1155 { 1156 if (isalpha(c) || c == '_') 1157 { 1158 if (!s) 1159 s = (char*)b - 1; 1160 } 1161 else if (!isdigit(c)) 1162 { 1163 if (s) 1164 { 1165 if (s > mp->fbuf) 1166 switch (*(s - 1)) 1167 { 1168 case ':': 1169 if (*b == ':') 1170 mp->multi[':']++; 1171 break; 1172 case '.': 1173 if (((char*)b - s) == 3 && (s == (mp->fbuf + 1) || *(s - 2) == '\n')) 1174 mp->multi['.']++; 1175 break; 1176 case '\n': 1177 case '\\': 1178 if (*b == '{') 1179 t = (char*)b + 1; 1180 break; 1181 case '{': 1182 if (s == t && *b == '}') 1183 mp->multi['X']++; 1184 break; 1185 } 1186 if (!mp->idtab) 1187 { 1188 if (mp->idtab = dtnew(mp->vm, &mp->dtdisc, Dtset)) 1189 for (q = 0; q < elementsof(dict); q++) 1190 dtinsert(mp->idtab, &dict[q]); 1191 else if (mp->disc->errorf) 1192 (*mp->disc->errorf)(mp, mp->disc, 3, "out of space"); 1193 q = 0; 1194 } 1195 if (mp->idtab) 1196 { 1197 *(b - 1) = 0; 1198 if (ip = (Info_t*)dtmatch(mp->idtab, s)) 1199 mp->identifier[ip->value]++; 1200 *(b - 1) = c; 1201 } 1202 s = 0; 1203 } 1204 switch (c) 1205 { 1206 case '\t': 1207 if (b == (unsigned char*)(mp->fbuf + 1) || *(b - 2) == '\n') 1208 mp->multi['\t']++; 1209 break; 1210 case '"': 1211 case '\'': 1212 q = c; 1213 break; 1214 case '/': 1215 if (*b == '*') 1216 q = 
*b++; 1217 else if (*b == '/') 1218 q = '\n'; 1219 break; 1220 case '$': 1221 if (*b == '(' && *(b + 1) != ' ') 1222 mp->multi['$']++; 1223 break; 1224 case '{': 1225 case '}': 1226 case '[': 1227 case ']': 1228 case '(': 1229 mp->multi[c]++; 1230 break; 1231 case ')': 1232 mp->multi[c]++; 1233 goto punctuation; 1234 case ':': 1235 if (*b == ':' && isspace(*(b + 1)) && b > (unsigned char*)(mp->fbuf + 1) && isspace(*(b - 2))) 1236 mp->multi[':']++; 1237 goto punctuation; 1238 case '.': 1239 case ',': 1240 case '%': 1241 case ';': 1242 case '?': 1243 punctuation: 1244 pun++; 1245 if (*b != ' ' && *b != '\n') 1246 badpun++; 1247 break; 1248 } 1249 } 1250 } 1251 } 1252 } 1253 else 1254 while (b < e) 1255 mp->count[*b++]++; 1256 base = (t1 = strrchr(file, '/')) ? t1 + 1 : (char*)file; 1257 suff = (t1 = strrchr(base, '.')) ? t1 + 1 : ""; 1258 if (!flags) 1259 { 1260 if (match(suff, "*sh|bat|cmd")) 1261 goto id_sh; 1262 if (match(base, "*@(mkfile)")) 1263 goto id_mk; 1264 if (match(base, "*@(makefile|.mk)")) 1265 goto id_make; 1266 if (match(base, "*@(mamfile|.mam)")) 1267 goto id_mam; 1268 if (match(suff, "[cly]?(pp|xx|++)|cc|ll|yy")) 1269 goto id_c; 1270 if (match(suff, "f")) 1271 goto id_fortran; 1272 if (match(suff, "htm+(l)")) 1273 goto id_html; 1274 if (match(suff, "cpy")) 1275 goto id_copybook; 1276 if (match(suff, "cob|cbl|cb2")) 1277 goto id_cobol; 1278 if (match(suff, "pl[1i]")) 1279 goto id_pl1; 1280 if (match(suff, "tex")) 1281 goto id_tex; 1282 if (match(suff, "asm|s")) 1283 goto id_asm; 1284 if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) && (!suff || suff != strchr(suff, '.'))) 1285 { 1286 id_sh: 1287 s = T("command script"); 1288 mp->mime = "application/sh"; 1289 goto qualify; 1290 } 1291 if (strmatch(mp->fbuf, "From * [0-9][0-9]:[0-9][0-9]:[0-9][0-9] *")) 1292 { 1293 s = T("mail message"); 1294 mp->mime = "message/rfc822"; 1295 goto qualify; 1296 } 1297 if (match(base, "*@(mkfile)")) 1298 { 1299 id_mk: 1300 s = "mkfile"; 1301 mp->mime = "application/mk"; 
1302 goto qualify; 1303 } 1304 if (match(base, "*@(makefile|.mk)") || mp->multi['\t'] >= mp->count[':'] && (mp->multi['$'] > 0 || mp->multi[':'] > 0)) 1305 { 1306 id_make: 1307 s = "makefile"; 1308 mp->mime = "application/make"; 1309 goto qualify; 1310 } 1311 if (mp->multi['.'] >= 3) 1312 { 1313 s = T("nroff input"); 1314 mp->mime = "application/x-troff"; 1315 goto qualify; 1316 } 1317 if (mp->multi['X'] >= 3) 1318 { 1319 s = T("TeX input"); 1320 mp->mime = "application/x-tex"; 1321 goto qualify; 1322 } 1323 if (mp->fbsz < SF_BUFSIZE && 1324 (mp->multi['('] == mp->multi[')'] && 1325 mp->multi['{'] == mp->multi['}'] && 1326 mp->multi['['] == mp->multi[']']) || 1327 mp->fbsz >= SF_BUFSIZE && 1328 (mp->multi['('] >= mp->multi[')'] && 1329 mp->multi['{'] >= mp->multi['}'] && 1330 mp->multi['['] >= mp->multi[']'])) 1331 { 1332 c = mp->identifier[ID_INCL1]; 1333 if (c >= 2 && mp->identifier[ID_INCL2] >= c && mp->identifier[ID_INCL3] >= c && mp->count['.'] >= c || 1334 mp->identifier[ID_C] >= 5 && mp->count[';'] >= 5 || 1335 mp->count['='] >= 20 && mp->count[';'] >= 20) 1336 { 1337 id_c: 1338 t1 = ""; 1339 t2 = "c "; 1340 t3 = T("program"); 1341 switch (*suff) 1342 { 1343 case 'c': 1344 case 'C': 1345 mp->mime = "application/x-cc"; 1346 break; 1347 case 'l': 1348 case 'L': 1349 t1 = "lex "; 1350 mp->mime = "application/x-lex"; 1351 break; 1352 default: 1353 t3 = T("header"); 1354 if (mp->identifier[ID_YACC] < 5 || mp->count['%'] < 5) 1355 { 1356 mp->mime = "application/x-cc"; 1357 break; 1358 } 1359 /*FALLTHROUGH*/ 1360 case 'y': 1361 case 'Y': 1362 t1 = "yacc "; 1363 mp->mime = "application/x-yacc"; 1364 break; 1365 } 1366 if (mp->identifier[ID_CPLUSPLUS] >= 3) 1367 { 1368 t2 = "c++ "; 1369 mp->mime = "application/x-c++"; 1370 } 1371 sfsprintf(mp->sbuf, sizeof(mp->sbuf), "%s%s%s", t1, t2, t3); 1372 s = mp->sbuf; 1373 goto qualify; 1374 } 1375 } 1376 if (mp->identifier[ID_MAM1] >= 2 && mp->identifier[ID_MAM3] >= 2 && 1377 (mp->fbsz < SF_BUFSIZE && mp->identifier[ID_MAM1] 
== mp->identifier[ID_MAM2] || 1378 mp->fbsz >= SF_BUFSIZE && mp->identifier[ID_MAM1] >= mp->identifier[ID_MAM2])) 1379 { 1380 id_mam: 1381 s = T("mam program"); 1382 mp->mime = "application/x-mam"; 1383 goto qualify; 1384 } 1385 if (mp->identifier[ID_FORTRAN] >= 8) 1386 { 1387 id_fortran: 1388 s = T("fortran program"); 1389 mp->mime = "application/x-fortran"; 1390 goto qualify; 1391 } 1392 if (mp->identifier[ID_HTML] > 0 && mp->count['<'] >= 8 && (c = mp->count['<'] - mp->count['>']) >= -2 && c <= 2) 1393 { 1394 id_html: 1395 s = T("html input"); 1396 mp->mime = "text/html"; 1397 goto qualify; 1398 } 1399 if (mp->identifier[ID_COPYBOOK] > 0 && mp->identifier[ID_COBOL] == 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2) 1400 { 1401 id_copybook: 1402 s = T("cobol copybook"); 1403 mp->mime = "application/x-cobol"; 1404 goto qualify; 1405 } 1406 if (mp->identifier[ID_COBOL] > 0 && mp->identifier[ID_COPYBOOK] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2) 1407 { 1408 id_cobol: 1409 s = T("cobol program"); 1410 mp->mime = "application/x-cobol"; 1411 goto qualify; 1412 } 1413 if (mp->identifier[ID_PL1] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2) 1414 { 1415 id_pl1: 1416 s = T("pl1 program"); 1417 mp->mime = "application/x-pl1"; 1418 goto qualify; 1419 } 1420 if (mp->count['{'] >= 6 && (c = mp->count['{'] - mp->count['}']) >= -2 && c <= 2 && mp->count['\\'] >= mp->count['{']) 1421 { 1422 id_tex: 1423 s = T("TeX input"); 1424 mp->mime = "text/tex"; 1425 goto qualify; 1426 } 1427 if (mp->identifier[ID_ASM] >= 4) 1428 { 1429 id_asm: 1430 s = T("as program"); 1431 mp->mime = "application/x-as"; 1432 goto qualify; 1433 } 1434 if (ckenglish(mp, pun, badpun)) 1435 { 1436 s = T("english text"); 1437 mp->mime = "text/plain"; 1438 goto qualify; 1439 } 1440 } 1441 else if (streq(base, "core")) 1442 { 1443 mp->mime = "x-system/core"; 1444 return T("core dump"); 1445 } 1446 if (flags & (CC_binary|CC_notext)) 1447 { 1448 b = (unsigned 
char*)mp->fbuf; 1449 e = b + mp->fbsz; 1450 n = 0; 1451 for (;;) 1452 { 1453 c = *b++; 1454 q = 0; 1455 while (c & 0x80) 1456 { 1457 c <<= 1; 1458 q++; 1459 } 1460 switch (q) 1461 { 1462 case 4: 1463 if (b < e && (*b++ & 0xc0) != 0x80) 1464 break; 1465 /* FALLTHROUGH */ 1466 case 3: 1467 if (b < e && (*b++ & 0xc0) != 0x80) 1468 break; 1469 /* FALLTHROUGH */ 1470 case 2: 1471 if (b < e && (*b++ & 0xc0) != 0x80) 1472 break; 1473 n = 1; 1474 /* FALLTHROUGH */ 1475 case 0: 1476 if (b >= e) 1477 { 1478 if (n) 1479 { 1480 flags &= ~(CC_binary|CC_notext); 1481 flags |= CC_utf_8; 1482 } 1483 break; 1484 } 1485 continue; 1486 } 1487 break; 1488 } 1489 } 1490 if (flags & (CC_binary|CC_notext)) 1491 { 1492 unsigned long d = 0; 1493 1494 if ((q = mp->fbsz / UCHAR_MAX) >= 2) 1495 { 1496 /* 1497 * compression/encryption via standard deviation 1498 */ 1499 1500 1501 for (c = 0; c < UCHAR_MAX; c++) 1502 { 1503 pun = mp->count[c] - q; 1504 d += pun * pun; 1505 } 1506 d /= mp->fbsz; 1507 } 1508 if (d <= 0) 1509 s = T("binary"); 1510 else if (d < 4) 1511 s = T("encrypted"); 1512 else if (d < 16) 1513 s = T("packed"); 1514 else if (d < 64) 1515 s = T("compressed"); 1516 else if (d < 256) 1517 s = T("delta"); 1518 else 1519 s = T("data"); 1520 mp->mime = "application/octet-stream"; 1521 return s; 1522 } 1523 mp->mime = "text/plain"; 1524 if (flags & CC_utf_8) 1525 s = (flags & CC_control) ? T("utf-8 text with control characters") : T("utf-8 text"); 1526 else if (flags & CC_latin) 1527 s = (flags & CC_control) ? T("latin text with control characters") : T("latin text"); 1528 else 1529 s = (flags & CC_control) ? 
T("text with control characters") : T("text"); 1530 qualify: 1531 if (!flags && mp->count['\n'] >= mp->count['\r'] && mp->count['\n'] <= (mp->count['\r'] + 1) && mp->count['\r']) 1532 { 1533 t = "dos "; 1534 mp->mime = "text/dos"; 1535 } 1536 else 1537 t = ""; 1538 if (code) 1539 { 1540 if (code == CC_ASCII) 1541 sfsprintf(buf, end - buf, "ascii %s%s", t, s); 1542 else 1543 { 1544 sfsprintf(buf, end - buf, "ebcdic%d %s%s", code - 1, t, s); 1545 mp->mime = "text/ebcdic"; 1546 } 1547 s = buf; 1548 } 1549 else if (*t) 1550 { 1551 sfsprintf(buf, end - buf, "%s%s", t, s); 1552 s = buf; 1553 } 1554 return s; 1555 } 1556 1557 /* 1558 * return the basic magic string for file,st in buf,size 1559 */ 1560 1561 static char* 1562 type(register Magic_t* mp, const char* file, struct stat* st, char* buf, char* end) 1563 { 1564 register char* s; 1565 register char* t; 1566 1567 mp->mime = 0; 1568 if (!S_ISREG(st->st_mode)) 1569 { 1570 if (S_ISDIR(st->st_mode)) 1571 { 1572 mp->mime = "x-system/dir"; 1573 return T("directory"); 1574 } 1575 if (S_ISLNK(st->st_mode)) 1576 { 1577 mp->mime = "x-system/lnk"; 1578 s = buf; 1579 s += sfsprintf(s, end - s, T("symbolic link to ")); 1580 if (pathgetlink(file, s, end - s) < 0) 1581 return T("cannot read symbolic link text"); 1582 return buf; 1583 } 1584 if (S_ISBLK(st->st_mode)) 1585 { 1586 mp->mime = "x-system/blk"; 1587 sfsprintf(buf, PATH_MAX, T("block special (%s)"), fmtdev(st)); 1588 return buf; 1589 } 1590 if (S_ISCHR(st->st_mode)) 1591 { 1592 mp->mime = "x-system/chr"; 1593 sfsprintf(buf, end - buf, T("character special (%s)"), fmtdev(st)); 1594 return buf; 1595 } 1596 if (S_ISFIFO(st->st_mode)) 1597 { 1598 mp->mime = "x-system/fifo"; 1599 return "fifo"; 1600 } 1601 #ifdef S_ISSOCK 1602 if (S_ISSOCK(st->st_mode)) 1603 { 1604 mp->mime = "x-system/sock"; 1605 return "socket"; 1606 } 1607 #endif 1608 } 1609 if (!(mp->fbmx = st->st_size)) 1610 s = T("empty"); 1611 else if (!mp->fp) 1612 s = T("cannot read"); 1613 else 1614 { 1615 mp->fbsz = 
sfread(mp->fp, mp->fbuf, sizeof(mp->fbuf) - 1); 1616 if (mp->fbsz < 0) 1617 s = fmterror(errno); 1618 else if (mp->fbsz == 0) 1619 s = T("empty"); 1620 else 1621 { 1622 mp->fbuf[mp->fbsz] = 0; 1623 mp->xoff = 0; 1624 mp->xbsz = 0; 1625 if (!(s = ckmagic(mp, file, buf, end, st, 0))) 1626 s = cklang(mp, file, buf, end, st); 1627 } 1628 } 1629 if (!mp->mime) 1630 mp->mime = "application/unknown"; 1631 else if ((t = strchr(mp->mime, '%')) && *(t + 1) == 's' && !*(t + 2)) 1632 { 1633 register char* b; 1634 register char* be; 1635 register char* m; 1636 register char* me; 1637 1638 b = mp->mime; 1639 me = (m = mp->mime = mp->fbuf) + sizeof(mp->fbuf) - 1; 1640 while (m < me && b < t) 1641 *m++ = *b++; 1642 b = t = s; 1643 for (;;) 1644 { 1645 if (!(be = strchr(t, ' '))) 1646 { 1647 be = b + strlen(b); 1648 break; 1649 } 1650 if (*(be - 1) == ',' || strneq(be + 1, "data", 4) || strneq(be + 1, "file", 4)) 1651 break; 1652 b = t; 1653 t = be + 1; 1654 } 1655 while (m < me && b < be) 1656 if ((*m++ = *b++) == ' ') 1657 *(m - 1) = '-'; 1658 *m = 0; 1659 } 1660 return s; 1661 } 1662 1663 /* 1664 * low level for magicload() 1665 */ 1666 1667 static int 1668 load(register Magic_t* mp, char* file, register Sfio_t* fp) 1669 { 1670 register Entry_t* ep; 1671 register char* p; 1672 register char* p2; 1673 char* p3; 1674 char* next; 1675 int n; 1676 int lge; 1677 int lev; 1678 int ent; 1679 int old; 1680 int cont; 1681 Info_t* ip; 1682 Entry_t* ret; 1683 Entry_t* first; 1684 Entry_t* last = 0; 1685 Entry_t* fun['z' - 'a' + 1]; 1686 1687 memzero(fun, sizeof(fun)); 1688 cont = '$'; 1689 ent = 0; 1690 lev = 0; 1691 old = 0; 1692 ret = 0; 1693 error_info.file = file; 1694 error_info.line = 0; 1695 first = ep = vmnewof(mp->vm, 0, Entry_t, 1, 0); 1696 while (p = sfgetr(fp, '\n', 1)) 1697 { 1698 error_info.line++; 1699 for (; isspace(*p); p++); 1700 1701 /* 1702 * nesting 1703 */ 1704 1705 switch (*p) 1706 { 1707 case 0: 1708 case '#': 1709 cont = '#'; 1710 continue; 1711 case '{': 1712 if 
(++lev < MAXNEST) 1713 ep->nest = *p; 1714 else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) 1715 (*mp->disc->errorf)(mp, mp->disc, 1, "{ ... } operator nesting too deep -- %d max", MAXNEST); 1716 continue; 1717 case '}': 1718 if (!last || lev <= 0) 1719 { 1720 if (mp->disc->errorf) 1721 (*mp->disc->errorf)(mp, mp->disc, 2, "`%c': invalid nesting", *p); 1722 } 1723 else if (lev-- == ent) 1724 { 1725 ent = 0; 1726 ep->cont = ':'; 1727 ep->offset = ret->offset; 1728 ep->nest = ' '; 1729 ep->type = ' '; 1730 ep->op = ' '; 1731 ep->desc = "[RETURN]"; 1732 last = ep; 1733 ep = ret->next = vmnewof(mp->vm, 0, Entry_t, 1, 0); 1734 ret = 0; 1735 } 1736 else 1737 last->nest = *p; 1738 continue; 1739 default: 1740 if (*(p + 1) == '{' || *(p + 1) == '(' && *p != '+' && *p != '>' && *p != '&' && *p != '|') 1741 { 1742 n = *p++; 1743 if (n >= 'a' && n <= 'z') 1744 n -= 'a'; 1745 else 1746 { 1747 if (mp->disc->errorf) 1748 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n); 1749 n = 0; 1750 } 1751 if (ret && mp->disc->errorf) 1752 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a'); 1753 if (*p == '{') 1754 { 1755 ent = ++lev; 1756 ret = ep; 1757 ep->desc = "[FUNCTION]"; 1758 } 1759 else 1760 { 1761 if (*(p + 1) != ')' && mp->disc->errorf) 1762 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function call argument list", n + 'a'); 1763 ep->desc = "[CALL]"; 1764 } 1765 ep->cont = cont; 1766 ep->offset = n; 1767 ep->nest = ' '; 1768 ep->type = ' '; 1769 ep->op = ' '; 1770 last = ep; 1771 ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0); 1772 if (ret) 1773 fun[n] = last->value.lab = ep; 1774 else if (!(last->value.lab = fun[n]) && mp->disc->errorf) 1775 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a'); 1776 continue; 1777 } 1778 if (!ep->nest) 1779 ep->nest = (lev > 0 && lev != ent) ? 
('0' + lev - !!ent) : ' '; 1780 break; 1781 } 1782 1783 /* 1784 * continuation 1785 */ 1786 1787 cont = '$'; 1788 switch (*p) 1789 { 1790 case '>': 1791 old = 1; 1792 if (*(p + 1) == *p) 1793 { 1794 /* 1795 * old style nesting push 1796 */ 1797 1798 p++; 1799 old = 2; 1800 if (!lev && last) 1801 { 1802 lev = 1; 1803 last->nest = '{'; 1804 if (last->cont == '>') 1805 last->cont = '&'; 1806 ep->nest = '1'; 1807 } 1808 } 1809 /*FALLTHROUGH*/ 1810 case '+': 1811 case '&': 1812 case '|': 1813 ep->cont = *p++; 1814 break; 1815 default: 1816 if ((mp->flags & MAGIC_VERBOSE) && !isalpha(*p) && mp->disc->errorf) 1817 (*mp->disc->errorf)(mp, mp->disc, 1, "`%c': invalid line continuation operator", *p); 1818 /*FALLTHROUGH*/ 1819 case '*': 1820 case '0': case '1': case '2': case '3': case '4': 1821 case '5': case '6': case '7': case '8': case '9': 1822 ep->cont = (lev > 0) ? '&' : '#'; 1823 break; 1824 } 1825 switch (old) 1826 { 1827 case 1: 1828 old = 0; 1829 if (lev) 1830 { 1831 /* 1832 * old style nesting pop 1833 */ 1834 1835 lev = 0; 1836 if (last) 1837 last->nest = '}'; 1838 ep->nest = ' '; 1839 if (ep->cont == '&') 1840 ep->cont = '#'; 1841 } 1842 break; 1843 case 2: 1844 old = 1; 1845 break; 1846 } 1847 if (isdigit(*p)) 1848 { 1849 /* 1850 * absolute offset 1851 */ 1852 1853 ep->offset = strton(p, &next, NiL, 0); 1854 p2 = next; 1855 } 1856 else 1857 { 1858 for (p2 = p; *p2 && !isspace(*p2); p2++); 1859 if (!*p2) 1860 { 1861 if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) 1862 (*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p); 1863 continue; 1864 } 1865 1866 /* 1867 * offset expression 1868 */ 1869 1870 *p2++ = 0; 1871 ep->expr = vmstrdup(mp->vm, p); 1872 if (isalpha(*p)) 1873 ep->offset = (ip = (Info_t*)dtmatch(mp->infotab, p)) ? 
ip->value : 0; 1874 else if (*p == '(' && ep->cont == '>') 1875 { 1876 /* 1877 * convert old style indirection to @ 1878 */ 1879 1880 p = ep->expr + 1; 1881 for (;;) 1882 { 1883 switch (*p++) 1884 { 1885 case 0: 1886 case '@': 1887 case '(': 1888 break; 1889 case ')': 1890 break; 1891 default: 1892 continue; 1893 } 1894 break; 1895 } 1896 if (*--p == ')') 1897 { 1898 *p = 0; 1899 *ep->expr = '@'; 1900 } 1901 } 1902 } 1903 for (; isspace(*p2); p2++); 1904 for (p = p2; *p2 && !isspace(*p2); p2++); 1905 if (!*p2) 1906 { 1907 if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) 1908 (*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p); 1909 continue; 1910 } 1911 *p2++ = 0; 1912 1913 /* 1914 * type 1915 */ 1916 1917 if ((*p == 'b' || *p == 'l') && *(p + 1) == 'e') 1918 { 1919 ep->swap = ~(*p == 'l' ? 7 : 0); 1920 p += 2; 1921 } 1922 if (*p == 's') 1923 { 1924 if (*(p + 1) == 'h') 1925 ep->type = 'h'; 1926 else 1927 ep->type = 's'; 1928 } 1929 else if (*p == 'a') 1930 ep->type = 's'; 1931 else 1932 ep->type = *p; 1933 if (p = strchr(p, '&')) 1934 { 1935 /* 1936 * old style mask 1937 */ 1938 1939 ep->mask = strton(++p, NiL, NiL, 0); 1940 } 1941 for (; isspace(*p2); p2++); 1942 if (ep->mask) 1943 *--p2 = '='; 1944 1945 /* 1946 * comparison operation 1947 */ 1948 1949 p = p2; 1950 if (p2 = strchr(p, '\t')) 1951 *p2++ = 0; 1952 else 1953 { 1954 int qe = 0; 1955 int qn = 0; 1956 1957 /* 1958 * assume balanced {}[]()\\""'' field 1959 */ 1960 1961 for (p2 = p;;) 1962 { 1963 switch (n = *p2++) 1964 { 1965 case 0: 1966 break; 1967 case '{': 1968 if (!qe) 1969 qe = '}'; 1970 if (qe == '}') 1971 qn++; 1972 continue; 1973 case '(': 1974 if (!qe) 1975 qe = ')'; 1976 if (qe == ')') 1977 qn++; 1978 continue; 1979 case '[': 1980 if (!qe) 1981 qe = ']'; 1982 if (qe == ']') 1983 qn++; 1984 continue; 1985 case '}': 1986 case ')': 1987 case ']': 1988 if (qe == n && qn > 0) 1989 qn--; 1990 continue; 1991 case '"': 1992 case '\'': 1993 if (!qe) 1994 qe = n; 1995 else if (qe == 
n) 1996 qe = 0; 1997 continue; 1998 case '\\': 1999 if (*p2) 2000 p2++; 2001 continue; 2002 default: 2003 if (!qe && isspace(n)) 2004 break; 2005 continue; 2006 } 2007 if (n) 2008 *(p2 - 1) = 0; 2009 else 2010 p2--; 2011 break; 2012 } 2013 } 2014 lge = 0; 2015 if (ep->type == 'e' || ep->type == 'm' || ep->type == 's') 2016 ep->op = '='; 2017 else 2018 { 2019 if (*p == '&') 2020 { 2021 ep->mask = strton(++p, &next, NiL, 0); 2022 p = next; 2023 } 2024 switch (*p) 2025 { 2026 case '=': 2027 case '>': 2028 case '<': 2029 case '*': 2030 ep->op = *p++; 2031 if (*p == '=') 2032 { 2033 p++; 2034 switch (ep->op) 2035 { 2036 case '>': 2037 lge = -1; 2038 break; 2039 case '<': 2040 lge = 1; 2041 break; 2042 } 2043 } 2044 break; 2045 case '!': 2046 case '@': 2047 ep->op = *p++; 2048 if (*p == '=') 2049 p++; 2050 break; 2051 case 'x': 2052 p++; 2053 ep->op = '*'; 2054 break; 2055 default: 2056 ep->op = '='; 2057 if (ep->mask) 2058 ep->value.num = ep->mask; 2059 break; 2060 } 2061 } 2062 if (ep->op != '*' && !ep->value.num) 2063 { 2064 if (ep->type == 'e') 2065 { 2066 if (ep->value.sub = vmnewof(mp->vm, 0, regex_t, 1, 0)) 2067 { 2068 ep->value.sub->re_disc = &mp->redisc; 2069 if (!(n = regcomp(ep->value.sub, p, REG_DELIMITED|REG_LENIENT|REG_NULL|REG_DISCIPLINE))) 2070 { 2071 p += ep->value.sub->re_npat; 2072 if (!(n = regsubcomp(ep->value.sub, p, NiL, 0, 0))) 2073 p += ep->value.sub->re_npat; 2074 } 2075 if (n) 2076 { 2077 regmessage(mp, ep->value.sub, n); 2078 ep->value.sub = 0; 2079 } 2080 else if (*p && mp->disc->errorf) 2081 (*mp->disc->errorf)(mp, mp->disc, 1, "invalid characters after substitution: %s", p); 2082 } 2083 } 2084 else if (ep->type == 'm') 2085 { 2086 ep->mask = stresc(p) + 1; 2087 ep->value.str = vmnewof(mp->vm, 0, char, ep->mask + 1, 0); 2088 memcpy(ep->value.str, p, ep->mask); 2089 if ((!ep->expr || !ep->offset) && !strmatch(ep->value.str, "\\!\\(*\\)")) 2090 ep->value.str[ep->mask - 1] = '*'; 2091 } 2092 else if (ep->type == 's') 2093 { 2094 ep->mask = 
stresc(p); 2095 ep->value.str = vmnewof(mp->vm, 0, char, ep->mask, 0); 2096 memcpy(ep->value.str, p, ep->mask); 2097 } 2098 else if (*p == '\'') 2099 { 2100 stresc(p); 2101 ep->value.num = *(unsigned char*)(p + 1) + lge; 2102 } 2103 else if (strmatch(p, "+([a-z])\\(*\\)")) 2104 { 2105 char* t; 2106 2107 t = p; 2108 ep->type = 'V'; 2109 ep->op = *p; 2110 while (*p && *p++ != '('); 2111 switch (ep->op) 2112 { 2113 case 'l': 2114 n = *p++; 2115 if (n < 'a' || n > 'z') 2116 { 2117 if (mp->disc->errorf) 2118 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n); 2119 } 2120 else if (!fun[n -= 'a']) 2121 { 2122 if (mp->disc->errorf) 2123 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a'); 2124 } 2125 else 2126 { 2127 ep->value.loop = vmnewof(mp->vm, 0, Loop_t, 1, 0); 2128 ep->value.loop->lab = fun[n]; 2129 while (*p && *p++ != ','); 2130 ep->value.loop->start = strton(p, &t, NiL, 0); 2131 while (*t && *t++ != ','); 2132 ep->value.loop->size = strton(t, &t, NiL, 0); 2133 } 2134 break; 2135 case 'm': 2136 case 'r': 2137 ep->desc = vmnewof(mp->vm, 0, char, 32, 0); 2138 ep->mime = vmnewof(mp->vm, 0, char, 32, 0); 2139 break; 2140 case 'v': 2141 break; 2142 default: 2143 if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) 2144 (*mp->disc->errorf)(mp, mp->disc, 1, "%-.*s: unknown function", p - t, t); 2145 break; 2146 } 2147 } 2148 else 2149 { 2150 ep->value.num = strton(p, NiL, NiL, 0) + lge; 2151 if (ep->op == '@') 2152 ep->value.num = swapget(0, (char*)&ep->value.num, sizeof(ep->value.num)); 2153 } 2154 } 2155 2156 /* 2157 * file description 2158 */ 2159 2160 if (p2) 2161 { 2162 for (; isspace(*p2); p2++); 2163 if (p = strchr(p2, '\t')) 2164 { 2165 /* 2166 * check for message catalog index 2167 */ 2168 2169 *p++ = 0; 2170 if (isalpha(*p2)) 2171 { 2172 for (p3 = p2; isalnum(*p3); p3++); 2173 if (*p3++ == ':') 2174 { 2175 for (; isdigit(*p3); p3++); 2176 if (!*p3) 2177 { 2178 for (p2 = p; isspace(*p2); p2++); 2179 if (p = strchr(p2, 
'\t')) 2180 *p++ = 0; 2181 } 2182 } 2183 } 2184 } 2185 stresc(p2); 2186 ep->desc = vmstrdup(mp->vm, p2); 2187 if (p) 2188 { 2189 for (; isspace(*p); p++); 2190 if (*p) 2191 ep->mime = vmstrdup(mp->vm, p); 2192 } 2193 } 2194 else 2195 ep->desc = ""; 2196 2197 /* 2198 * get next entry 2199 */ 2200 2201 last = ep; 2202 ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0); 2203 } 2204 if (last) 2205 { 2206 last->next = 0; 2207 if (mp->magiclast) 2208 mp->magiclast->next = first; 2209 else 2210 mp->magic = first; 2211 mp->magiclast = last; 2212 } 2213 vmfree(mp->vm, ep); 2214 if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) 2215 { 2216 if (lev < 0) 2217 (*mp->disc->errorf)(mp, mp->disc, 1, "too many } operators"); 2218 else if (lev > 0) 2219 (*mp->disc->errorf)(mp, mp->disc, 1, "not enough } operators"); 2220 if (ret) 2221 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a'); 2222 } 2223 error_info.file = 0; 2224 error_info.line = 0; 2225 return 0; 2226 } 2227 2228 /* 2229 * load a magic file into mp 2230 */ 2231 2232 int 2233 magicload(register Magic_t* mp, const char* file, unsigned long flags) 2234 { 2235 register char* s; 2236 register char* e; 2237 register char* t; 2238 int n; 2239 int found; 2240 int list; 2241 Sfio_t* fp; 2242 2243 mp->flags = mp->disc->flags | flags; 2244 found = 0; 2245 if (list = !(s = (char*)file) || !*s || (*s == '-' || *s == '.') && !*(s + 1)) 2246 { 2247 if (!(s = getenv(MAGIC_FILE_ENV)) || !*s) 2248 s = MAGIC_FILE; 2249 } 2250 for (;;) 2251 { 2252 if (!list) 2253 e = 0; 2254 else if (e = strchr(s, ':')) 2255 { 2256 /* 2257 * ok, so ~ won't work for the last list element 2258 * we do it for MAGIC_FILES_ENV anyway 2259 */ 2260 2261 if ((strneq(s, "~/", n = 2) || strneq(s, "$HOME/", n = 6) || strneq(s, "${HOME}/", n = 8)) && (t = getenv("HOME"))) 2262 { 2263 sfputr(mp->tmp, t, -1); 2264 s += n - 1; 2265 } 2266 sfwrite(mp->tmp, s, e - s); 2267 if (!(s = sfstruse(mp->tmp))) 2268 goto nospace; 2269 } 2270 if 
(!*s || streq(s, "-")) 2271 s = MAGIC_FILE; 2272 if (!(fp = sfopen(NiL, s, "r"))) 2273 { 2274 if (list) 2275 { 2276 if (!(t = pathpath(s, "", PATH_REGULAR|PATH_READ, mp->fbuf, sizeof(mp->fbuf))) && !strchr(s, '/')) 2277 { 2278 strcpy(mp->fbuf, s); 2279 sfprintf(mp->tmp, "%s/%s", MAGIC_DIR, mp->fbuf); 2280 if (!(s = sfstruse(mp->tmp))) 2281 goto nospace; 2282 if (!(t = pathpath(s, "", PATH_REGULAR|PATH_READ, mp->fbuf, sizeof(mp->fbuf)))) 2283 goto next; 2284 } 2285 if (!(fp = sfopen(NiL, t, "r"))) 2286 goto next; 2287 } 2288 else 2289 { 2290 if (mp->disc->errorf) 2291 (*mp->disc->errorf)(mp, mp->disc, 3, "%s: cannot open magic file", s); 2292 return -1; 2293 } 2294 } 2295 found = 1; 2296 n = load(mp, s, fp); 2297 sfclose(fp); 2298 if (n && !list) 2299 return -1; 2300 next: 2301 if (!e) 2302 break; 2303 s = e + 1; 2304 } 2305 if (!found) 2306 { 2307 if (mp->flags & MAGIC_VERBOSE) 2308 { 2309 if (mp->disc->errorf) 2310 (*mp->disc->errorf)(mp, mp->disc, 2, "cannot find magic file"); 2311 } 2312 return -1; 2313 } 2314 return 0; 2315 nospace: 2316 if (mp->disc->errorf) 2317 (*mp->disc->errorf)(mp, mp->disc, 3, "out of space"); 2318 return -1; 2319 } 2320 2321 /* 2322 * open a magic session 2323 */ 2324 2325 Magic_t* 2326 magicopen(Magicdisc_t* disc) 2327 { 2328 register Magic_t* mp; 2329 register int i; 2330 register int n; 2331 register int f; 2332 register int c; 2333 register Vmalloc_t* vm; 2334 unsigned char* map[CC_MAPS + 1]; 2335 2336 if (!(vm = vmopen(Vmdcheap, Vmbest, 0))) 2337 return 0; 2338 if (!(mp = vmnewof(vm, 0, Magic_t, 1, 0))) 2339 { 2340 vmclose(vm); 2341 return 0; 2342 } 2343 mp->id = lib; 2344 mp->disc = disc; 2345 mp->vm = vm; 2346 mp->flags = disc->flags; 2347 mp->redisc.re_version = REG_VERSION; 2348 mp->redisc.re_flags = REG_NOFREE; 2349 mp->redisc.re_errorf = (regerror_t)disc->errorf; 2350 mp->redisc.re_resizef = (regresize_t)vmgetmem; 2351 mp->redisc.re_resizehandle = (void*)mp->vm; 2352 mp->dtdisc.key = offsetof(Info_t, name); 2353 
mp->dtdisc.link = offsetof(Info_t, link); 2354 if (!(mp->tmp = sfstropen()) || !(mp->infotab = dtnew(mp->vm, &mp->dtdisc, Dtoset))) 2355 goto bad; 2356 for (n = 0; n < elementsof(info); n++) 2357 dtinsert(mp->infotab, &info[n]); 2358 for (i = 0; i < CC_MAPS; i++) 2359 map[i] = ccmap(i, CC_ASCII); 2360 mp->x2n = ccmap(CC_ALIEN, CC_NATIVE); 2361 for (n = 0; n <= UCHAR_MAX; n++) 2362 { 2363 f = 0; 2364 i = CC_MAPS; 2365 while (--i >= 0) 2366 { 2367 c = ccmapchr(map[i], n); 2368 f = (f << CC_BIT) | CCTYPE(c); 2369 } 2370 mp->cctype[n] = f; 2371 } 2372 return mp; 2373 bad: 2374 magicclose(mp); 2375 return 0; 2376 } 2377 2378 /* 2379 * close a magicopen() session 2380 */ 2381 2382 int 2383 magicclose(register Magic_t* mp) 2384 { 2385 if (!mp) 2386 return -1; 2387 if (mp->tmp) 2388 sfstrclose(mp->tmp); 2389 if (mp->vm) 2390 vmclose(mp->vm); 2391 return 0; 2392 } 2393 2394 /* 2395 * return the magic string for file with optional stat info st 2396 */ 2397 2398 char* 2399 magictype(register Magic_t* mp, Sfio_t* fp, const char* file, register struct stat* st) 2400 { 2401 off_t off; 2402 char* s; 2403 2404 mp->flags = mp->disc->flags; 2405 mp->mime = 0; 2406 if (!st) 2407 s = T("cannot stat"); 2408 else 2409 { 2410 if (mp->fp = fp) 2411 off = sfseek(mp->fp, (off_t)0, SEEK_CUR); 2412 s = type(mp, file, st, mp->tbuf, &mp->tbuf[sizeof(mp->tbuf)-1]); 2413 if (mp->fp) 2414 sfseek(mp->fp, off, SEEK_SET); 2415 if (!(mp->flags & (MAGIC_MIME|MAGIC_ALL))) 2416 { 2417 if (S_ISREG(st->st_mode) && (st->st_size > 0) && (st->st_size < 128)) 2418 sfprintf(mp->tmp, "%s ", T("short")); 2419 sfprintf(mp->tmp, "%s", s); 2420 if (!mp->fp && (st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH))) 2421 sfprintf(mp->tmp, ", %s", S_ISDIR(st->st_mode) ? 
T("searchable") : T("executable")); 2422 if (st->st_mode & S_ISUID) 2423 sfprintf(mp->tmp, ", setuid=%s", fmtuid(st->st_uid)); 2424 if (st->st_mode & S_ISGID) 2425 sfprintf(mp->tmp, ", setgid=%s", fmtgid(st->st_gid)); 2426 if (st->st_mode & S_ISVTX) 2427 sfprintf(mp->tmp, ", sticky"); 2428 if (!(s = sfstruse(mp->tmp))) 2429 s = T("out of space"); 2430 } 2431 } 2432 if (mp->flags & MAGIC_MIME) 2433 s = mp->mime; 2434 if (!s) 2435 s = T("error"); 2436 return s; 2437 } 2438 2439 /* 2440 * list the magic table in mp on sp 2441 */ 2442 2443 int 2444 magiclist(register Magic_t* mp, register Sfio_t* sp) 2445 { 2446 register Entry_t* ep = mp->magic; 2447 register Entry_t* rp = 0; 2448 2449 mp->flags = mp->disc->flags; 2450 sfprintf(sp, "cont\toffset\ttype\top\tmask\tvalue\tmime\tdesc\n"); 2451 while (ep) 2452 { 2453 sfprintf(sp, "%c %c\t", ep->cont, ep->nest); 2454 if (ep->expr) 2455 sfprintf(sp, "%s", ep->expr); 2456 else 2457 sfprintf(sp, "%ld", ep->offset); 2458 sfprintf(sp, "\t%s%c\t%c\t%lo\t", ep->swap == (char)~3 ? "L" : ep->swap == (char)~0 ? "B" : "", ep->type, ep->op, ep->mask); 2459 switch (ep->type) 2460 { 2461 case 'm': 2462 case 's': 2463 sfputr(sp, fmtesc(ep->value.str), -1); 2464 break; 2465 case 'V': 2466 switch (ep->op) 2467 { 2468 case 'l': 2469 sfprintf(sp, "loop(%d,%d,%d,%d)", ep->value.loop->start, ep->value.loop->size, ep->value.loop->count, ep->value.loop->offset); 2470 break; 2471 case 'v': 2472 sfprintf(sp, "vcodex()"); 2473 break; 2474 default: 2475 sfprintf(sp, "%p", ep->value.str); 2476 break; 2477 } 2478 break; 2479 default: 2480 sfprintf(sp, "%lo", ep->value.num); 2481 break; 2482 } 2483 sfprintf(sp, "\t%s\t%s\n", ep->mime ? ep->mime : "", fmtesc(ep->desc)); 2484 if (ep->cont == '$' && !ep->value.lab->mask) 2485 { 2486 rp = ep; 2487 ep = ep->value.lab; 2488 } 2489 else 2490 { 2491 if (ep->cont == ':') 2492 { 2493 ep = rp; 2494 ep->value.lab->mask = 1; 2495 } 2496 ep = ep->next; 2497 } 2498 } 2499 return 0; 2500 } 2501