1 /*********************************************************************** 2 * * 3 * This software is part of the ast package * 4 * Copyright (c) 1985-2009 AT&T Intellectual Property * 5 * and is licensed under the * 6 * Common Public License, Version 1.0 * 7 * by AT&T Intellectual Property * 8 * * 9 * A copy of the License is available at * 10 * http://www.opensource.org/licenses/cpl1.0.txt * 11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 12 * * 13 * Information and Software Systems Research * 14 * AT&T Research * 15 * Florham Park NJ * 16 * * 17 * Glenn Fowler <gsf@research.att.com> * 18 * David Korn <dgk@research.att.com> * 19 * Phong Vo <kpv@research.att.com> * 20 * * 21 ***********************************************************************/ 22 #pragma prototyped 23 /* 24 * Glenn Fowler 25 * AT&T Research 26 * 27 * library interface to file 28 * 29 * the sum of the hacks {s5,v10,planix} is _____ than the parts 30 */ 31 32 static const char id[] = "\n@(#)$Id: magic library (AT&T Research) 2008-09-10 $\0\n"; 33 34 static const char lib[] = "libast:magic"; 35 36 #include <ast.h> 37 #include <ctype.h> 38 #include <ccode.h> 39 #include <dt.h> 40 #include <modex.h> 41 #include <error.h> 42 #include <regex.h> 43 #include <swap.h> 44 45 #define T(m) (*m?ERROR_translate(NiL,NiL,lib,m):m) 46 47 #define match(s,p) strgrpmatch(s,p,NiL,0,STR_LEFT|STR_RIGHT|STR_ICASE) 48 49 #define MAXNEST 10 /* { ... } nesting limit */ 50 #define MINITEM 4 /* magic buffer rounding */ 51 52 typedef struct /* identifier dictionary entry */ 53 { 54 const char name[16]; /* identifier name */ 55 int value; /* identifier value */ 56 Dtlink_t link; /* dictionary link */ 57 } Info_t; 58 59 typedef struct Edit /* edit substitution */ 60 { 61 struct Edit* next; /* next in list */ 62 regex_t* from; /* from pattern */ 63 } Edit_t; 64 65 struct Entry; 66 67 typedef struct /* loop info */ 68 { 69 struct Entry* lab; /* call this function */ 70 int start; /* start here */ 71 int size; /* increment by this amount */ 72 int count; /* dynamic loop count */ 73 int offset; /* dynamic offset */ 74 } Loop_t; 75 76 typedef struct Entry /* magic file entry */ 77 { 78 struct Entry* next; /* next in list */ 79 char* expr; /* offset expression */ 80 union 81 { 82 unsigned long num; 83 char* str; 84 struct Entry* lab; 85 regex_t* sub; 86 Loop_t* loop; 87 } value; /* comparison value */ 88 char* desc; /* file description */ 89 char* mime; /* file mime type */ 90 unsigned long offset; /* offset in bytes */ 91 unsigned long mask; /* mask before compare */ 92 char cont; /* continuation operation */ 93 char type; /* datum type */ 94 char op; /* comparison operation */ 95 char nest; /* { or } nesting operation */ 96 char swap; /* forced swap order */ 97 } Entry_t; 98 99 #define CC_BIT 5 100 101 #if (CC_MAPS*CC_BIT) <= (CHAR_BIT*2) 102 typedef unsigned short Cctype_t; 103 #else 104 typedef unsigned long Cctype_t; 105 #endif 106 107 #define CC_text 0x01 108 #define CC_control 0x02 109 #define CC_latin 0x04 110 #define CC_binary 0x08 111 #define CC_utf_8 0x10 112 113 #define CC_notext CC_text /* CC_text is flipped before checking */ 114 115 #define CC_MASK (CC_binary|CC_latin|CC_control|CC_text) 116 117 #define CCTYPE(c) (((c)>0240)?CC_binary:((c)>=0200)?CC_latin:((c)<040&&(c)!=007&&(c)!=011&&(c)!=012&&(c)!=013&&(c)!=015)?CC_control:CC_text) 118 119 #define ID_NONE 0 120 #define ID_ASM 1 121 #define ID_C 2 122 #define ID_COBOL 3 123 #define ID_COPYBOOK 4 124 #define ID_CPLUSPLUS 5 125 #define ID_FORTRAN 6 126 #define ID_HTML 7 127 #define ID_INCL1 8 128 #define ID_INCL2 9 129 #define ID_INCL3 10 130 #define ID_MAM1 11 131 #define ID_MAM2 12 132 #define ID_MAM3 13 133 #define ID_NOTEXT 14 134 #define ID_PL1 15 135 #define ID_YACC 16 136 137 #define ID_MAX ID_YACC 138 139 #define INFO_atime 1 140 #define INFO_blocks 2 141 #define INFO_ctime 3 142 #define INFO_fstype 4 143 #define INFO_gid 5 144 #define INFO_mode 6 145 #define INFO_mtime 7 146 #define INFO_name 8 147 #define INFO_nlink 9 148 #define INFO_size 10 149 #define INFO_uid 11 150 151 #define _MAGIC_PRIVATE_ \ 152 Magicdisc_t* disc; /* discipline */ \ 153 Vmalloc_t* vm; /* vmalloc region */ \ 154 Entry_t* magic; /* parsed magic table */ \ 155 Entry_t* magiclast; /* last entry in magic */ \ 156 char* mime; /* MIME type */ \ 157 unsigned char* x2n; /* CC_ALIEN=>CC_NATIVE */ \ 158 char fbuf[SF_BUFSIZE + 1]; /* file data */ \ 159 char xbuf[SF_BUFSIZE + 1]; /* indirect file data */ \ 160 char nbuf[256]; /* !CC_NATIVE data */ \ 161 char mbuf[64]; /* mime string */ \ 162 char sbuf[64]; /* type suffix string */ \ 163 char tbuf[2 * PATH_MAX]; /* type string */ \ 164 Cctype_t cctype[UCHAR_MAX + 1]; /* char code types */ \ 165 unsigned int count[UCHAR_MAX + 1]; /* char frequency count */ \ 166 unsigned int multi[UCHAR_MAX + 1]; /* muti char count */ \ 167 int keep[MAXNEST]; /* ckmagic nest stack */ \ 168 char* cap[MAXNEST]; /* ckmagic mime stack */ \ 169 char* msg[MAXNEST]; /* ckmagic text stack */ \ 170 Entry_t* ret[MAXNEST]; /* ckmagic return stack */ \ 171 int fbsz; /* fbuf size */ \ 172 int fbmx; /* fbuf max size */ \ 173 int xbsz; /* xbuf size */ \ 174 int swap; /* swap() operation */ \ 175 unsigned long flags; /* disc+open flags */ \ 176 long xoff; /* xbuf offset */ \ 177 int identifier[ID_MAX + 1]; /* Info_t identifier */ \ 178 Sfio_t* fp; /* fbuf fp */ \ 179 Sfio_t* tmp; /* tmp string */ \ 180 regdisc_t redisc; /* regex discipline */ \ 181 Dtdisc_t dtdisc; /* dict discipline */ \ 182 Dt_t* idtab; /* identifier dict */ \ 183 Dt_t* infotab; /* info keyword dict */ 184 185 #include <magic.h> 186 187 static Info_t dict[] = /* keyword dictionary */ 188 { 189 { "COMMON", ID_FORTRAN }, 190 { "COMPUTE", ID_COBOL }, 191 { "COMP", ID_COPYBOOK }, 192 { "COMPUTATIONAL",ID_COPYBOOK }, 193 { "DCL", ID_PL1 }, 194 { "DEFINED", ID_PL1 }, 195 { "DIMENSION", ID_FORTRAN }, 196 { "DIVISION", ID_COBOL }, 197 { "FILLER", ID_COPYBOOK }, 198 { "FIXED", ID_PL1 }, 199 { "FUNCTION", ID_FORTRAN }, 200 { "HTML", ID_HTML }, 201 { "INTEGER", ID_FORTRAN }, 202 { "MAIN", ID_PL1 }, 203 { "OPTIONS", ID_PL1 }, 204 { "PERFORM", ID_COBOL }, 205 { "PIC", ID_COPYBOOK }, 206 { "REAL", ID_FORTRAN }, 207 { "REDEFINES", ID_COPYBOOK }, 208 { "S9", ID_COPYBOOK }, 209 { "SECTION", ID_COBOL }, 210 { "SELECT", ID_COBOL }, 211 { "SUBROUTINE", ID_FORTRAN }, 212 { "TEXT", ID_ASM }, 213 { "VALUE", ID_COPYBOOK }, 214 { "attr", ID_MAM3 }, 215 { "binary", ID_YACC }, 216 { "block", ID_FORTRAN }, 217 { "bss", ID_ASM }, 218 { "byte", ID_ASM }, 219 { "char", ID_C }, 220 { "class", ID_CPLUSPLUS }, 221 { "clr", ID_NOTEXT }, 222 { "comm", ID_ASM }, 223 { "common", ID_FORTRAN }, 224 { "data", ID_ASM }, 225 { "dimension", ID_FORTRAN }, 226 { "done", ID_MAM2 }, 227 { "double", ID_C }, 228 { "even", ID_ASM }, 229 { "exec", ID_MAM3 }, 230 { "extern", ID_C }, 231 { "float", ID_C }, 232 { "function", ID_FORTRAN }, 233 { "globl", ID_ASM }, 234 { "h", ID_INCL3 }, 235 { "html", ID_HTML }, 236 { "include", ID_INCL1 }, 237 { "int", ID_C }, 238 { "integer", ID_FORTRAN }, 239 { "jmp", ID_NOTEXT }, 240 { "left", ID_YACC }, 241 { "libc", ID_INCL2 }, 242 { "long", ID_C }, 243 { "make", ID_MAM1 }, 244 { "mov", ID_NOTEXT }, 245 { "private", ID_CPLUSPLUS }, 246 { "public", ID_CPLUSPLUS }, 247 { "real", ID_FORTRAN }, 248 { "register", ID_C }, 249 { "right", ID_YACC }, 250 { "sfio", ID_INCL2 }, 251 { "static", ID_C }, 252 { "stdio", ID_INCL2 }, 253 { "struct", ID_C }, 254 { "subroutine", ID_FORTRAN }, 255 { "sys", ID_NOTEXT }, 256 { "term", ID_YACC }, 257 { "text", ID_ASM }, 258 { "tst", ID_NOTEXT }, 259 { "type", ID_YACC }, 260 { "typedef", ID_C }, 261 { "u", ID_INCL2 }, 262 { "union", ID_YACC }, 263 { "void", ID_C }, 264 }; 265 266 static Info_t info[] = 267 { 268 { "atime", INFO_atime }, 269 { "blocks", INFO_blocks }, 270 { "ctime", INFO_ctime }, 271 { "fstype", INFO_fstype }, 272 { "gid", INFO_gid }, 273 { "mode", INFO_mode }, 274 { "mtime", INFO_mtime }, 275 { "name", INFO_name }, 276 { "nlink", INFO_nlink }, 277 { "size", INFO_size }, 278 { "uid", INFO_uid }, 279 }; 280 281 /* 282 * return pointer to data at offset off and size siz 283 */ 284 285 static char* 286 getdata(register Magic_t* mp, register long off, register int siz) 287 { 288 register long n; 289 290 if (off < 0) 291 return 0; 292 if (off + siz <= mp->fbsz) 293 return mp->fbuf + off; 294 if (off < mp->xoff || off + siz > mp->xoff + mp->xbsz) 295 { 296 if (off + siz > mp->fbmx) 297 return 0; 298 n = (off / (SF_BUFSIZE / 2)) * (SF_BUFSIZE / 2); 299 if (sfseek(mp->fp, n, SEEK_SET) != n) 300 return 0; 301 if ((mp->xbsz = sfread(mp->fp, mp->xbuf, sizeof(mp->xbuf) - 1)) < 0) 302 { 303 mp->xoff = 0; 304 mp->xbsz = 0; 305 return 0; 306 } 307 mp->xbuf[mp->xbsz] = 0; 308 mp->xoff = n; 309 if (off + siz > mp->xoff + mp->xbsz) 310 return 0; 311 } 312 return mp->xbuf + off - mp->xoff; 313 } 314 315 /* 316 * @... evaluator for strexpr() 317 */ 318 319 static long 320 indirect(const char* cs, char** e, void* handle) 321 { 322 register char* s = (char*)cs; 323 register Magic_t* mp = (Magic_t*)handle; 324 register long n = 0; 325 register char* p; 326 327 if (s) 328 { 329 if (*s == '@') 330 { 331 n = *++s == '(' ? strexpr(s, e, indirect, mp) : strtol(s, e, 0); 332 switch (*(s = *e)) 333 { 334 case 'b': 335 case 'B': 336 s++; 337 if (p = getdata(mp, n, 1)) 338 n = *(unsigned char*)p; 339 else 340 s = (char*)cs; 341 break; 342 case 'h': 343 case 'H': 344 s++; 345 if (p = getdata(mp, n, 2)) 346 n = swapget(mp->swap, p, 2); 347 else 348 s = (char*)cs; 349 break; 350 case 'q': 351 case 'Q': 352 s++; 353 if (p = getdata(mp, n, 8)) 354 n = swapget(mp->swap, p, 8); 355 else 356 s = (char*)cs; 357 break; 358 default: 359 if (isalnum(*s)) 360 s++; 361 if (p = getdata(mp, n, 4)) 362 n = swapget(mp->swap, p, 4); 363 else 364 s = (char*)cs; 365 break; 366 } 367 } 368 *e = s; 369 } 370 else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) 371 (*mp->disc->errorf)(mp, mp->disc, 2, "%s in indirect expression", *e); 372 return n; 373 } 374 375 /* 376 * emit regex error message 377 */ 378 379 static void 380 regmessage(Magic_t* mp, regex_t* re, int code) 381 { 382 char buf[128]; 383 384 if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) 385 { 386 regerror(code, re, buf, sizeof(buf)); 387 (*mp->disc->errorf)(mp, mp->disc, 3, "regex: %s", buf); 388 } 389 } 390 391 /* 392 * decompose vcodex(3) method composition 393 */ 394 395 static char* 396 vcdecomp(char* b, char* e, unsigned char* m, unsigned char* x) 397 { 398 unsigned char* map; 399 const char* o; 400 int c; 401 int n; 402 int i; 403 int a; 404 405 map = CCMAP(CC_ASCII, CC_NATIVE); 406 a = 0; 407 i = 1; 408 for (;;) 409 { 410 if (i) 411 i = 0; 412 else 413 *b++ = '^'; 414 if (m < (x - 1) && !*(m + 1)) 415 { 416 /* 417 * obsolete indices 418 */ 419 420 if (!a) 421 { 422 a = 1; 423 o = "old, "; 424 while (b < e && (c = *o++)) 425 *b++ = c; 426 } 427 switch (*m) 428 { 429 case 0: o = "delta"; break; 430 case 1: o = "huffman"; break; 431 case 2: o = "huffgroup"; break; 432 case 3: o = "arith"; break; 433 case 4: o = "bwt"; break; 434 case 5: o = "rle"; break; 435 case 6: o = "mtf"; break; 436 case 7: o = "transpose"; break; 437 case 8: o = "table"; break; 438 case 9: o = "huffpart"; break; 439 case 50: o = "map"; break; 440 case 100: o = "recfm"; break; 441 case 101: o = "ss7"; break; 442 default: o = "UNKNOWN"; break; 443 } 444 m += 2; 445 while (b < e && (c = *o++)) 446 *b++ = c; 447 } 448 else 449 while (b < e && m < x && (c = *m++)) 450 { 451 if (map) 452 c = map[c]; 453 *b++ = c; 454 } 455 if (b >= e) 456 break; 457 n = 0; 458 while (m < x) 459 { 460 n = (n<<7) | (*m & 0x7f); 461 if (!(*m++ & 0x80)) 462 break; 463 } 464 if (n >= (x - m)) 465 break; 466 m += n; 467 } 468 return b; 469 } 470 471 /* 472 * check for magic table match in buf 473 */ 474 475 static char* 476 ckmagic(register Magic_t* mp, const char* file, char* buf, struct stat* st, unsigned long off) 477 { 478 register Entry_t* ep; 479 register char* p; 480 register char* b; 481 register int level = 0; 482 int call = -1; 483 int c; 484 char* q; 485 char* t; 486 char* base = 0; 487 unsigned long num; 488 unsigned long mask; 489 regmatch_t matches[10]; 490 491 mp->swap = 0; 492 b = mp->msg[0] = buf; 493 mp->mime = mp->cap[0] = 0; 494 mp->keep[0] = 0; 495 for (ep = mp->magic; ep; ep = ep->next) 496 { 497 fun: 498 if (ep->nest == '{') 499 { 500 if (++level >= MAXNEST) 501 { 502 call = -1; 503 level = 0; 504 mp->keep[0] = 0; 505 b = mp->msg[0]; 506 mp->mime = mp->cap[0]; 507 continue; 508 } 509 mp->keep[level] = mp->keep[level - 1] != 0; 510 mp->msg[level] = b; 511 mp->cap[level] = mp->mime; 512 } 513 switch (ep->cont) 514 { 515 case '#': 516 if (mp->keep[level] && b > buf) 517 { 518 *b = 0; 519 return buf; 520 } 521 mp->swap = 0; 522 b = mp->msg[0] = buf; 523 mp->mime = mp->cap[0] = 0; 524 if (ep->type == ' ') 525 continue; 526 break; 527 case '$': 528 if (mp->keep[level] && call < (MAXNEST - 1)) 529 { 530 mp->ret[++call] = ep; 531 ep = ep->value.lab; 532 goto fun; 533 } 534 continue; 535 case ':': 536 ep = mp->ret[call--]; 537 if (ep->op == 'l') 538 goto fun; 539 continue; 540 case '|': 541 if (mp->keep[level] > 1) 542 goto checknest; 543 /*FALLTHROUGH*/ 544 default: 545 if (!mp->keep[level]) 546 { 547 b = mp->msg[level]; 548 mp->mime = mp->cap[level]; 549 goto checknest; 550 } 551 break; 552 } 553 p = ""; 554 num = 0; 555 if (!ep->expr) 556 num = ep->offset + off; 557 else 558 switch (ep->offset) 559 { 560 case 0: 561 num = strexpr(ep->expr, NiL, indirect, mp) + off; 562 break; 563 case INFO_atime: 564 num = st->st_atime; 565 ep->type = 'D'; 566 break; 567 case INFO_blocks: 568 num = iblocks(st); 569 ep->type = 'N'; 570 break; 571 case INFO_ctime: 572 num = st->st_ctime; 573 ep->type = 'D'; 574 break; 575 case INFO_fstype: 576 p = fmtfs(st); 577 ep->type = toupper(ep->type); 578 break; 579 case INFO_gid: 580 if (ep->type == 'e' || ep->type == 'm' || ep->type == 's') 581 { 582 p = fmtgid(st->st_gid); 583 ep->type = toupper(ep->type); 584 } 585 else 586 { 587 num = st->st_gid; 588 ep->type = 'N'; 589 } 590 break; 591 case INFO_mode: 592 if (ep->type == 'e' || ep->type == 'm' || ep->type == 's') 593 { 594 p = fmtmode(st->st_mode, 0); 595 ep->type = toupper(ep->type); 596 } 597 else 598 { 599 num = modex(st->st_mode); 600 ep->type = 'N'; 601 } 602 break; 603 case INFO_mtime: 604 num = st->st_ctime; 605 ep->type = 'D'; 606 break; 607 case INFO_name: 608 if (!base) 609 { 610 if (base = strrchr(file, '/')) 611 base++; 612 else 613 base = (char*)file; 614 } 615 p = base; 616 ep->type = toupper(ep->type); 617 break; 618 case INFO_nlink: 619 num = st->st_nlink; 620 ep->type = 'N'; 621 break; 622 case INFO_size: 623 num = st->st_size; 624 ep->type = 'N'; 625 break; 626 case INFO_uid: 627 if (ep->type == 'e' || ep->type == 'm' || ep->type == 's') 628 { 629 p = fmtuid(st->st_uid); 630 ep->type = toupper(ep->type); 631 } 632 else 633 { 634 num = st->st_uid; 635 ep->type = 'N'; 636 } 637 break; 638 } 639 switch (ep->type) 640 { 641 642 case 'b': 643 if (!(p = getdata(mp, num, 1))) 644 goto next; 645 num = *(unsigned char*)p; 646 break; 647 648 case 'h': 649 if (!(p = getdata(mp, num, 2))) 650 goto next; 651 num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 2); 652 break; 653 654 case 'd': 655 case 'l': 656 case 'v': 657 if (!(p = getdata(mp, num, 4))) 658 goto next; 659 num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 4); 660 break; 661 662 case 'q': 663 if (!(p = getdata(mp, num, 8))) 664 goto next; 665 num = swapget(ep->swap ? (~ep->swap ^ mp->swap) : mp->swap, p, 8); 666 break; 667 668 case 'e': 669 if (!(p = getdata(mp, num, 0))) 670 goto next; 671 /*FALLTHROUGH*/ 672 case 'E': 673 if (!ep->value.sub) 674 goto next; 675 if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches))) 676 { 677 c = mp->fbsz; 678 if (c >= sizeof(mp->nbuf)) 679 c = sizeof(mp->nbuf) - 1; 680 p = (char*)memcpy(mp->nbuf, p, c); 681 p[c] = 0; 682 ccmapstr(mp->x2n, p, c); 683 if ((c = regexec(ep->value.sub, p, elementsof(matches), matches, 0)) || (c = regsubexec(ep->value.sub, p, elementsof(matches), matches))) 684 { 685 if (c != REG_NOMATCH) 686 regmessage(mp, ep->value.sub, c); 687 goto next; 688 } 689 } 690 p = ep->value.sub->re_sub->re_buf; 691 q = T(ep->desc); 692 t = *q ? q : p; 693 if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *t && *t != ',' && *t != '.' && *t != '\b') 694 *b++ = ' '; 695 b += sfsprintf(b, PATH_MAX - (b - buf), *q ? q : "%s", p + (*p == '\b')); 696 if (ep->mime) 697 mp->mime = ep->mime; 698 goto checknest; 699 700 case 's': 701 if (!(p = getdata(mp, num, ep->mask))) 702 goto next; 703 goto checkstr; 704 case 'm': 705 if (!(p = getdata(mp, num, 0))) 706 goto next; 707 /*FALLTHROUGH*/ 708 case 'M': 709 case 'S': 710 checkstr: 711 for (;;) 712 { 713 if (*ep->value.str == '*' && !*(ep->value.str + 1) && isprint(*p)) 714 break; 715 if ((ep->type == 'm' || ep->type == 'M') ? strmatch(p, ep->value.str) : !memcmp(p, ep->value.str, ep->mask)) 716 break; 717 if (p == mp->nbuf || ep->mask >= sizeof(mp->nbuf)) 718 goto next; 719 p = (char*)memcpy(mp->nbuf, p, ep->mask); 720 p[ep->mask] = 0; 721 ccmapstr(mp->x2n, p, ep->mask); 722 } 723 q = T(ep->desc); 724 if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b') 725 *b++ = ' '; 726 for (t = p; (c = *t) >= 0 && c <= 0177 && isprint(c) && c != '\n'; t++); 727 *t = 0; 728 b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), p); 729 *t = c; 730 if (ep->mime) 731 mp->mime = ep->mime; 732 goto checknest; 733 734 } 735 if (mask = ep->mask) 736 num &= mask; 737 switch (ep->op) 738 { 739 740 case '=': 741 case '@': 742 if (num == ep->value.num) 743 break; 744 if (ep->cont != '#') 745 goto next; 746 if (!mask) 747 mask = ~mask; 748 if (ep->type == 'h') 749 { 750 if ((num = swapget(mp->swap = 1, p, 2) & mask) == ep->value.num) 751 { 752 if (!(mp->swap & (mp->swap + 1))) 753 mp->swap = 7; 754 goto swapped; 755 } 756 } 757 else if (ep->type == 'l') 758 { 759 for (c = 1; c < 4; c++) 760 if ((num = swapget(mp->swap = c, p, 4) & mask) == ep->value.num) 761 { 762 if (!(mp->swap & (mp->swap + 1))) 763 mp->swap = 7; 764 goto swapped; 765 } 766 } 767 else if (ep->type == 'q') 768 { 769 for (c = 1; c < 8; c++) 770 if ((num = swapget(mp->swap = c, p, 8) & mask) == ep->value.num) 771 goto swapped; 772 } 773 goto next; 774 775 case '!': 776 if (num != ep->value.num) 777 break; 778 goto next; 779 780 case '^': 781 if (num ^ ep->value.num) 782 break; 783 goto next; 784 785 case '>': 786 if (num > ep->value.num) 787 break; 788 goto next; 789 790 case '<': 791 if (num < ep->value.num) 792 break; 793 goto next; 794 795 case 'l': 796 if (num > 0 && mp->keep[level] && call < (MAXNEST - 1)) 797 { 798 if (!ep->value.loop->count) 799 { 800 ep->value.loop->count = num; 801 ep->value.loop->offset = off; 802 off = ep->value.loop->start; 803 } 804 else if (!--ep->value.loop->count) 805 { 806 off = ep->value.loop->offset; 807 goto next; 808 } 809 else 810 off += ep->value.loop->size; 811 mp->ret[++call] = ep; 812 ep = ep->value.loop->lab; 813 goto fun; 814 } 815 goto next; 816 817 case 'm': 818 c = mp->swap; 819 t = ckmagic(mp, file, b + (b > buf), st, num); 820 mp->swap = c; 821 if (!t) 822 goto next; 823 if (b > buf) 824 *b = ' '; 825 b += strlen(b); 826 break; 827 828 case 'r': 829 #if _UWIN 830 { 831 char* e; 832 Sfio_t* rp; 833 Sfio_t* gp; 834 835 if (!(t = strrchr(file, '.'))) 836 goto next; 837 sfprintf(mp->tmp, "/reg/classes_root/%s", t); 838 if (!(t = sfstruse(mp->tmp)) || !(rp = sfopen(NiL, t, "r"))) 839 goto next; 840 *ep->desc = 0; 841 *ep->mime = 0; 842 gp = 0; 843 while (t = sfgetr(rp, '\n', 1)) 844 { 845 if (strneq(t, "Content Type=", 13)) 846 { 847 ep->mime = vmnewof(mp->vm, ep->mime, char, sfvalue(rp), 0); 848 strcpy(ep->mime, t + 13); 849 if (gp) 850 break; 851 } 852 else 853 { 854 sfprintf(mp->tmp, "/reg/classes_root/%s", t); 855 if ((e = sfstruse(mp->tmp)) && (gp = sfopen(NiL, e, "r"))) 856 { 857 ep->desc = vmnewof(mp->vm, ep->desc, char, strlen(t), 1); 858 strcpy(ep->desc, t); 859 if (*ep->mime) 860 break; 861 } 862 } 863 } 864 sfclose(rp); 865 if (!gp) 866 goto next; 867 if (!*ep->mime) 868 { 869 t = T(ep->desc); 870 if (!strncasecmp(t, "microsoft", 9)) 871 t += 9; 872 while (isspace(*t)) 873 t++; 874 e = "application/x-ms-"; 875 ep->mime = vmnewof(mp->vm, ep->mime, char, strlen(t), strlen(e)); 876 e = strcopy(ep->mime, e); 877 while ((c = *t++) && c != '.' && c != ' ') 878 *e++ = isupper(c) ? tolower(c) : c; 879 *e = 0; 880 } 881 while (t = sfgetr(gp, '\n', 1)) 882 if (*t && !streq(t, "\"\"")) 883 { 884 ep->desc = vmnewof(mp->vm, ep->desc, char, sfvalue(gp), 0); 885 strcpy(ep->desc, t); 886 break; 887 } 888 sfclose(gp); 889 if (!*ep->desc) 890 goto next; 891 if (!t) 892 for (t = T(ep->desc); *t; t++) 893 if (*t == '.') 894 *t = ' '; 895 if (!mp->keep[level]) 896 mp->keep[level] = 2; 897 mp->mime = ep->mime; 898 break; 899 } 900 #else 901 if (ep->cont == '#' && !mp->keep[level]) 902 mp->keep[level] = 1; 903 goto next; 904 #endif 905 906 case 'v': 907 if (!(p = getdata(mp, num, 4))) 908 goto next; 909 c = 0; 910 do 911 { 912 num++; 913 c = (c<<7) | (*p & 0x7f); 914 } while (*p++ & 0x80); 915 if (!(p = getdata(mp, num, c))) 916 goto next; 917 if (mp->keep[level]++ && b > buf && *(b - 1) != ' ') 918 { 919 *b++ = ','; 920 *b++ = ' '; 921 } 922 b = vcdecomp(b, buf + PATH_MAX, (unsigned char*)p, (unsigned char*)p + c); 923 goto checknest; 924 925 } 926 swapped: 927 q = T(ep->desc); 928 if (mp->keep[level]++ && b > buf && *(b - 1) != ' ' && *q && *q != ',' && *q != '.' && *q != '\b') 929 *b++ = ' '; 930 if (ep->type == 'd' || ep->type == 'D') 931 b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), fmttime("%?%l", (time_t)num)); 932 else if (ep->type == 'v') 933 b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), fmtversion(num)); 934 else 935 b += sfsprintf(b, PATH_MAX - (b - buf), q + (*q == '\b'), num); 936 if (ep->mime && *ep->mime) 937 mp->mime = ep->mime; 938 checknest: 939 if (ep->nest == '}') 940 { 941 if (!mp->keep[level]) 942 { 943 b = mp->msg[level]; 944 mp->mime = mp->cap[level]; 945 } 946 else if (level > 0) 947 mp->keep[level - 1] = mp->keep[level]; 948 if (--level < 0) 949 { 950 level = 0; 951 mp->keep[0] = 0; 952 } 953 } 954 continue; 955 next: 956 if (ep->cont == '&') 957 mp->keep[level] = 0; 958 goto checknest; 959 } 960 if (mp->keep[level] && b > buf) 961 { 962 *b = 0; 963 return buf; 964 } 965 return 0; 966 } 967 968 /* 969 * check english language stats 970 */ 971 972 static int 973 ckenglish(register Magic_t* mp, int pun, int badpun) 974 { 975 register char* s; 976 register int vowl = 0; 977 register int freq = 0; 978 register int rare = 0; 979 980 if (5 * badpun > pun) 981 return 0; 982 if (2 * mp->count[';'] > mp->count['E'] + mp->count['e']) 983 return 0; 984 if ((mp->count['>'] + mp->count['<'] + mp->count['/']) > mp->count['E'] + mp->count['e']) 985 return 0; 986 for (s = "aeiou"; *s; s++) 987 vowl += mp->count[toupper(*s)] + mp->count[*s]; 988 for (s = "etaion"; *s; s++) 989 freq += mp->count[toupper(*s)] + mp->count[*s]; 990 for (s = "vjkqxz"; *s; s++) 991 rare += mp->count[toupper(*s)] + mp->count[*s]; 992 return 5 * vowl >= mp->fbsz - mp->count[' '] && freq >= 10 * rare; 993 } 994 995 /* 996 * check programming language stats 997 */ 998 999 static char* 1000 cklang(register Magic_t* mp, const char* file, char* buf, struct stat* st) 1001 { 1002 register int c; 1003 register unsigned char* b; 1004 register unsigned char* e; 1005 register int q; 1006 register char* s; 1007 char* t; 1008 char* base; 1009 char* suff; 1010 char* t1; 1011 char* t2; 1012 char* t3; 1013 int n; 1014 int badpun; 1015 int code; 1016 int pun; 1017 Cctype_t flags; 1018 Info_t* ip; 1019 1020 b = (unsigned char*)mp->fbuf; 1021 e = b + mp->fbsz; 1022 memzero(mp->count, sizeof(mp->count)); 1023 memzero(mp->multi, sizeof(mp->multi)); 1024 memzero(mp->identifier, sizeof(mp->identifier)); 1025 1026 /* 1027 * check character coding 1028 */ 1029 1030 flags = 0; 1031 while (b < e) 1032 flags |= mp->cctype[*b++]; 1033 b = (unsigned char*)mp->fbuf; 1034 code = 0; 1035 q = CC_ASCII; 1036 n = CC_MASK; 1037 for (c = 0; c < CC_MAPS; c++) 1038 { 1039 flags ^= CC_text; 1040 if ((flags & CC_MASK) < n) 1041 { 1042 n = flags & CC_MASK; 1043 q = c; 1044 } 1045 flags >>= CC_BIT; 1046 } 1047 flags = n; 1048 if (!(flags & (CC_binary|CC_notext))) 1049 { 1050 if (q != CC_NATIVE) 1051 { 1052 code = q; 1053 ccmaps(mp->fbuf, mp->fbsz, q, CC_NATIVE); 1054 } 1055 if (b[0] == '#' && b[1] == '!') 1056 { 1057 for (b += 2; b < e && isspace(*b); b++); 1058 for (s = (char*)b; b < e && isprint(*b); b++); 1059 c = *b; 1060 *b = 0; 1061 if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) || match(s, "/*bin*/*") || !access(s, F_OK)) 1062 { 1063 if (t = strrchr(s, '/')) 1064 s = t + 1; 1065 for (t = s; *t; t++) 1066 if (isspace(*t)) 1067 { 1068 *t = 0; 1069 break; 1070 } 1071 sfsprintf(mp->mbuf, sizeof(mp->mbuf), "application/x-%s", *s ? s : "sh"); 1072 mp->mime = mp->mbuf; 1073 if (match(s, "*sh")) 1074 { 1075 t1 = T("command"); 1076 if (streq(s, "sh")) 1077 *s = 0; 1078 else 1079 { 1080 *b++ = ' '; 1081 *b = 0; 1082 } 1083 } 1084 else 1085 { 1086 t1 = T("interpreter"); 1087 *b++ = ' '; 1088 *b = 0; 1089 } 1090 sfsprintf(mp->sbuf, sizeof(mp->sbuf), T("%s%s script"), s, t1); 1091 s = mp->sbuf; 1092 goto qualify; 1093 } 1094 *b = c; 1095 b = (unsigned char*)mp->fbuf; 1096 } 1097 badpun = 0; 1098 pun = 0; 1099 q = 0; 1100 s = 0; 1101 t = 0; 1102 while (b < e) 1103 { 1104 c = *b++; 1105 mp->count[c]++; 1106 if (c == q && (q != '*' || *b == '/' && b++)) 1107 { 1108 mp->multi[q]++; 1109 q = 0; 1110 } 1111 else if (c == '\\') 1112 { 1113 s = 0; 1114 b++; 1115 } 1116 else if (!q) 1117 { 1118 if (isalpha(c) || c == '_') 1119 { 1120 if (!s) 1121 s = (char*)b - 1; 1122 } 1123 else if (!isdigit(c)) 1124 { 1125 if (s) 1126 { 1127 if (s > mp->fbuf) 1128 switch (*(s - 1)) 1129 { 1130 case ':': 1131 if (*b == ':') 1132 mp->multi[':']++; 1133 break; 1134 case '.': 1135 if (((char*)b - s) == 3 && (s == (mp->fbuf + 1) || *(s - 2) == '\n')) 1136 mp->multi['.']++; 1137 break; 1138 case '\n': 1139 case '\\': 1140 if (*b == '{') 1141 t = (char*)b + 1; 1142 break; 1143 case '{': 1144 if (s == t && *b == '}') 1145 mp->multi['X']++; 1146 break; 1147 } 1148 if (!mp->idtab) 1149 { 1150 if (mp->idtab = dtnew(mp->vm, &mp->dtdisc, Dthash)) 1151 for (q = 0; q < elementsof(dict); q++) 1152 dtinsert(mp->idtab, &dict[q]); 1153 else if (mp->disc->errorf) 1154 (*mp->disc->errorf)(mp, mp->disc, 3, "out of space"); 1155 q = 0; 1156 } 1157 if (mp->idtab) 1158 { 1159 *(b - 1) = 0; 1160 if (ip = (Info_t*)dtmatch(mp->idtab, s)) 1161 mp->identifier[ip->value]++; 1162 *(b - 1) = c; 1163 } 1164 s = 0; 1165 } 1166 switch (c) 1167 { 1168 case '\t': 1169 if (b == (unsigned char*)(mp->fbuf + 1) || *(b - 2) == '\n') 1170 mp->multi['\t']++; 1171 break; 1172 case '"': 1173 case '\'': 1174 q = c; 1175 break; 1176 case '/': 1177 if (*b == '*') 1178 q = *b++; 1179 else if (*b == '/') 1180 q = '\n'; 1181 break; 1182 case '$': 1183 if (*b == '(' && *(b + 1) != ' ') 1184 mp->multi['$']++; 1185 break; 1186 case '{': 1187 case '}': 1188 case '[': 1189 case ']': 1190 case '(': 1191 mp->multi[c]++; 1192 break; 1193 case ')': 1194 mp->multi[c]++; 1195 goto punctuation; 1196 case ':': 1197 if (*b == ':' && isspace(*(b + 1)) && b > (unsigned char*)(mp->fbuf + 1) && isspace(*(b - 2))) 1198 mp->multi[':']++; 1199 goto punctuation; 1200 case '.': 1201 case ',': 1202 case '%': 1203 case ';': 1204 case '?': 1205 punctuation: 1206 pun++; 1207 if (*b != ' ' && *b != '\n') 1208 badpun++; 1209 break; 1210 } 1211 } 1212 } 1213 } 1214 } 1215 else 1216 while (b < e) 1217 mp->count[*b++]++; 1218 base = (t1 = strrchr(file, '/')) ? t1 + 1 : (char*)file; 1219 suff = (t1 = strrchr(base, '.')) ? t1 + 1 : ""; 1220 if (!flags) 1221 { 1222 if (match(suff, "*sh|bat|cmd")) 1223 goto id_sh; 1224 if (match(base, "*@(mkfile)")) 1225 goto id_mk; 1226 if (match(base, "*@(makefile|.mk)")) 1227 goto id_make; 1228 if (match(base, "*@(mamfile|.mam)")) 1229 goto id_mam; 1230 if (match(suff, "[cly]?(pp|xx|++)|cc|ll|yy")) 1231 goto id_c; 1232 if (match(suff, "f")) 1233 goto id_fortran; 1234 if (match(suff, "htm+(l)")) 1235 goto id_html; 1236 if (match(suff, "cpy")) 1237 goto id_copybook; 1238 if (match(suff, "cob|cbl|cb2")) 1239 goto id_cobol; 1240 if (match(suff, "pl[1i]")) 1241 goto id_pl1; 1242 if (match(suff, "tex")) 1243 goto id_tex; 1244 if (match(suff, "asm|s")) 1245 goto id_asm; 1246 if ((st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH)) && (!suff || suff != strchr(suff, '.'))) 1247 { 1248 id_sh: 1249 s = T("command script"); 1250 mp->mime = "application/sh"; 1251 goto qualify; 1252 } 1253 if (strmatch(mp->fbuf, "From * [0-9][0-9]:[0-9][0-9]:[0-9][0-9] *")) 1254 { 1255 s = T("mail message"); 1256 mp->mime = "message/rfc822"; 1257 goto qualify; 1258 } 1259 if (match(base, "*@(mkfile)")) 1260 { 1261 id_mk: 1262 s = "mkfile"; 1263 mp->mime = "application/mk"; 1264 goto qualify; 1265 } 1266 if (match(base, "*@(makefile|.mk)") || mp->multi['\t'] >= mp->count[':'] && (mp->multi['$'] > 0 || mp->multi[':'] > 0)) 1267 { 1268 id_make: 1269 s = "makefile"; 1270 mp->mime = "application/make"; 1271 goto qualify; 1272 } 1273 if (mp->multi['.'] >= 3) 1274 { 1275 s = T("nroff input"); 1276 mp->mime = "application/x-troff"; 1277 goto qualify; 1278 } 1279 if (mp->multi['X'] >= 3) 1280 { 1281 s = T("TeX input"); 1282 mp->mime = "application/x-tex"; 1283 goto qualify; 1284 } 1285 if (mp->fbsz < SF_BUFSIZE && 1286 (mp->multi['('] == mp->multi[')'] && 1287 mp->multi['{'] == mp->multi['}'] && 1288 mp->multi['['] == mp->multi[']']) || 1289 mp->fbsz >= SF_BUFSIZE && 1290 (mp->multi['('] >= mp->multi[')'] && 1291 mp->multi['{'] >= mp->multi['}'] && 1292 mp->multi['['] >= mp->multi[']'])) 1293 { 1294 c = mp->identifier[ID_INCL1]; 1295 if (c >= 2 && mp->identifier[ID_INCL2] >= c && mp->identifier[ID_INCL3] >= c && mp->count['.'] >= c || 1296 mp->identifier[ID_C] >= 5 && mp->count[';'] >= 5 || 1297 mp->count['='] >= 20 && mp->count[';'] >= 20) 1298 { 1299 id_c: 1300 t1 = ""; 1301 t2 = "c "; 1302 t3 = T("program"); 1303 switch (*suff) 1304 { 1305 case 'c': 1306 case 'C': 1307 mp->mime = "application/x-cc"; 1308 break; 1309 case 'l': 1310 case 'L': 1311 t1 = "lex "; 1312 mp->mime = "application/x-lex"; 1313 break; 1314 default: 1315 t3 = T("header"); 1316 if (mp->identifier[ID_YACC] < 5 || mp->count['%'] < 5) 1317 { 1318 mp->mime = "application/x-cc"; 1319 break; 1320 } 1321 /*FALLTHROUGH*/ 1322 case 'y': 1323 case 'Y': 1324 t1 = "yacc "; 1325 mp->mime = "application/x-yacc"; 1326 break; 1327 } 1328 if (mp->identifier[ID_CPLUSPLUS] >= 3) 1329 { 1330 t2 = "c++ "; 1331 mp->mime = "application/x-c++"; 1332 } 1333 sfsprintf(mp->sbuf, sizeof(mp->sbuf), "%s%s%s", t1, t2, t3); 1334 s = mp->sbuf; 1335 goto qualify; 1336 } 1337 } 1338 if (mp->identifier[ID_MAM1] >= 2 && mp->identifier[ID_MAM3] >= 2 && 1339 (mp->fbsz < SF_BUFSIZE && mp->identifier[ID_MAM1] == mp->identifier[ID_MAM2] || 1340 mp->fbsz >= SF_BUFSIZE && mp->identifier[ID_MAM1] >= mp->identifier[ID_MAM2])) 1341 { 1342 id_mam: 1343 s = T("mam program"); 1344 mp->mime = "application/x-mam"; 1345 goto qualify; 1346 } 1347 if (mp->identifier[ID_FORTRAN] >= 8) 1348 { 1349 id_fortran: 1350 s = T("fortran program"); 1351 mp->mime = "application/x-fortran"; 1352 goto qualify; 1353 } 1354 if (mp->identifier[ID_HTML] > 0 && mp->count['<'] >= 8 && (c = mp->count['<'] - mp->count['>']) >= -2 && c <= 2) 1355 { 1356 id_html: 1357 s = T("html input"); 1358 mp->mime = "text/html"; 1359 goto qualify; 1360 } 1361 if (mp->identifier[ID_COPYBOOK] > 0 && mp->identifier[ID_COBOL] == 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2) 1362 { 1363 id_copybook: 1364 s = T("cobol copybook"); 1365 mp->mime = "application/x-cobol"; 1366 goto qualify; 1367 } 1368 if (mp->identifier[ID_COBOL] > 0 && mp->identifier[ID_COPYBOOK] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2) 1369 { 1370 id_cobol: 1371 s = T("cobol program"); 1372 mp->mime = "application/x-cobol"; 1373 goto qualify; 1374 } 1375 if (mp->identifier[ID_PL1] > 0 && (c = mp->count['('] - mp->count[')']) >= -2 && c <= 2) 1376 { 1377 id_pl1: 1378 s = T("pl1 program"); 1379 mp->mime = "application/x-pl1"; 1380 goto qualify; 1381 } 1382 if (mp->count['{'] >= 6 && (c = mp->count['{'] - mp->count['}']) >= -2 && c <= 2 && mp->count['\\'] >= mp->count['{']) 1383 { 1384 id_tex: 1385 s = T("TeX input"); 1386 mp->mime = "text/tex"; 1387 goto qualify; 1388 } 1389 if (mp->identifier[ID_ASM] >= 4) 1390 { 1391 id_asm: 1392 s = T("as program"); 1393 mp->mime = "application/x-as"; 1394 goto qualify; 1395 } 1396 if (ckenglish(mp, pun, badpun)) 1397 { 1398 s = T("english text"); 1399 mp->mime = "text/plain"; 1400 goto qualify; 1401 } 1402 } 1403 else if (streq(base, "core")) 1404 { 1405 mp->mime = "x-system/core"; 1406 return T("core dump"); 1407 } 1408 if (flags & (CC_binary|CC_notext)) 1409 { 1410 b = (unsigned char*)mp->fbuf; 1411 e = b + mp->fbsz; 1412 n = 0; 1413 for (;;) 1414 { 1415 c = *b++; 1416 q = 0; 1417 while (c & 0x80) 1418 { 1419 c <<= 1; 1420 q++; 1421 } 1422 switch (q) 1423 { 1424 case 4: 1425 if (b < e && (*b++ & 0xc0) != 0x80) 1426 break; 1427 case 3: 1428 if (b < e && (*b++ & 0xc0) != 0x80) 1429 break; 1430 case 2: 1431 if (b < e && (*b++ & 0xc0) != 0x80) 1432 break; 1433 n = 1; 1434 case 0: 1435 if (b >= e) 1436 { 1437 if (n) 1438 { 1439 flags &= ~(CC_binary|CC_notext); 1440 flags |= CC_utf_8; 1441 } 1442 break; 1443 } 1444 continue; 1445 } 1446 break; 1447 } 1448 } 1449 if (flags & (CC_binary|CC_notext)) 1450 { 1451 unsigned long d = 0; 1452 1453 if ((q = mp->fbsz / UCHAR_MAX) >= 2) 1454 { 1455 /* 1456 * compression/encryption via standard deviation 1457 */ 1458 1459 1460 for (c = 0; c < UCHAR_MAX; c++) 1461 { 1462 pun = mp->count[c] - q; 1463 d += pun * pun; 1464 } 1465 d /= mp->fbsz; 1466 } 1467 if (d <= 0) 1468 s = T("binary"); 1469 else if (d < 4) 1470 s = T("encrypted"); 1471 else if (d < 16) 1472 s = T("packed"); 1473 else if (d < 64) 1474 s = T("compressed"); 1475 else if (d < 256) 1476 s = T("delta"); 1477 else 1478 s = T("data"); 1479 mp->mime = "application/octet-stream"; 1480 return s; 1481 } 1482 mp->mime = "text/plain"; 1483 if (flags & CC_utf_8) 1484 s = (flags & CC_control) ? T("utf-8 text with control characters") : T("utf-8 text"); 1485 else if (flags & CC_latin) 1486 s = (flags & CC_control) ? T("latin text with control characters") : T("latin text"); 1487 else 1488 s = (flags & CC_control) ? T("text with control characters") : T("text"); 1489 qualify: 1490 if (!flags && mp->count['\n'] >= mp->count['\r'] && mp->count['\n'] <= (mp->count['\r'] + 1) && mp->count['\r']) 1491 { 1492 t = "dos "; 1493 mp->mime = "text/dos"; 1494 } 1495 else 1496 t = ""; 1497 if (code) 1498 { 1499 if (code == CC_ASCII) 1500 sfsprintf(buf, PATH_MAX, "ascii %s%s", t, s); 1501 else 1502 { 1503 sfsprintf(buf, PATH_MAX, "ebcdic%d %s%s", code - 1, t, s); 1504 mp->mime = "text/ebcdic"; 1505 } 1506 s = buf; 1507 } 1508 else if (*t) 1509 { 1510 sfsprintf(buf, PATH_MAX, "%s%s", t, s); 1511 s = buf; 1512 } 1513 return s; 1514 } 1515 1516 /* 1517 * return the basic magic string for file,st in buf,size 1518 */ 1519 1520 static char* 1521 type(register Magic_t* mp, const char* file, struct stat* st, char* buf, int size) 1522 { 1523 register char* s; 1524 register char* t; 1525 1526 mp->mime = 0; 1527 if (!S_ISREG(st->st_mode)) 1528 { 1529 if (S_ISDIR(st->st_mode)) 1530 { 1531 mp->mime = "x-system/dir"; 1532 return T("directory"); 1533 } 1534 if (S_ISLNK(st->st_mode)) 1535 { 1536 mp->mime = "x-system/lnk"; 1537 s = buf; 1538 s += sfsprintf(s, PATH_MAX, T("symbolic link to ")); 1539 if (pathgetlink(file, s, size - (s - buf)) < 0) 1540 return T("cannot read symbolic link text"); 1541 return buf; 1542 } 1543 if (S_ISBLK(st->st_mode)) 1544 { 1545 mp->mime = "x-system/blk"; 1546 sfsprintf(buf, PATH_MAX, T("block special (%s)"), fmtdev(st)); 1547 return buf; 1548 } 1549 if (S_ISCHR(st->st_mode)) 1550 { 1551 mp->mime = "x-system/chr"; 1552 sfsprintf(buf, PATH_MAX, T("character special (%s)"), fmtdev(st)); 1553 return buf; 1554 } 1555 if (S_ISFIFO(st->st_mode)) 1556 { 1557 mp->mime = "x-system/fifo"; 1558 return "fifo"; 1559 } 1560 #ifdef S_ISSOCK 1561 if (S_ISSOCK(st->st_mode)) 1562 { 1563 mp->mime = "x-system/sock"; 1564 return "socket"; 1565 } 1566 #endif 1567 } 1568 if (!(mp->fbmx = st->st_size)) 1569 s = T("empty"); 1570 else if (!mp->fp) 1571 s = T("cannot read"); 1572 else 1573 { 1574 mp->fbsz = sfread(mp->fp, mp->fbuf, sizeof(mp->fbuf) - 1); 1575 if (mp->fbsz < 0) 1576 s = fmterror(errno); 1577 else if (mp->fbsz == 0) 1578 s = T("empty"); 1579 else 1580 { 1581 mp->fbuf[mp->fbsz] = 0; 1582 mp->xoff = 0; 1583 mp->xbsz = 0; 1584 if (!(s = ckmagic(mp, file, buf, st, 0))) 1585 s = cklang(mp, file, buf, st); 1586 } 1587 } 1588 if (!mp->mime) 1589 mp->mime = "application/unknown"; 1590 else if ((t = strchr(mp->mime, '%')) && *(t + 1) == 's' && !*(t + 2)) 1591 { 1592 register char* b; 1593 register char* be; 1594 register char* m; 1595 register char* me; 1596 1597 b = mp->mime; 1598 me = (m = mp->mime = mp->fbuf) + sizeof(mp->fbuf) - 1; 1599 while (m < me && b < t) 1600 *m++ = *b++; 1601 b = t = s; 1602 for (;;) 1603 { 1604 if (!(be = strchr(t, ' '))) 1605 { 1606 be = b + strlen(b); 1607 break; 1608 } 1609 if (*(be - 1) == ',' || strneq(be + 1, "data", 4) || strneq(be + 1, "file", 4)) 1610 break; 1611 b = t; 1612 t = be + 1; 1613 } 1614 while (m < me && b < be) 1615 if ((*m++ = *b++) == ' ') 1616 *(m - 1) = '-'; 1617 *m = 0; 1618 } 1619 return s; 1620 } 1621 1622 /* 1623 * low level for magicload() 1624 */ 1625 1626 static int 1627 load(register Magic_t* mp, char* file, register Sfio_t* fp) 1628 { 1629 register Entry_t* ep; 1630 register char* p; 1631 register char* p2; 1632 char* p3; 1633 char* next; 1634 int n; 1635 int lge; 1636 int lev; 1637 int ent; 1638 int old; 1639 int cont; 1640 Info_t* ip; 1641 Entry_t* ret; 1642 Entry_t* first; 1643 Entry_t* last = 0; 1644 Entry_t* fun['z' - 'a' + 1]; 1645 1646 memzero(fun, sizeof(fun)); 1647 cont = '$'; 1648 ent = 0; 1649 lev = 0; 1650 old = 0; 1651 ret = 0; 1652 error_info.file = file; 1653 error_info.line = 0; 1654 first = ep = vmnewof(mp->vm, 0, Entry_t, 1, 0); 1655 while (p = sfgetr(fp, '\n', 1)) 1656 { 1657 error_info.line++; 1658 for (; isspace(*p); p++); 1659 1660 /* 1661 * nesting 1662 */ 1663 1664 switch (*p) 1665 { 1666 case 0: 1667 case '#': 1668 cont = '#'; 1669 continue; 1670 case '{': 1671 if (++lev < MAXNEST) 1672 ep->nest = *p; 1673 else if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) 1674 (*mp->disc->errorf)(mp, mp->disc, 1, "{ ... } operator nesting too deep -- %d max", MAXNEST); 1675 continue; 1676 case '}': 1677 if (!last || lev <= 0) 1678 { 1679 if (mp->disc->errorf) 1680 (*mp->disc->errorf)(mp, mp->disc, 2, "`%c': invalid nesting", *p); 1681 } 1682 else if (lev-- == ent) 1683 { 1684 ent = 0; 1685 ep->cont = ':'; 1686 ep->offset = ret->offset; 1687 ep->nest = ' '; 1688 ep->type = ' '; 1689 ep->op = ' '; 1690 ep->desc = "[RETURN]"; 1691 last = ep; 1692 ep = ret->next = vmnewof(mp->vm, 0, Entry_t, 1, 0); 1693 ret = 0; 1694 } 1695 else 1696 last->nest = *p; 1697 continue; 1698 default: 1699 if (*(p + 1) == '{' || *(p + 1) == '(' && *p != '+' && *p != '>' && *p != '&' && *p != '|') 1700 { 1701 n = *p++; 1702 if (n >= 'a' && n <= 'z') 1703 n -= 'a'; 1704 else 1705 { 1706 if (mp->disc->errorf) 1707 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n); 1708 n = 0; 1709 } 1710 if (ret && mp->disc->errorf) 1711 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a'); 1712 if (*p == '{') 1713 { 1714 ent = ++lev; 1715 ret = ep; 1716 ep->desc = "[FUNCTION]"; 1717 } 1718 else 1719 { 1720 if (*(p + 1) != ')' && mp->disc->errorf) 1721 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function call argument list", n + 'a'); 1722 ep->desc = "[CALL]"; 1723 } 1724 ep->cont = cont; 1725 ep->offset = n; 1726 ep->nest = ' '; 1727 ep->type = ' '; 1728 ep->op = ' '; 1729 last = ep; 1730 ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0); 1731 if (ret) 1732 fun[n] = last->value.lab = ep; 1733 else if (!(last->value.lab = fun[n]) && mp->disc->errorf) 1734 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a'); 1735 continue; 1736 } 1737 if (!ep->nest) 1738 ep->nest = (lev > 0 && lev != ent) ? ('0' + lev - !!ent) : ' '; 1739 break; 1740 } 1741 1742 /* 1743 * continuation 1744 */ 1745 1746 cont = '$'; 1747 switch (*p) 1748 { 1749 case '>': 1750 old = 1; 1751 if (*(p + 1) == *p) 1752 { 1753 /* 1754 * old style nesting push 1755 */ 1756 1757 p++; 1758 old = 2; 1759 if (!lev && last) 1760 { 1761 lev = 1; 1762 last->nest = '{'; 1763 if (last->cont == '>') 1764 last->cont = '&'; 1765 ep->nest = '1'; 1766 } 1767 } 1768 /*FALLTHROUGH*/ 1769 case '+': 1770 case '&': 1771 case '|': 1772 ep->cont = *p++; 1773 break; 1774 default: 1775 if ((mp->flags & MAGIC_VERBOSE) && !isalpha(*p) && mp->disc->errorf) 1776 (*mp->disc->errorf)(mp, mp->disc, 1, "`%c': invalid line continuation operator", *p); 1777 /*FALLTHROUGH*/ 1778 case '*': 1779 case '0': case '1': case '2': case '3': case '4': 1780 case '5': case '6': case '7': case '8': case '9': 1781 ep->cont = (lev > 0) ? '&' : '#'; 1782 break; 1783 } 1784 switch (old) 1785 { 1786 case 1: 1787 old = 0; 1788 if (lev) 1789 { 1790 /* 1791 * old style nesting pop 1792 */ 1793 1794 lev = 0; 1795 if (last) 1796 last->nest = '}'; 1797 ep->nest = ' '; 1798 if (ep->cont == '&') 1799 ep->cont = '#'; 1800 } 1801 break; 1802 case 2: 1803 old = 1; 1804 break; 1805 } 1806 if (isdigit(*p)) 1807 { 1808 /* 1809 * absolute offset 1810 */ 1811 1812 ep->offset = strton(p, &next, NiL, 0); 1813 p2 = next; 1814 } 1815 else 1816 { 1817 for (p2 = p; *p2 && !isspace(*p2); p2++); 1818 if (!*p2) 1819 { 1820 if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) 1821 (*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p); 1822 continue; 1823 } 1824 1825 /* 1826 * offset expression 1827 */ 1828 1829 *p2++ = 0; 1830 ep->expr = vmstrdup(mp->vm, p); 1831 if (isalpha(*p)) 1832 ep->offset = (ip = (Info_t*)dtmatch(mp->infotab, p)) ? ip->value : 0; 1833 else if (*p == '(' && ep->cont == '>') 1834 { 1835 /* 1836 * convert old style indirection to @ 1837 */ 1838 1839 p = ep->expr + 1; 1840 for (;;) 1841 { 1842 switch (*p++) 1843 { 1844 case 0: 1845 case '@': 1846 case '(': 1847 break; 1848 case ')': 1849 break; 1850 default: 1851 continue; 1852 } 1853 break; 1854 } 1855 if (*--p == ')') 1856 { 1857 *p = 0; 1858 *ep->expr = '@'; 1859 } 1860 } 1861 } 1862 for (; isspace(*p2); p2++); 1863 for (p = p2; *p2 && !isspace(*p2); p2++); 1864 if (!*p2) 1865 { 1866 if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) 1867 (*mp->disc->errorf)(mp, mp->disc, 1, "not enough fields: `%s'", p); 1868 continue; 1869 } 1870 *p2++ = 0; 1871 1872 /* 1873 * type 1874 */ 1875 1876 if ((*p == 'b' || *p == 'l') && *(p + 1) == 'e') 1877 { 1878 ep->swap = ~(*p == 'l' ? 7 : 0); 1879 p += 2; 1880 } 1881 if (*p == 's') 1882 { 1883 if (*(p + 1) == 'h') 1884 ep->type = 'h'; 1885 else 1886 ep->type = 's'; 1887 } 1888 else if (*p == 'a') 1889 ep->type = 's'; 1890 else 1891 ep->type = *p; 1892 if (p = strchr(p, '&')) 1893 { 1894 /* 1895 * old style mask 1896 */ 1897 1898 ep->mask = strton(++p, NiL, NiL, 0); 1899 } 1900 for (; isspace(*p2); p2++); 1901 if (ep->mask) 1902 *--p2 = '='; 1903 1904 /* 1905 * comparison operation 1906 */ 1907 1908 p = p2; 1909 if (p2 = strchr(p, '\t')) 1910 *p2++ = 0; 1911 else 1912 { 1913 int qe = 0; 1914 int qn = 0; 1915 1916 /* 1917 * assume balanced {}[]()\\""'' field 1918 */ 1919 1920 for (p2 = p;;) 1921 { 1922 switch (n = *p2++) 1923 { 1924 case 0: 1925 break; 1926 case '{': 1927 if (!qe) 1928 qe = '}'; 1929 if (qe == '}') 1930 qn++; 1931 continue; 1932 case '(': 1933 if (!qe) 1934 qe = ')'; 1935 if (qe == ')') 1936 qn++; 1937 continue; 1938 case '[': 1939 if (!qe) 1940 qe = ']'; 1941 if (qe == ']') 1942 qn++; 1943 continue; 1944 case '}': 1945 case ')': 1946 case ']': 1947 if (qe == n && qn > 0) 1948 qn--; 1949 continue; 1950 case '"': 1951 case '\'': 1952 if (!qe) 1953 qe = n; 1954 else if (qe == n) 1955 qe = 0; 1956 continue; 1957 case '\\': 1958 if (*p2) 1959 p2++; 1960 continue; 1961 default: 1962 if (!qe && isspace(n)) 1963 break; 1964 continue; 1965 } 1966 if (n) 1967 *(p2 - 1) = 0; 1968 else 1969 p2--; 1970 break; 1971 } 1972 } 1973 lge = 0; 1974 if (ep->type == 'e' || ep->type == 'm' || ep->type == 's') 1975 ep->op = '='; 1976 else 1977 { 1978 if (*p == '&') 1979 { 1980 ep->mask = strton(++p, &next, NiL, 0); 1981 p = next; 1982 } 1983 switch (*p) 1984 { 1985 case '=': 1986 case '>': 1987 case '<': 1988 case '*': 1989 ep->op = *p++; 1990 if (*p == '=') 1991 { 1992 p++; 1993 switch (ep->op) 1994 { 1995 case '>': 1996 lge = -1; 1997 break; 1998 case '<': 1999 lge = 1; 2000 break; 2001 } 2002 } 2003 break; 2004 case '!': 2005 case '@': 2006 ep->op = *p++; 2007 if (*p == '=') 2008 p++; 2009 break; 2010 case 'x': 2011 p++; 2012 ep->op = '*'; 2013 break; 2014 default: 2015 ep->op = '='; 2016 if (ep->mask) 2017 ep->value.num = ep->mask; 2018 break; 2019 } 2020 } 2021 if (ep->op != '*' && !ep->value.num) 2022 { 2023 if (ep->type == 'e') 2024 { 2025 if (ep->value.sub = vmnewof(mp->vm, 0, regex_t, 1, 0)) 2026 { 2027 ep->value.sub->re_disc = &mp->redisc; 2028 if (!(n = regcomp(ep->value.sub, p, REG_DELIMITED|REG_LENIENT|REG_NULL|REG_DISCIPLINE))) 2029 { 2030 p += ep->value.sub->re_npat; 2031 if (!(n = regsubcomp(ep->value.sub, p, NiL, 0, 0))) 2032 p += ep->value.sub->re_npat; 2033 } 2034 if (n) 2035 { 2036 regmessage(mp, ep->value.sub, n); 2037 ep->value.sub = 0; 2038 } 2039 else if (*p && mp->disc->errorf) 2040 (*mp->disc->errorf)(mp, mp->disc, 1, "invalid characters after substitution: %s", p); 2041 } 2042 } 2043 else if (ep->type == 'm') 2044 { 2045 ep->mask = stresc(p) + 1; 2046 ep->value.str = vmnewof(mp->vm, 0, char, ep->mask + 1, 0); 2047 memcpy(ep->value.str, p, ep->mask); 2048 if ((!ep->expr || !ep->offset) && !strmatch(ep->value.str, "\\!\\(*\\)")) 2049 ep->value.str[ep->mask - 1] = '*'; 2050 } 2051 else if (ep->type == 's') 2052 { 2053 ep->mask = stresc(p); 2054 ep->value.str = vmnewof(mp->vm, 0, char, ep->mask, 0); 2055 memcpy(ep->value.str, p, ep->mask); 2056 } 2057 else if (*p == '\'') 2058 { 2059 stresc(p); 2060 ep->value.num = *(unsigned char*)(p + 1) + lge; 2061 } 2062 else if (strmatch(p, "+([a-z])\\(*\\)")) 2063 { 2064 char* t; 2065 2066 t = p; 2067 ep->type = 'V'; 2068 ep->op = *p; 2069 while (*p && *p++ != '('); 2070 switch (ep->op) 2071 { 2072 case 'l': 2073 n = *p++; 2074 if (n < 'a' || n > 'z') 2075 { 2076 if (mp->disc->errorf) 2077 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: invalid function name", n); 2078 } 2079 else if (!fun[n -= 'a']) 2080 { 2081 if (mp->disc->errorf) 2082 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function not defined", n + 'a'); 2083 } 2084 else 2085 { 2086 ep->value.loop = vmnewof(mp->vm, 0, Loop_t, 1, 0); 2087 ep->value.loop->lab = fun[n]; 2088 while (*p && *p++ != ','); 2089 ep->value.loop->start = strton(p, &t, NiL, 0); 2090 while (*t && *t++ != ','); 2091 ep->value.loop->size = strton(t, &t, NiL, 0); 2092 } 2093 break; 2094 case 'm': 2095 case 'r': 2096 ep->desc = vmnewof(mp->vm, 0, char, 32, 0); 2097 ep->mime = vmnewof(mp->vm, 0, char, 32, 0); 2098 break; 2099 case 'v': 2100 break; 2101 default: 2102 if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) 2103 (*mp->disc->errorf)(mp, mp->disc, 1, "%-.*s: unknown function", p - t, t); 2104 break; 2105 } 2106 } 2107 else 2108 { 2109 ep->value.num = strton(p, NiL, NiL, 0) + lge; 2110 if (ep->op == '@') 2111 ep->value.num = swapget(0, (char*)&ep->value.num, sizeof(ep->value.num)); 2112 } 2113 } 2114 2115 /* 2116 * file description 2117 */ 2118 2119 if (p2) 2120 { 2121 for (; isspace(*p2); p2++); 2122 if (p = strchr(p2, '\t')) 2123 { 2124 /* 2125 * check for message catalog index 2126 */ 2127 2128 *p++ = 0; 2129 if (isalpha(*p2)) 2130 { 2131 for (p3 = p2; isalnum(*p3); p3++); 2132 if (*p3++ == ':') 2133 { 2134 for (; isdigit(*p3); p3++); 2135 if (!*p3) 2136 { 2137 for (p2 = p; isspace(*p2); p2++); 2138 if (p = strchr(p2, '\t')) 2139 *p++ = 0; 2140 } 2141 } 2142 } 2143 } 2144 stresc(p2); 2145 ep->desc = vmstrdup(mp->vm, p2); 2146 if (p) 2147 { 2148 for (; isspace(*p); p++); 2149 if (*p) 2150 ep->mime = vmstrdup(mp->vm, p); 2151 } 2152 } 2153 else 2154 ep->desc = ""; 2155 2156 /* 2157 * get next entry 2158 */ 2159 2160 last = ep; 2161 ep = ep->next = vmnewof(mp->vm, 0, Entry_t, 1, 0); 2162 } 2163 if (last) 2164 { 2165 last->next = 0; 2166 if (mp->magiclast) 2167 mp->magiclast->next = first; 2168 else 2169 mp->magic = first; 2170 mp->magiclast = last; 2171 } 2172 vmfree(mp->vm, ep); 2173 if ((mp->flags & MAGIC_VERBOSE) && mp->disc->errorf) 2174 { 2175 if (lev < 0) 2176 (*mp->disc->errorf)(mp, mp->disc, 1, "too many } operators"); 2177 else if (lev > 0) 2178 (*mp->disc->errorf)(mp, mp->disc, 1, "not enough } operators"); 2179 if (ret) 2180 (*mp->disc->errorf)(mp, mp->disc, 2, "%c: function has no return", ret->offset + 'a'); 2181 } 2182 error_info.file = 0; 2183 error_info.line = 0; 2184 return 0; 2185 } 2186 2187 /* 2188 * load a magic file into mp 2189 */ 2190 2191 int 2192 magicload(register Magic_t* mp, const char* file, unsigned long flags) 2193 { 2194 register char* s; 2195 register char* e; 2196 register char* t; 2197 int n; 2198 int found; 2199 int list; 2200 Sfio_t* fp; 2201 2202 mp->flags = mp->disc->flags | flags; 2203 found = 0; 2204 if (list = !(s = (char*)file) || !*s || (*s == '-' || *s == '.') && !*(s + 1)) 2205 { 2206 if (!(s = getenv(MAGIC_FILE_ENV)) || !*s) 2207 s = MAGIC_FILE; 2208 } 2209 for (;;) 2210 { 2211 if (!list) 2212 e = 0; 2213 else if (e = strchr(s, ':')) 2214 { 2215 /* 2216 * ok, so ~ won't work for the last list element 2217 * we do it for MAGIC_FILES_ENV anyway 2218 */ 2219 2220 if ((strneq(s, "~/", n = 2) || strneq(s, "$HOME/", n = 6) || strneq(s, "${HOME}/", n = 8)) && (t = getenv("HOME"))) 2221 { 2222 sfputr(mp->tmp, t, -1); 2223 s += n - 1; 2224 } 2225 sfwrite(mp->tmp, s, e - s); 2226 if (!(s = sfstruse(mp->tmp))) 2227 goto nospace; 2228 } 2229 if (!*s || streq(s, "-")) 2230 s = MAGIC_FILE; 2231 if (!(fp = sfopen(NiL, s, "r"))) 2232 { 2233 if (list) 2234 { 2235 if (!(t = pathpath(mp->fbuf, s, "", PATH_REGULAR|PATH_READ)) && !strchr(s, '/')) 2236 { 2237 strcpy(mp->fbuf, s); 2238 sfprintf(mp->tmp, "%s/%s", MAGIC_DIR, mp->fbuf); 2239 if (!(s = sfstruse(mp->tmp))) 2240 goto nospace; 2241 if (!(t = pathpath(mp->fbuf, s, "", PATH_REGULAR|PATH_READ))) 2242 goto next; 2243 } 2244 if (!(fp = sfopen(NiL, t, "r"))) 2245 goto next; 2246 } 2247 else 2248 { 2249 if (mp->disc->errorf) 2250 (*mp->disc->errorf)(mp, mp->disc, 3, "%s: cannot open magic file", s); 2251 return -1; 2252 } 2253 } 2254 found = 1; 2255 n = load(mp, s, fp); 2256 sfclose(fp); 2257 if (n && !list) 2258 return -1; 2259 next: 2260 if (!e) 2261 break; 2262 s = e + 1; 2263 } 2264 if (!found) 2265 { 2266 if (mp->flags & MAGIC_VERBOSE) 2267 { 2268 if (mp->disc->errorf) 2269 (*mp->disc->errorf)(mp, mp->disc, 2, "cannot find magic file"); 2270 } 2271 return -1; 2272 } 2273 return 0; 2274 nospace: 2275 if (mp->disc->errorf) 2276 (*mp->disc->errorf)(mp, mp->disc, 3, "out of space"); 2277 return -1; 2278 } 2279 2280 /* 2281 * open a magic session 2282 */ 2283 2284 Magic_t* 2285 magicopen(Magicdisc_t* disc) 2286 { 2287 register Magic_t* mp; 2288 register int i; 2289 register int n; 2290 register int f; 2291 register int c; 2292 register Vmalloc_t* vm; 2293 unsigned char* map[CC_MAPS + 1]; 2294 2295 if (!(vm = vmopen(Vmdcheap, Vmbest, 0))) 2296 return 0; 2297 if (!(mp = vmnewof(vm, 0, Magic_t, 1, 0))) 2298 { 2299 vmclose(vm); 2300 return 0; 2301 } 2302 mp->id = lib; 2303 mp->disc = disc; 2304 mp->vm = vm; 2305 mp->flags = disc->flags; 2306 mp->redisc.re_version = REG_VERSION; 2307 mp->redisc.re_flags = REG_NOFREE; 2308 mp->redisc.re_errorf = (regerror_t)disc->errorf; 2309 mp->redisc.re_resizef = (regresize_t)vmgetmem; 2310 mp->redisc.re_resizehandle = (void*)mp->vm; 2311 mp->dtdisc.key = offsetof(Info_t, name); 2312 mp->dtdisc.link = offsetof(Info_t, link); 2313 if (!(mp->tmp = sfstropen()) || !(mp->infotab = dtnew(mp->vm, &mp->dtdisc, Dthash))) 2314 goto bad; 2315 for (n = 0; n < elementsof(info); n++) 2316 dtinsert(mp->infotab, &info[n]); 2317 for (i = 0; i < CC_MAPS; i++) 2318 map[i] = ccmap(i, CC_ASCII); 2319 mp->x2n = ccmap(CC_ALIEN, CC_NATIVE); 2320 for (n = 0; n <= UCHAR_MAX; n++) 2321 { 2322 f = 0; 2323 i = CC_MAPS; 2324 while (--i >= 0) 2325 { 2326 c = ccmapchr(map[i], n); 2327 f = (f << CC_BIT) | CCTYPE(c); 2328 } 2329 mp->cctype[n] = f; 2330 } 2331 return mp; 2332 bad: 2333 magicclose(mp); 2334 return 0; 2335 } 2336 2337 /* 2338 * close a magicopen() session 2339 */ 2340 2341 int 2342 magicclose(register Magic_t* mp) 2343 { 2344 if (!mp) 2345 return -1; 2346 if (mp->tmp) 2347 sfstrclose(mp->tmp); 2348 if (mp->vm) 2349 vmclose(mp->vm); 2350 return 0; 2351 } 2352 2353 /* 2354 * return the magic string for file with optional stat info st 2355 */ 2356 2357 char* 2358 magictype(register Magic_t* mp, Sfio_t* fp, const char* file, register struct stat* st) 2359 { 2360 off_t off; 2361 char* s; 2362 2363 mp->flags = mp->disc->flags; 2364 mp->mime = 0; 2365 if (!st) 2366 s = T("cannot stat"); 2367 else 2368 { 2369 if (mp->fp = fp) 2370 off = sfseek(mp->fp, (off_t)0, SEEK_CUR); 2371 s = type(mp, file, st, mp->tbuf, sizeof(mp->tbuf)); 2372 if (mp->fp) 2373 sfseek(mp->fp, off, SEEK_SET); 2374 if (!(mp->flags & MAGIC_MIME)) 2375 { 2376 if (S_ISREG(st->st_mode) && (st->st_size > 0) && (st->st_size < 128)) 2377 sfprintf(mp->tmp, "%s ", T("short")); 2378 sfprintf(mp->tmp, "%s", s); 2379 if (!mp->fp && (st->st_mode & (S_IXUSR|S_IXGRP|S_IXOTH))) 2380 sfprintf(mp->tmp, ", %s", S_ISDIR(st->st_mode) ? T("searchable") : T("executable")); 2381 if (st->st_mode & S_ISUID) 2382 sfprintf(mp->tmp, ", setuid=%s", fmtuid(st->st_uid)); 2383 if (st->st_mode & S_ISGID) 2384 sfprintf(mp->tmp, ", setgid=%s", fmtgid(st->st_gid)); 2385 if (st->st_mode & S_ISVTX) 2386 sfprintf(mp->tmp, ", sticky"); 2387 if (!(s = sfstruse(mp->tmp))) 2388 s = T("out of space"); 2389 } 2390 } 2391 if (mp->flags & MAGIC_MIME) 2392 s = mp->mime; 2393 if (!s) 2394 s = T("error"); 2395 return s; 2396 } 2397 2398 /* 2399 * list the magic table in mp on sp 2400 */ 2401 2402 int 2403 magiclist(register Magic_t* mp, register Sfio_t* sp) 2404 { 2405 register Entry_t* ep = mp->magic; 2406 register Entry_t* rp = 0; 2407 2408 mp->flags = mp->disc->flags; 2409 sfprintf(sp, "cont\toffset\ttype\top\tmask\tvalue\tmime\tdesc\n"); 2410 while (ep) 2411 { 2412 sfprintf(sp, "%c %c\t", ep->cont, ep->nest); 2413 if (ep->expr) 2414 sfprintf(sp, "%s", ep->expr); 2415 else 2416 sfprintf(sp, "%ld", ep->offset); 2417 sfprintf(sp, "\t%s%c\t%c\t%lo\t", ep->swap == (char)~3 ? "L" : ep->swap == (char)~0 ? "B" : "", ep->type, ep->op, ep->mask); 2418 switch (ep->type) 2419 { 2420 case 'm': 2421 case 's': 2422 sfputr(sp, fmtesc(ep->value.str), -1); 2423 break; 2424 case 'V': 2425 switch (ep->op) 2426 { 2427 case 'l': 2428 sfprintf(sp, "loop(%d,%d,%d,%d)", ep->value.loop->start, ep->value.loop->size, ep->value.loop->count, ep->value.loop->offset); 2429 break; 2430 case 'v': 2431 sfprintf(sp, "vcodex()"); 2432 break; 2433 default: 2434 sfprintf(sp, "%p", ep->value.str); 2435 break; 2436 } 2437 break; 2438 default: 2439 sfprintf(sp, "%lo", ep->value.num); 2440 break; 2441 } 2442 sfprintf(sp, "\t%s\t%s\n", ep->mime ? ep->mime : "", fmtesc(ep->desc)); 2443 if (ep->cont == '$' && !ep->value.lab->mask) 2444 { 2445 rp = ep; 2446 ep = ep->value.lab; 2447 } 2448 else 2449 { 2450 if (ep->cont == ':') 2451 { 2452 ep = rp; 2453 ep->value.lab->mask = 1; 2454 } 2455 ep = ep->next; 2456 } 2457 } 2458 return 0; 2459 } 2460