/*
 * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
 * Use is subject to license terms.
 */

/*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
/*	  All Rights Reserved	*/

/*
 * Copyright (c) 1980 Regents of the University of California.
 * All rights reserved. The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 */

#pragma ident	"%Z%%M%	%I%	%E% SMI"

/*
 * sortbib: print the records of one or more bibliographic databases in
 * sorted order.
 *
 * Pass 1 (sortbib) scans every database and writes one line per record
 * to a temporary file: "<file index> <byte offset> <length> : <keys>".
 * Pass 2 (deliver) sorts that file with sort(1), then seeks to each
 * record in its database and copies it to standard output.
 */

#include <ctype.h>
#include <errno.h>
#include <libintl.h>
#include <limits.h>
#include <locale.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <sys/wait.h>
#include <unistd.h>

#define	BUF	BUFSIZ
#define	MXFILES	16
#define	NKEYS	4		/* number of sort-key slots per record */
#define	NWORDS	8		/* words examined on one field line */
#define	WORDLEN	(BUF/4)		/* storage for one word, including NUL */
#define	MAXREC	(BUF*8)		/* longest record deliver() can buffer */

char	tempfile[32];		/* temporary file for sorting keys */
int	tmpfd = -1;
static char defkeys[] = "AD";	/* writable: eval() edits the key in place */
char	*keystr = defkeys;	/* default sorting on author and date */
int	multauth = 0;		/* by default sort on senior author only */
int	oneauth;		/* has there been author in the record? */
int	rsmode = 0;		/* record separator: 1 = null line, 2 = bracket */

static int article(const char *);
static void checklen(long, int);
static void deliver(FILE *[], int);
static int endcomma(char *);
static void error(const char *);
static void eval(char []);
static void fldcat(char *, const char *);
static int getwords(const char *, char [][WORDLEN]);
static void onintr(int);
static void parse(char [], char fld[][BUF]);
static void putkey(FILE *, int, long, long, char fld[][BUF]);
static void sortbib(FILE *, FILE *, int);
static void sorterr(void);
static void tmperror(const char *);

/*
 * Usage: sortbib [-sKEYS] database [...]
 * Exits 0 on success, 1 on a usage or I/O error.
 */
int
main(int argc, char *argv[])
{
	FILE *fp[MXFILES], *tfp;
	int i, nfiles;

	(void) setlocale(LC_ALL, "");

#if !defined(TEXT_DOMAIN)
#define	TEXT_DOMAIN "SYS_TEST"
#endif
	(void) textdomain(TEXT_DOMAIN);

	if (argc == 1) {	/* can't use stdin for seeking anyway */
		puts(gettext("Usage: sortbib [-sKEYS] database [...]\n"
		    "\t-s: sort by fields in KEYS (default is AD)"));
		exit(1);
	}
	if (argc > 2 && argv[1][0] == '-' && argv[1][1] == 's') {
		/* if a key is specified use it, otherwise use default key */
		if (argv[1][2] != '\0')
			keystr = argv[1] + 2;
		eval(keystr);		/* evaluate A+ for multiple authors */
		argv++;
		argc--;
	}
	if (argc > MXFILES+1) {	/* too many open file streams */
		fprintf(stderr,
		    gettext("sortbib: More than %d databases specified\n"),
		    MXFILES);
		exit(1);
	}
	nfiles = argc - 1;
	for (i = 0; i < nfiles; i++)		/* open files in arg list */
		if ((fp[i] = fopen(argv[i + 1], "r")) == NULL)
			error(argv[i + 1]);
	(void) strcpy(tempfile, "/tmp/SbibXXXXXX");	/* for sorting keys */
	if ((tmpfd = mkstemp(tempfile)) == -1)
		error(tempfile);

	(void) close(tmpfd);
	if (signal(SIGINT, SIG_IGN) != SIG_IGN)	/* remove if interrupted */
		(void) signal(SIGINT, onintr);
	if ((tfp = fopen(tempfile, "w")) == NULL)
		tmperror(tempfile);
	for (i = 0; i < nfiles; i++)		/* read keys from bib files */
		sortbib(fp[i], tfp, i);
	/* fclose() flushes; a failure here means the key file is incomplete */
	if (fclose(tfp) == EOF)
		tmperror(tempfile);
	deliver(fp, nfiles);	/* do disk seeks and read from biblio files */
	(void) unlink(tempfile);
	return (0);
}

/*
 * Refuse a record that deliver() could not hold in its buffer.
 * The length is clamped for printing because the message uses %d.
 */
static void
checklen(long length, int recno)
{
	if (length > MAXREC) {
		fprintf(stderr,
		    gettext("sortbib: record %d longer than %d (%d)\n"),
		    recno, MAXREC, length > INT_MAX ? INT_MAX : (int)length);
		(void) unlink(tempfile);
		exit(1);
	}
}

/* write the sort line for one record: location first, then the keys */
static void
putkey(FILE *tfp, int i, long offset, long length, char fld[][BUF])
{
	fprintf(tfp, "%d %ld %ld : %s %s %s %s\n",
	    i, offset, length, fld[0], fld[1], fld[2], fld[3]);
	if (ferror(tfp))
		tmperror(tempfile);	/* disk error in /tmp */
}

/*
 * Read records from database number i (fp) and append one sort line per
 * record to tfp.  A record starts at the first line of the file, at an
 * empty line (null-line mode) or at a ".[" line (bracket mode); the
 * first separator style seen fixes rsmode for the rest of the run.
 */
static void
sortbib(FILE *fp, FILE *tfp, int i)
{
	long offset, lastoffset = 0;	/* byte offsets in file */
	int newrec = 0, recno = 0;	/* new rec'd?, number */
	char line[BUF], fld[NKEYS][BUF];	/* one line, the sort fields */
	int k;

	/* an empty database must not leave these uninitialised */
	line[0] = '\0';
	for (k = 0; k < NKEYS; k++)
		fld[k][0] = '\0';

	/* measure byte offset, then get new line */
	while (offset = ftell(fp), fgets(line, BUF, fp) != NULL) {
		if (recno == 0)		/* accept record w/o initial newline */
			newrec = 1;
		if (line[0] == '\n') {	/* accept null line record separator */
			if (!rsmode)
				rsmode = 1;	/* null line mode */
			if (rsmode == 1)
				newrec = 1;
		}
		if (line[0] == '.' && line[1] == '[') {	/* also accept .[ .] */
			if (!rsmode)
				rsmode = 2;	/* bracket pair mode */
			if (rsmode == 2)
				newrec = 1;
		}
		if (newrec) {		/* by whatever means above */
			newrec = 0;
			checklen(offset - lastoffset, recno);
			if (recno++)	/* nothing precedes the first record */
				putkey(tfp, i, lastoffset,
				    offset - lastoffset, fld);
			for (k = 0; k < NKEYS; k++)
				fld[k][0] = '\0';
			oneauth = 0;		/* reset number of authors */
			lastoffset = offset;	/* save for next time */
		}
		if (line[0] == '%')	/* parse out fields to be sorted */
			parse(line, fld);
	}
	offset = ftell(fp);		/* measure byte offset at EOF */
	checklen(offset - lastoffset, recno);
	/*
	 * Emit the final record unless the file was empty.  In null-line
	 * mode a trailing empty line is a separator with nothing after it
	 * and is ignored; in bracket mode it is just part of the last
	 * record, which must still be delivered.
	 */
	if (recno > 0 && !(rsmode == 1 && line[0] == '\n'))
		putkey(tfp, i, lastoffset, offset - lastoffset, fld);
}

/* the sorted key file is unusable: clean up and give up */
static void
sorterr(void)
{
	(void) unlink(tempfile);
	fprintf(stderr, "%s\n", gettext("sortbib: sorting error"));
	exit(1);
}

/*
 * Sort the key file in place, then copy each record it names from the
 * nfiles open databases in fp[] to standard output.
 */
static void
deliver(FILE *fp[], int nfiles)
{
	char str[BUF], buff[MAXREC];	/* for tempfile & databases */
	char cmd[80];			/* for using system sort command */
	FILE *tfp;
	long offset, length;
	size_t nread;
	int i, c, status;

	/*
	 * Sort on the keys (field 5 onward, ignoring case), then on file
	 * index and offset so equal keys keep their input order.  These
	 * are the POSIX -k forms of the obsolete "+4f +0n +1n".
	 */
	(void) snprintf(cmd, sizeof (cmd), "sort -k 5f -k 1n -k 2n -o %s %s",
	    tempfile, tempfile);
	status = system(cmd);
	if (status == -1)
		tmperror("sortbib");
	/* system() returns a wait status, not the bare exit code */
	if (!WIFEXITED(status) || WEXITSTATUS(status) != 0)
		sorterr();

	if ((tfp = fopen(tempfile, "r")) == NULL)
		tmperror(tempfile);
	while (fgets(str, sizeof (str), tfp) != NULL) {
		/* keys may overflow str; drop the tail, only the head counts */
		if (strchr(str, '\n') == NULL)
			while ((c = getc(tfp)) != EOF && c != '\n')
				;
		/* get file pointer, record offset, and length */
		if (sscanf(str, "%d %ld %ld :", &i, &offset, &length) != 3 ||
		    i < 0 || i >= nfiles || length < 0 || length > MAXREC)
			sorterr();
		if (length == 0)
			continue;
		/* seek to proper disk location in proper file */
		if (fseek(fp[i], offset, SEEK_SET) == -1)
			tmperror("sortbib");
		/* read exactly one record from bibliography */
		nread = fread(buff, sizeof (*buff), (size_t)length, fp[i]);
		if (nread == 0)
			tmperror("sortbib");
		/* add newline between unseparated records */
		if (buff[0] != '\n' && rsmode == 1)
			putchar('\n');
		/* write only what was read to standard output */
		if (fwrite(buff, sizeof (*buff), nread, stdout) != nread)
			tmperror("sortbib");
	}
	(void) fclose(tfp);
}

/*
 * Split line into at most NWORDS whitespace-separated words.  A word
 * longer than WORDLEN - 1 characters is truncated.  Unused slots are
 * set to the empty string.  Returns the number of words stored.
 */
static int
getwords(const char *line, char wd[][WORDLEN])
{
	int n = 0, k;
	size_t len;

	while (n < NWORDS) {
		while (isspace((unsigned char)*line))
			line++;
		if (*line == '\0')
			break;
		len = 0;
		while (*line != '\0' && !isspace((unsigned char)*line)) {
			if (len < (size_t)WORDLEN - 1)
				wd[n][len++] = *line;
			line++;
		}
		wd[n][len] = '\0';
		n++;
	}
	for (k = n; k < NWORDS; k++)
		wd[k][0] = '\0';
	return (n);
}

/* append src to the BUF-byte sort field dst, truncating if it is full */
static void
fldcat(char *dst, const char *src)
{
	size_t used = strlen(dst);

	if (used < (size_t)BUF - 1)
		(void) strncat(dst, src, (size_t)BUF - 1 - used);
}

/*
 * Get fields out of a "%X ..." line and append them to the sort field
 * of every key slot in keystr that selects X.
 */
static void
parse(char line[], char fld[][BUF])
{
	char wd[NWORDS][WORDLEN];
	char key;
	int n, i, j;

	n = getwords(line, wd);
	if (n < 2)		/* a tag with no value adds nothing */
		return;
	key = wd[0][1];

	/* stop at the end of keystr; it may hold fewer than NKEYS keys */
	for (i = 0; i < NKEYS && keystr[i] != '\0'; i++) {
		if (key == keystr[i]) {
			if (key == 'A') {
				if (oneauth && !multauth)	/* no repeat */
					break;
				else if (oneauth)	/* mult auths */
					fldcat(fld[i], "~~");
				/*
				 * "Smith, Jr.": the word before the last ends
				 * in a comma, so it is the surname.  The
				 * shortened n is kept for later key slots.
				 */
				if (n > 2 && endcomma(wd[n-2])) {
					fldcat(fld[i], wd[n-2]);
					n--;
				} else {		/* surname */
					fldcat(fld[i], wd[n-1]);
				}
				fldcat(fld[i], " ");
				for (j = 1; j < n-1; j++)
					fldcat(fld[i], wd[j]);
				oneauth = 1;
			} else if (key == 'D') {
				fldcat(fld[i], wd[n-1]);	/* year */
				if (n > 2)
					fldcat(fld[i], wd[1]);	/* month */
			} else if (key == 'T' || key == 'J') {
				j = 1;
				if (article(wd[1]))	/* skip article */
					j++;
				for (; j < n; j++)
					fldcat(fld[i], wd[j]);
			} else {			/* any other field */
				for (j = 1; j < n; j++)
					fldcat(fld[i], wd[j]);
			}
		} else if (key == 'Q' && keystr[i] == 'A') {
			/* %Q quorporate or queer author - unreversed %A */
			for (j = 1; j < n; j++)
				fldcat(fld[i], wd[j]);
		}
	}
}

/* return 1 if str is exactly one of the known leading articles, else 0 */
static int
article(const char *str)
{
	static const char *const articles[] = {
		"The", "A", "An",		/* English */
		"Le", "La",			/* French */
		"Der", "Die", "Das",		/* German */
		"El",				/* Spanish */
		"Den"				/* Scandinavian */
	};
	size_t k;

	for (k = 0; k < sizeof (articles) / sizeof (articles[0]); k++)
		if (strcmp(articles[k], str) == 0)
			return (1);
	return (0);
}

/*
 * Evaluate key string for A+ marking: any '+' sets multauth and is
 * removed from keystr in place, so keystr must be writable.
 */
static void
eval(char keystr[])
{
	int i, j = 0;

	for (i = 0; keystr[i] != '\0'; i++) {
		if (keystr[i] == '+')
			multauth = 1;
		else
			keystr[j++] = keystr[i];
	}
	keystr[j] = '\0';
}

/* exit in case of various system errors */
static void
error(const char *s)
{
	perror(s);
	exit(1);
}

/* as error(), but remove the tempfile first without losing errno */
static void
tmperror(const char *s)
{
	int saved = errno;

	(void) unlink(tempfile);
	errno = saved;
	error(s);
}

/*
 * Remove tempfile in case of interrupt.
 * NOTE(review): fprintf(), gettext() and exit() are not
 * async-signal-safe; kept because the message is translated.
 */
static void
onintr(int sig)
{
	(void) sig;
	fprintf(stderr, gettext("\nInterrupt\n"));
	(void) unlink(tempfile);
	exit(1);
}

/* if str ends in a comma, strip it and return 1; otherwise return 0 */
static int
endcomma(char *str)
{
	size_t len = strlen(str);

	if (len > 0 && str[len - 1] == ',') {
		str[len - 1] = '\0';
		return (1);
	}
	return (0);
}