/*
 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */

/*
 * Copyright (c) 1980 Regents of the University of California.
 * All rights reserved. The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

#include <ctype.h>
#include <errno.h>
#include <libintl.h>
#include <locale.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define	BUF	BUFSIZ
#define	MXFILES	16
#define	NKEYS	4		/* sort fields kept per record */
#define	NWORDS	8		/* words examined on one '%' line */
#define	WDLEN	(BUF / 4)	/* bytes per word buffer, NUL included */

/*
 * Default key string.  It lives in a writable array because eval()
 * compacts the key string in place, which must not touch a literal.
 */
static char defkeys[] = "AD";

char tempfile[32];		/* temporary file for sorting keys */
int tmpfd = -1;
char *keystr = defkeys;		/* default sorting on author and date */
int multauth = 0;		/* by default sort on senior author only */
int oneauth;			/* has there been author in the record? */
int rsmode = 0;			/* record separator: 1 = null line, 2 = bracket */

static int nfiles;		/* number of databases opened by main() */

static void append(char *, const char *);
static int article(char *);
static void deliver(FILE *[], FILE *);
static int endcomma(char *);
static void error(const char *);
static void eval(char []);
static void fatal(const char *);
static void onintr(int);
static void parse(char [], char fld[][BUF]);
static void putkeys(FILE *, int, long, int, char fld[][BUF]);
static int reclen(long, long, int);
static void sortbib(FILE *, FILE *, int);
static int split(const char *, char wd[][WDLEN]);

/*
 * sortbib: sort bibliographic database in place
 *
 * Usage: sortbib [-sKEYS] database [...]
 *
 * Every database is scanned once to write one key line per record into
 * a temporary file; that file is sorted with sort(1) and the records
 * are then copied to standard output in sorted order.
 */
int
main(int argc, char *argv[])
{
	FILE *fp[MXFILES], *tfp;
	int i;

	(void) setlocale(LC_ALL, "");

#if !defined(TEXT_DOMAIN)
#define	TEXT_DOMAIN "SYS_TEST"
#endif
	(void) textdomain(TEXT_DOMAIN);

	if (argc == 1) {	/* can't use stdin for seeking anyway */
		puts(gettext("Usage: sortbib [-sKEYS] database [...]\n"
		    "\t-s: sort by fields in KEYS (default is AD)"));
		exit(1);
	}
	if (argc > 2 && argv[1][0] == '-' && argv[1][1] == 's') {
		/* if a key is specified use it, otherwise use default key */
		if (argv[1][2] != '\0')
			keystr = argv[1] + 2;
		eval(keystr);	/* evaluate A+ for multiple authors */
		argv++;
		argc--;
	}
	if (argc > MXFILES + 1) {	/* too many open file streams */
		fprintf(stderr,
		    gettext("sortbib: More than %d databases specified\n"),
		    MXFILES);
		exit(1);
	}
	nfiles = argc - 1;
	for (i = 1; i < argc; i++)	/* open files in arg list */
		if ((fp[i - 1] = fopen(argv[i], "r")) == NULL)
			error(argv[i]);
	strcpy(tempfile, "/tmp/SbibXXXXXX");	/* tempfile for sorting keys */
	if ((tmpfd = mkstemp(tempfile)) == -1)
		error(tempfile);

	(void) close(tmpfd);
	if (signal(SIGINT, SIG_IGN) != SIG_IGN)	/* remove if interrupted */
		(void) signal(SIGINT, onintr);
	if ((tfp = fopen(tempfile, "w")) == NULL)
		fatal(tempfile);
	for (i = 0; i < nfiles; i++)	/* read keys from bib files */
		sortbib(fp[i], tfp, i);
	if (fclose(tfp) == EOF)		/* flush failure loses key lines */
		fatal(tempfile);
	/* deliver() opens the temp file itself; the old stream is closed */
	deliver(fp, NULL);
	(void) unlink(tempfile);
	return (0);
}

/*
 * Return the length of the record spanning byte offsets [start, end).
 * Records longer than BUF*8 cannot be buffered by deliver(), so they
 * are reported and the program exits after removing the temp file.
 */
static int
reclen(long start, long end, int recno)
{
	long span = end - start;

	if (span > BUF * 8) {
		fprintf(stderr,
		    gettext("sortbib: record %d longer than %d (%d)\n"),
		    recno, BUF * 8, (int)span);
		(void) unlink(tempfile);
		exit(1);
	}
	return ((int)span);
}

/*
 * Write one key line to the temp file:
 *	file-index offset length : key0 key1 key2 key3
 * The offset is a long and is therefore written with %ld.
 */
static void
putkeys(FILE *tfp, int i, long offset, int length, char fld[][BUF])
{
	fprintf(tfp, "%d %ld %d : %s %s %s %s\n",
	    i, offset, length, fld[0], fld[1], fld[2], fld[3]);
	if (ferror(tfp))
		fatal(tempfile);	/* disk error in /tmp */
}

/*
 * read records, prepare list for sorting
 *
 * fp is the database, tfp the temp file receiving key lines and i the
 * index of fp in main()'s file table.  A record starts at the first
 * line of the file and at each separator line; the separator style
 * (blank line or ".[") is fixed by whichever is seen first and is
 * remembered in rsmode.
 */
static void
sortbib(FILE *fp, FILE *tfp, int i)
{
	long offset, lastoffset = 0;	/* byte offsets in file */
	int length, newrec = 0, recno = 0;
	char line[BUF], fld[NKEYS][BUF]; /* one line, the sort fields */
	int k;

	line[0] = '\0';
	for (k = 0; k < NKEYS; k++)
		fld[k][0] = '\0';

	/* measure byte offset, then get new line */
	while (offset = ftell(fp), fgets(line, BUF, fp)) {
		if (recno == 0)	/* accept record w/o initial newline */
			newrec = 1;
		if (line[0] == '\n') {	/* null line record separator */
			if (!rsmode)
				rsmode = 1;	/* null line mode */
			if (rsmode == 1)
				newrec = 1;
		}
		if (line[0] == '.' && line[1] == '[') {	/* also .[ .] */
			if (!rsmode)
				rsmode = 2;	/* bracket pair mode */
			if (rsmode == 2)
				newrec = 1;
		}
		if (newrec) {	/* by whatever means above */
			newrec = 0;
			length = reclen(lastoffset, offset, recno);
			if (recno++)	/* nothing precedes first record */
				putkeys(tfp, i, lastoffset, length, fld);
			for (k = 0; k < NKEYS; k++)
				fld[k][0] = '\0';
			oneauth = 0;	/* reset number of authors */
			lastoffset = offset;	/* save for next time */
		}
		if (line[0] == '%')	/* parse out fields to be sorted */
			parse(line, fld);
	}
	if (recno == 0)		/* no lines at all: no record to emit */
		return;
	offset = ftell(fp);	/* measure byte offset at EOF */
	length = reclen(lastoffset, offset, recno);
	if (line[0] != '\n')	/* ignore null line just before EOF */
		putkeys(tfp, i, lastoffset, length, fld);
}

/*
 * deliver sorted entries out of database(s)
 *
 * Sorts the key file with sort(1), then for each key line seeks to the
 * recorded offset in the recorded database and copies the record to
 * standard output.  The tfp argument is ignored on entry; the temp
 * file is reopened here for reading.
 */
static void
deliver(FILE *fp[], FILE *tfp)
{
	char str[BUF], buff[BUF * 8];	/* for tempfile & databases */
	char cmd[80];			/* for using system sort command */
	long int offset;
	int i, length, c;

	/*
	 * Sort on the key text (field 5 onward, ignoring case), then on
	 * file index and offset.  This is the -k spelling of the
	 * obsolescent "+4f +0n +1n" key positions.
	 */
	(void) snprintf(cmd, sizeof (cmd),
	    "sort -k 5f -k 1n -k 2n -o %s %s", tempfile, tempfile);
	if (system(cmd) != 0) {
		(void) unlink(tempfile);
		fprintf(stderr, "%s\n",
		    gettext("sortbib: sort command failed"));
		exit(1);
	}
	if ((tfp = fopen(tempfile, "r")) == NULL)
		fatal(tempfile);
	while (fgets(str, sizeof (str), tfp)) {
		/* drop the tail of a key line too long for str */
		if (strchr(str, '\n') == NULL)
			while ((c = getc(tfp)) != EOF && c != '\n')
				;
		/* get file pointer, record offset, and length */
		if (sscanf(str, "%d %ld %d :", &i, &offset, &length) != 3 ||
		    i < 0 || i >= nfiles ||
		    length < 0 || length > (int)sizeof (buff)) {
			(void) unlink(tempfile);
			fprintf(stderr, "%s\n",
			    gettext("sortbib: sorting error"));
			exit(1);
		}
		if (length == 0)
			continue;
		/* seek to proper disk location in proper file */
		if (fseek(fp[i], offset, SEEK_SET) == -1)
			fatal("sortbib");
		/* read exactly one record from bibliography */
		if (fread(buff, sizeof (*buff), (size_t)length, fp[i]) !=
		    (size_t)length)
			fatal("sortbib");
		/* add newline between unseparated records */
		if (buff[0] != '\n' && rsmode == 1)
			putchar('\n');
		/* write record buffer to standard output */
		if (fwrite(buff, sizeof (*buff), (size_t)length, stdout) !=
		    (size_t)length)
			fatal("sortbib");
	}
	(void) fclose(tfp);
}

/*
 * Split line into at most NWORDS whitespace-separated words.  A word
 * that does not fit in WDLEN bytes is truncated.  Returns the number
 * of words stored.
 */
static int
split(const char *line, char wd[][WDLEN])
{
	const unsigned char *p = (const unsigned char *)line;
	size_t len;
	int n = 0;

	while (n < NWORDS) {
		while (isspace(*p))
			p++;
		if (*p == '\0')
			break;
		len = 0;
		while (*p != '\0' && !isspace(*p)) {
			if (len < (size_t)(WDLEN - 1))
				wd[n][len++] = (char)*p;
			p++;
		}
		wd[n][len] = '\0';
		n++;
	}
	return (n);
}

/*
 * Append src to dst, where dst is a NUL-terminated buffer of BUF
 * bytes.  Text that does not fit is silently dropped.
 */
static void
append(char *dst, const char *src)
{
	size_t used = strlen(dst);
	size_t room = (size_t)(BUF - 1) - used;
	size_t len = strlen(src);

	if (len > room)
		len = room;
	memcpy(dst + used, src, len);
	dst[used + len] = '\0';
}

/*
 * get fields out of line, prepare for sorting
 *
 * line is a '%' line of the current record.  Its tag letter (the
 * character after '%') is compared with each letter of keystr; on a
 * match the line's words are appended to the matching entry of fld.
 */
static void
parse(char line[], char fld[][BUF])
{
	char wd[NWORDS][WDLEN];
	char tag;
	int n, i, j;

	for (i = 0; i < NWORDS; i++)	/* zap out old strings */
		wd[i][0] = '\0';
	n = split(line, wd);
	if (n < 2)	/* tag with no text: nothing to sort on */
		return;
	tag = wd[0][1];

	/* stop at the end of keystr; it may hold fewer than NKEYS letters */
	for (i = 0; i < NKEYS && keystr[i] != '\0'; i++) {
		if (tag == keystr[i]) {
			if (tag == 'A') {
				if (oneauth && !multauth) /* no repeat */
					break;
				else if (oneauth)	/* mult auths */
					append(fld[i], "~~");
				if (!endcomma(wd[n - 2]))	/* surname */
					append(fld[i], wd[n - 1]);
				else {		/* jr. or ed. */
					append(fld[i], wd[n - 2]);
					n--;
				}
				append(fld[i], " ");
				for (j = 1; j < n - 1; j++)
					append(fld[i], wd[j]);
				oneauth = 1;
			} else if (tag == 'D') {
				append(fld[i], wd[n - 1]);	/* year */
				if (n > 2)
					append(fld[i], wd[1]);	/* month */
			} else if (tag == 'T' || tag == 'J') {
				j = 1;
				if (article(wd[1]))	/* skip article */
					j++;
				for (; j < n; j++)
					append(fld[i], wd[j]);
			} else {		/* any other field */
				for (j = 1; j < n; j++)
					append(fld[i], wd[j]);
			}
		}
		/* %Q quorporate or queer author - unreversed %A */
		else if (tag == 'Q' && keystr[i] == 'A') {
			for (j = 1; j < n; j++)
				append(fld[i], wd[j]);
		}
	}
}

/* see if string is an article; returns 1 if so, else 0 */
static int
article(char *str)
{
	static const char *const words[] = {
		"The", "A", "An",	/* English */
		"Le", "La",		/* French */
		"Der", "Die", "Das",	/* German */
		"El",			/* Spanish */
		"Den"			/* Scandinavian */
	};
	size_t k;

	for (k = 0; k < sizeof (words) / sizeof (words[0]); k++)
		if (strcmp(words[k], str) == 0)
			return (1);
	return (0);
}

/*
 * evaluate key string for A+ marking
 *
 * Each '+' is removed from keystr in place, together with setting
 * multauth; the character following a '+' is always kept.
 */
static void
eval(char keystr[])
{
	int i, j;

	for (i = 0, j = 0; keystr[i]; i++, j++) {
		if (keystr[i] == '+') {
			multauth = 1;
			i++;
		}
		if (keystr[i] == '\0')
			break;
		keystr[j] = keystr[i];
	}
	keystr[j] = '\0';
}

/* exit in case of various system errors */
static void
error(const char *s)
{
	perror(s);
	exit(1);
}

/*
 * Remove the temp file, then report s via error().  errno is saved
 * across the unlink so that perror() describes the original failure.
 */
static void
fatal(const char *s)
{
	int saved = errno;

	(void) unlink(tempfile);
	errno = saved;
	error(s);
}

/* remove tempfile in case of interrupt */
static void
onintr(int sig)
{
	(void) sig;
	(void) fputs(gettext("\nInterrupt\n"), stderr);
	(void) unlink(tempfile);
	exit(1);
}

/*
 * If str ends in a comma, strip the comma and return 1; otherwise
 * return 0.  An empty string is left untouched.
 */
static int
endcomma(char *str)
{
	size_t len = strlen(str);

	if (len > 0 && str[len - 1] == ',') {
		str[len - 1] = '\0';
		return (1);
	}
	return (0);
}