/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */


/*
 * Copyright (c) 1980 Regents of the University of California.
 * All rights reserved. The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 */

/*
 * Copyright (c) 1983-1988 by Sun Microsystems, Inc.
 * All Rights Reserved.
 */

#pragma ident "%Z%%M% %I% %E% SMI"

/*
 * sortbib: sort one or more bibliographic databases.
 *
 * The program works in two passes.  sortbib() scans every database and
 * writes one line per record to a temporary file; that line carries the
 * database index, the record's byte offset and length, and the sort keys
 * extracted by parse().  deliver() then has sort(1) order the temporary
 * file and copies the records to standard output in the sorted order by
 * seeking back into the databases.
 */

#include <ctype.h>
#include <errno.h>
#include <libintl.h>
#include <locale.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#define	BUF	BUFSIZ
#define	MXFILES	16
#define	NKEYS	4		/* number of sort fields kept per record */
#define	NWORDS	8		/* words examined on a '%' line */
#define	WDLEN	(BUF/4)		/* storage for one word, incl. terminator */

int main(int argc, char *argv[]);
void sortbib(FILE *fp, FILE *tfp, int i);
void deliver(FILE *fp[], FILE *tfp);
void parse(char line[], char fld[][BUF]);
int article(char *str);
void eval(char keystr[]);
void error(const char *s);
void onintr(int sig);
int endcomma(char *str);

char tempfile[32];		/* temporary file for sorting keys */
int tmpfd = -1;
/*
 * The default key lives in a writable array because eval() rewrites the
 * key string in place; a string literal must not be written to.
 */
static char defkeys[] = "AD";
char *keystr = defkeys;		/* default sorting on author and date */
int multauth = 0;		/* by default sort on senior author only */
int oneauth;			/* has there been author in the record? */

/*
 * Remove the temporary file, then report s via error().  errno is saved
 * around unlink() so that perror() describes the original failure.
 */
static void
tmpfail(const char *s)
{
	int saved = errno;

	(void) unlink(tempfile);
	errno = saved;
	error(s);
}

/*
 * Program entry: parse the optional -sKEYS argument, open every database
 * named on the command line, collect the sort keys into a temporary file
 * and deliver the sorted records on standard output.
 */
int
main(int argc, char *argv[])
{
	FILE *fp[MXFILES], *tfp;
	int i;

	(void) setlocale(LC_ALL, "");

#if !defined(TEXT_DOMAIN)
#define	TEXT_DOMAIN "SYS_TEST"
#endif
	(void) textdomain(TEXT_DOMAIN);

	if (argc == 1) {	/* can't use stdin for seeking anyway */
		puts(gettext("Usage: sortbib [-sKEYS] database [...]\n"
		    "\t-s: sort by fields in KEYS (default is AD)"));
		exit(1);
	}
	if (argc > 2 && argv[1][0] == '-' && argv[1][1] == 's') {
		/* if a key is specified use it, otherwise use default key */
		if (argv[1][2] != '\0')
			keystr = argv[1] + 2;
		eval(keystr);	/* evaluate A+ for multiple authors */
		argv++;
		argc--;
	}
	if (argc > MXFILES+1) {	/* too many open file streams */
		fprintf(stderr,
		    gettext("sortbib: More than %d databases specified\n"),
		    MXFILES);
		exit(1);
	}
	for (i = 1; i < argc; i++) {	/* open files in arg list */
		if ((fp[i-1] = fopen(argv[i], "r")) == NULL)
			error(argv[i]);
	}
	strcpy(tempfile, "/tmp/SbibXXXXXX");	/* tempfile for sorting keys */
	if ((tmpfd = mkstemp(tempfile)) == -1)
		error(tempfile);

	(void) close(tmpfd);
	if (signal(SIGINT, SIG_IGN) != SIG_IGN)	/* remove if interrupted */
		(void) signal(SIGINT, onintr);
	if ((tfp = fopen(tempfile, "w")) == NULL)
		tmpfail(tempfile);
	for (i = 0; i < argc-1; i++)	/* read keys from bib files */
		sortbib(fp[i], tfp, i);
	/* fclose() flushes; a failure here means the key file is incomplete */
	if (fclose(tfp) == EOF)
		tmpfail(tempfile);
	/* deliver() reopens the key file itself, so no stream is passed in */
	deliver(fp, NULL);	/* do disk seeks and read from biblio files */
	(void) unlink(tempfile);
	exit(0);
	/* NOTREACHED */
}

int rsmode = 0;		/* record separator: 1 = null line, 2 = bracket */

/*
 * Validate a record length.  Records longer than BUF*8 bytes cannot be
 * held by deliver()'s buffer, so the run is abandoned.  Returns the
 * length as an int once it is known to fit.
 */
static int
checklen(long span, int recno)
{
	if (span > BUF*8) {
		fprintf(stderr,
		    gettext("sortbib: record %d longer than %d (%d)\n"),
		    recno, BUF*8, (int)span);
		(void) unlink(tempfile);
		exit(1);
	}
	return ((int)span);
}

/*
 * Write one key line ("file offset length : key key key key") to the
 * temporary file and abort on a write error.
 */
static void
putkey(FILE *tfp, int i, long start, int length, char fld[][BUF])
{
	/* start is a long, so it must be printed with %ld */
	fprintf(tfp, "%d %ld %d : %s %s %s %s\n",
	    i, start, length, fld[0], fld[1], fld[2], fld[3]);
	if (ferror(tfp))
		tmpfail(tempfile);	/* disk error in /tmp */
}

/*
 * Read the records of one database and prepare the list for sorting.
 *
 * fp	database to scan, read from its current position
 * tfp	temporary key file, open for writing
 * i	index of the database, recorded in every key line
 *
 * A record starts at the first line of the file, at an empty line
 * (rsmode 1) or at a ".[" line (rsmode 2); whichever separator is seen
 * first fixes rsmode for the rest of the run.
 */
void
sortbib(FILE *fp, FILE *tfp, int i)
{
	long offset, lastoffset = 0;	/* byte offsets in file */
	int length, newrec = 0, recno = 0; /* reclen, new rec'd?, number */
	char line[BUF], fld[NKEYS][BUF]; /* one line, the sort fields */
	int k;

	/* start from defined contents so an empty file reads no garbage */
	line[0] = '\0';
	for (k = 0; k < NKEYS; k++)
		fld[k][0] = '\0';

	for (;;) {
		/* measure byte offset, then get new line */
		if ((offset = ftell(fp)) == -1L)
			tmpfail("sortbib");
		if (fgets(line, BUF, fp) == NULL)
			break;

		if (recno == 0)	/* accept record w/o initial newline */
			newrec = 1;
		if (line[0] == '\n') {	/* accept null line record separator */
			if (!rsmode)
				rsmode = 1;	/* null line mode */
			if (rsmode == 1)
				newrec = 1;
		}
		if (line[0] == '.' && line[1] == '[') {	/* also accept .[ .] */
			if (!rsmode)
				rsmode = 2;	/* bracket pair mode */
			if (rsmode == 2)
				newrec = 1;
		}
		if (newrec) {	/* by whatever means above */
			newrec = 0;
			/* measure rec len */
			length = checklen(offset - lastoffset, recno);
			if (recno++)	/* info for sorting */
				putkey(tfp, i, lastoffset, length, fld);
			for (k = 0; k < NKEYS; k++)
				fld[k][0] = '\0';
			oneauth = 0;	/* reset number of authors */
			lastoffset = offset;	/* save for next time */
		}
		if (line[0] == '%')	/* parse out fields to be sorted */
			parse(line, fld);
	}

	if (recno == 0)		/* no line was read: nothing to record */
		return;

	if ((offset = ftell(fp)) == -1L)	/* byte offset at EOF */
		tmpfail("sortbib");
	/* measure final record length */
	length = checklen(offset - lastoffset, recno);
	if (line[0] != '\n')	/* ignore null line just before EOF */
		putkey(tfp, i, lastoffset, length, fld);
}

/*
 * Deliver sorted entries out of the database(s).
 *
 * fp	the open databases, indexed by the number stored in each key line
 * tfp	ignored on entry; the key file is reopened here after sorting
 *
 * The key file is sorted in place by sort(1), then every key line is
 * turned back into a seek and a copy of exactly one record to stdout.
 */
void
deliver(FILE *fp[], FILE *tfp)
{
	char str[BUF], buff[BUF*8];	/* for tempfile & databases */
	char cmd[80];		/* for using system sort command */
	long int offset;
	int i, length;
	int c;

	/*
	 * when sorting, ignore case distinctions; tab char is ':'
	 * NOTE(review): "+4f +0n +1n" is the old-style sort key syntax;
	 * confirm the target sort(1) still accepts it before porting.
	 */
	(void) snprintf(cmd, sizeof (cmd), "sort +4f +0n +1n %s -o %s",
	    tempfile, tempfile);
	if (system(cmd) == 127)
		tmpfail("sortbib");
	if ((tfp = fopen(tempfile, "r")) == NULL)
		tmpfail(tempfile);

	while (fgets(str, sizeof (str), tfp)) {
		/* get file pointer, record offset, and length */
		if (sscanf(str, "%d %ld %d :", &i, &offset, &length) != 3)
			tmpfail(gettext("sortbib: sorting error"));
		/* never index fp[] or fill buff[] from unchecked values */
		if (i < 0 || i >= MXFILES || offset < 0 ||
		    length <= 0 || length > (int)sizeof (buff))
			tmpfail(gettext("sortbib: sorting error"));
		/*
		 * A key line may be longer than str; drop the remainder so
		 * it is not mistaken for the next key line.
		 */
		if (strchr(str, '\n') == NULL) {
			while ((c = getc(tfp)) != EOF && c != '\n')
				;
		}
		/* seek to proper disk location in proper file */
		if (fseek(fp[i], offset, 0) == -1)
			tmpfail("sortbib");
		/* read exactly one record from bibliography */
		if (fread(buff, sizeof (*buff), (size_t)length, fp[i]) !=
		    (size_t)length)
			tmpfail("sortbib");
		/* add newline between unseparated records */
		if (buff[0] != '\n' && rsmode == 1)
			putchar('\n');
		/* write record buffer to standard output */
		if (fwrite(buff, sizeof (*buff), (size_t)length, stdout) !=
		    (size_t)length)
			tmpfail("sortbib");
	}
	(void) fclose(tfp);
}

/*
 * Split line into at most maxwd whitespace-separated words.  Each word
 * is truncated to WDLEN-1 characters so it always fits its slot.
 * Returns the number of words stored.
 */
static int
getwords(const char *line, char wd[][WDLEN], int maxwd)
{
	const unsigned char *p = (const unsigned char *)line;
	int n = 0;

	while (n < maxwd) {
		size_t len = 0;

		while (isspace(*p))
			p++;
		if (*p == '\0')
			break;
		while (*p != '\0' && !isspace(*p)) {
			if (len < (size_t)(WDLEN - 1))
				wd[n][len++] = (char)*p;
			p++;
		}
		wd[n][len] = '\0';
		n++;
	}
	return (n);
}

/*
 * Append src to the sort field dst, which is BUF bytes long, dropping
 * whatever does not fit.  A field can grow over several lines of one
 * record, so the append has to be bounded.
 */
static void
fldcat(char *dst, const char *src)
{
	size_t used = strlen(dst);

	if (used < (size_t)BUF - 1)
		(void) strncat(dst, src, (size_t)BUF - 1 - used);
}

/*
 * Get fields out of line, prepare for sorting.
 *
 * line	one '%' line of a record; its second character is the field tag
 * fld	the record's sort fields, one per character of keystr; text from
 *	this line is appended to every field whose key matches the tag
 */
void
parse(char line[], char fld[][BUF])
{
	char wd[NWORDS][WDLEN];
	int n, i, j;
	char tag;

	for (i = 0; i < NWORDS; i++)	/* zap out old strings */
		wd[i][0] = '\0';
	n = getwords(line, wd, NWORDS);
	if (n < 1)
		return;
	tag = wd[0][1];	/* wd[0] is non-empty, so this is in bounds */

	/* stop at the end of keystr: it may hold fewer than NKEYS keys */
	for (i = 0; i < NKEYS && keystr[i] != '\0'; i++) {
		if (tag == keystr[i]) {
			if (tag == 'A') {
				if (n < 2)	/* %A with no name */
					continue;
				if (oneauth && !multauth)	/* no repeat */
					break;
				else if (oneauth)	/* mult auths */
					fldcat(fld[i], "~~");
				if (!endcomma(wd[n-2])) {	/* surname */
					fldcat(fld[i], wd[n-1]);
				} else {	/* jr. or ed. */
					fldcat(fld[i], wd[n-2]);
					n--;
				}
				fldcat(fld[i], " ");
				for (j = 1; j < n-1; j++)
					fldcat(fld[i], wd[j]);
				oneauth = 1;
			} else if (tag == 'D') {
				fldcat(fld[i], wd[n-1]);	/* year */
				if (n > 2)
					fldcat(fld[i], wd[1]);	/* month */
			} else if (tag == 'T' || tag == 'J') {
				j = 1;
				if (article(wd[1]))	/* skip article */
					j++;
				for (; j < n; j++)
					fldcat(fld[i], wd[j]);
			} else {	/* any other field */
				for (j = 1; j < n; j++)
					fldcat(fld[i], wd[j]);
			}
		} else if (tag == 'Q' && keystr[i] == 'A') {
			/* %Q quorporate or queer author - unreversed %A */
			for (j = 1; j < n; j++)
				fldcat(fld[i], wd[j]);
		}
	}
}

/*
 * See if string is an article.  Returns 1 when str is exactly one of
 * the listed words, 0 otherwise.
 */
int
article(char *str)
{
	static const char *const articles[] = {
		"The", "A", "An",	/* English */
		"Le", "La",		/* French */
		"Der", "Die", "Das",	/* German */
		"El",			/* Spanish */
		"Den"			/* Scandinavian */
	};
	size_t k;

	for (k = 0; k < sizeof (articles) / sizeof (articles[0]); k++) {
		if (strcmp(articles[k], str) == 0)
			return (1);
	}
	return (0);
}

/*
 * Evaluate key string for A+ marking.  A '+' sets multauth and is
 * removed from keystr, which is compacted in place; keystr must
 * therefore be writable.
 */
void
eval(char keystr[])
{
	int i, j;

	for (i = 0, j = 0; keystr[i]; i++, j++) {
		if (keystr[i] == '+') {
			multauth = 1;
			i++;
		}
		if (keystr[i] == '\0')
			break;
		keystr[j] = keystr[i];
	}
	keystr[j] = '\0';
}

/*
 * Exit in case of various system errors: print s with the current
 * errno text and terminate with status 1.
 */
void
error(const char *s)
{
	perror(s);
	exit(1);
}

/*
 * SIGINT handler: remove tempfile in case of interrupt and exit.
 * sig is required by the handler signature and otherwise unused.
 */
void
onintr(int sig)
{
	(void) sig;
	fprintf(stderr, gettext("\nInterrupt\n"));
	(void) unlink(tempfile);
	exit(1);
}

/*
 * If str ends in a comma, strip the comma and return 1; otherwise
 * leave str alone and return 0.  An empty string has no last character
 * and yields 0.
 */
int
endcomma(char *str)
{
	size_t len = strlen(str);

	if (len > 0 && str[len - 1] == ',') {
		str[len - 1] = '\0';
		return (1);
	}
	return (0);
}