/*
 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
 * Use is subject to license terms.
 */

/* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
/* All Rights Reserved */

/*
 * Copyright (c) 1980 Regents of the University of California.
 * All rights reserved. The Berkeley software License Agreement
 * specifies the terms and conditions for redistribution.
 */

/*
 * sortbib: print the records of one or more bibliographic databases in
 * sorted order.
 *
 * A first pass (sortbib()) writes one key line per record to a temporary
 * file: the database index, the record's byte offset and length, and the
 * sort fields.  The temporary file is then sorted with sort(1) and a
 * second pass (deliver()) seeks to every record in sorted order and
 * copies it to standard output.
 */

/* mkstemp() is a POSIX interface; ask for it before any system header */
#ifndef _POSIX_C_SOURCE
#define	_POSIX_C_SOURCE 200809L
#endif

#include <errno.h>
#include <libintl.h>
#include <locale.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>

#if !defined(TEXT_DOMAIN)
#define	TEXT_DOMAIN "SYS_TEST"
#endif

#define	BUF BUFSIZ
#define	MXFILES 16
#define	NKEYS 4		/* number of sort fields kept per record */
#define	NWORDS 8	/* number of words scanned from one field line */

char tempfile[32];	/* temporary file for sorting keys */
int tmpfd = -1;
char *keystr = "AD";	/* default sorting on author and date */
int multauth = 0;	/* by default sort on senior author only */
int oneauth;		/* has there been author in the record? */
int rsmode = 0;		/* record separator: 1 = null line, 2 = bracket */

/* interrupt message, translated in main() so the handler need not do it */
static const char *intrmsg = "\nInterrupt\n";
static size_t intrlen;

static int article(char *);
static void deliver(FILE *[]);
static int endcomma(char *);
static void error(const char *);
static void eval(char []);
static void fatal(const char *);
static void fldcat(char *, const char *);
static void onintr(int);
static void parse(char [], char fld[][BUF]);
static void putkey(FILE *, int, long, int, char fld[][BUF]);
static int reclen(long, long, int);
static void sortbib(FILE *, FILE *, int);
static void tmperror(const char *);

/*
 * Usage: sortbib [-sKEYS] database [...]
 *
 * Opens every database, collects the sort keys of all records in a
 * temporary file, and prints the records in sorted order.
 * Returns 0 on success; every failure path exits with status 1.
 */
int
main(int argc, char *argv[])
{
	FILE *fp[MXFILES] = { NULL };	/* unused slots must stay NULL */
	FILE *tfp;
	int i;

	(void) setlocale(LC_ALL, "");
	(void) textdomain(TEXT_DOMAIN);

	intrmsg = gettext("\nInterrupt\n");
	intrlen = strlen(intrmsg);

	if (argc == 1) {	/* can't use stdin for seeking anyway */
		puts(gettext("Usage: sortbib [-sKEYS] database [...]\n"
		    "\t-s: sort by fields in KEYS (default is AD)"));
		exit(1);
	}
	if (argc > 2 && argv[1][0] == '-' && argv[1][1] == 's') {
		/*
		 * If a key is specified use it, otherwise keep the default.
		 * eval() edits the string in place, so it is only applied
		 * to the argv copy and never to the default string literal.
		 */
		if (argv[1][2] != '\0') {
			keystr = argv[1] + 2;
			eval(keystr);	/* evaluate A+ for multiple authors */
		}
		argv++;
		argc--;
	}
	if (argc > MXFILES + 1) {	/* too many open file streams */
		(void) fprintf(stderr,
		    gettext("sortbib: More than %d databases specified\n"),
		    MXFILES);
		exit(1);
	}
	for (i = 1; i < argc; i++) {	/* open files in arg list */
		if ((fp[i - 1] = fopen(argv[i], "r")) == NULL)
			error(argv[i]);
	}

	(void) strcpy(tempfile, "/tmp/SbibXXXXXX");	/* for sorting keys */
	if ((tmpfd = mkstemp(tempfile)) == -1)
		error(tempfile);
	(void) close(tmpfd);
	tmpfd = -1;

	if (signal(SIGINT, SIG_IGN) != SIG_IGN)	/* remove if interrupted */
		(void) signal(SIGINT, onintr);

	if ((tfp = fopen(tempfile, "w")) == NULL)
		tmperror(tempfile);
	for (i = 0; i < argc - 1; i++)	/* read keys from bib files */
		sortbib(fp[i], tfp, i);
	if (fclose(tfp) == EOF)		/* buffered keys may fail to flush */
		tmperror(tempfile);

	deliver(fp);	/* do disk seeks and read from biblio files */
	(void) unlink(tempfile);
	return (0);
}

/*
 * Return the length of the record spanning byte offsets [start, end).
 * A record longer than BUF*8 bytes cannot be buffered by deliver(), so
 * that case is reported and the program exits.
 */
static int
reclen(long start, long end, int recno)
{
	long span = end - start;

	if (span > BUF * 8) {
		(void) fprintf(stderr,
		    gettext("sortbib: record %d longer than %d (%d)\n"),
		    recno, BUF * 8, (int)span);
		(void) unlink(tempfile);
		exit(1);
	}
	return ((int)span);
}

/*
 * Write one key line to the temporary file:
 *	<database index> <offset> <length> : <field> <field> <field> <field>
 */
static void
putkey(FILE *tfp, int i, long start, int length, char fld[][BUF])
{
	(void) fprintf(tfp, "%d %ld %d : %s %s %s %s\n",
	    i, start, length, fld[0], fld[1], fld[2], fld[3]);
	if (ferror(tfp))
		tmperror(tempfile);	/* disk error in /tmp */
}

/*
 * Read the records of database fp (index i) and write one key line per
 * record to tfp.  Records are separated either by an empty line or by a
 * line starting with ".["; whichever separator is seen first is recorded
 * in rsmode and used from then on.
 */
static void
sortbib(FILE *fp, FILE *tfp, int i)
{
	long offset, lastoffset = 0;		/* byte offsets in file */
	int length, newrec = 0, recno = 0;	/* reclen, new rec'd?, number */
	char line[BUF], fld[NKEYS][BUF];	/* one line, the sort fields */
	int k;

	/* an empty database never fills these in, so start them out empty */
	line[0] = '\0';
	for (k = 0; k < NKEYS; k++)
		fld[k][0] = '\0';

	for (;;) {
		/* measure byte offset, then get new line */
		if ((offset = ftell(fp)) == -1L)
			tmperror("sortbib");
		if (fgets(line, BUF, fp) == NULL)
			break;

		if (recno == 0)		/* accept record w/o initial newline */
			newrec = 1;
		if (line[0] == '\n') {	/* accept null line record separator */
			if (!rsmode)
				rsmode = 1;	/* null line mode */
			if (rsmode == 1)
				newrec = 1;
		}
		if (line[0] == '.' && line[1] == '[') {	/* also accept .[ .] */
			if (!rsmode)
				rsmode = 2;	/* bracket pair mode */
			if (rsmode == 2)
				newrec = 1;
		}
		if (newrec) {		/* by whatever means above */
			newrec = 0;
			length = reclen(lastoffset, offset, recno);
			if (recno++)	/* info for sorting */
				putkey(tfp, i, lastoffset, length, fld);
			for (k = 0; k < NKEYS; k++)
				fld[k][0] = '\0';
			oneauth = 0;		/* reset number of authors */
			lastoffset = offset;	/* save for next time */
		}
		if (line[0] == '%')	/* parse out fields to be sorted */
			parse(line, fld);
	}

	/* offset now holds the byte offset at EOF: close the last record */
	length = reclen(lastoffset, offset, recno);
	/* ignore an empty file and a null line just before EOF */
	if (length > 0 && line[0] != '\n')
		putkey(tfp, i, lastoffset, length, fld);
}

/*
 * Sort the key file with sort(1), then copy every record from its
 * database to standard output in the sorted order.  fp[] holds the open
 * databases, indexed by the number stored in each key line.
 */
static void
deliver(FILE *fp[])
{
	char str[BUF], buff[BUF * 8];	/* for tempfile & databases */
	char cmd[80];			/* for using system sort command */
	FILE *tfp;
	long offset;
	int i, length, c;

	/*
	 * Sort on the fields after the ':' ignoring case, then on database
	 * index and record offset.  "-k 5f -k 1n -k 2n" is the current
	 * spelling of the obsolete "+4f +0n +1n" key syntax.
	 */
	(void) snprintf(cmd, sizeof (cmd), "sort -k 5f -k 1n -k 2n %s -o %s",
	    tempfile, tempfile);
	if (system(cmd) != 0)
		fatal(gettext("sortbib: sorting error"));

	if ((tfp = fopen(tempfile, "r")) == NULL)
		tmperror(tempfile);
	while (fgets(str, sizeof (str), tfp) != NULL) {
		/* get file pointer, record offset, and length */
		if (sscanf(str, "%d %ld %d :", &i, &offset, &length) != 3)
			fatal(gettext("sortbib: sorting error"));
		/* never index fp[] or fill buff[] from unchecked numbers */
		if (i < 0 || i >= MXFILES || fp[i] == NULL || offset < 0 ||
		    length <= 0 || (size_t)length > sizeof (buff))
			fatal(gettext("sortbib: sorting error"));
		/* a key line longer than str: skip the part that did not fit */
		if (strchr(str, '\n') == NULL) {
			while ((c = getc(tfp)) != EOF && c != '\n')
				;
		}
		/* seek to proper disk location in proper file */
		if (fseek(fp[i], offset, SEEK_SET) != 0)
			tmperror("sortbib");
		/* read exactly one record from bibliography */
		if (fread(buff, 1, (size_t)length, fp[i]) != (size_t)length)
			tmperror("sortbib");
		/* add newline between unseparated records */
		if (buff[0] != '\n' && rsmode == 1)
			(void) putchar('\n');
		/* write record buffer to standard output */
		if (fwrite(buff, 1, (size_t)length, stdout) != (size_t)length)
			tmperror("sortbib");
	}
	(void) fclose(tfp);
	if (fflush(stdout) == EOF)
		tmperror("sortbib");
}

/*
 * Append src to the sort field dst, which is BUF bytes long.  Text that
 * does not fit is dropped; a field collects words from several lines and
 * would otherwise overflow.
 */
static void
fldcat(char *dst, const char *src)
{
	size_t used = strlen(dst);
	size_t room = (size_t)BUF - 1 - used;
	size_t len = strlen(src);

	if (len > room)
		len = room;
	(void) memcpy(dst + used, src, len);
	dst[used + len] = '\0';
}

/*
 * Split a "%X word ..." line into words and append the words to every
 * sort field whose key letter in keystr matches X.  line must be a
 * NUL-terminated string shorter than BUF, as delivered by fgets() in
 * sortbib().
 */
static void
parse(char line[], char fld[][BUF])
{
	/* each word gets BUF bytes: no word can be longer than the line */
	char wd[NWORDS][BUF];
	char tag;
	int n, i, j;

	for (i = 0; i < NWORDS; i++)	/* zap out old strings */
		wd[i][0] = '\0';
	n = sscanf(line, "%s %s %s %s %s %s %s %s",
	    wd[0], wd[1], wd[2], wd[3], wd[4], wd[5], wd[6], wd[7]);
	if (n < 2)	/* a key letter without any data */
		return;
	tag = wd[0][1];

	/* stop at the end of keystr: a short key has fewer than NKEYS */
	for (i = 0; i < NKEYS && keystr[i] != '\0'; i++) {
		if (tag == keystr[i]) {
			if (tag == 'A') {
				if (oneauth && !multauth)  /* no repeat */
					break;
				if (n < 2)	/* words used up below */
					break;
				if (oneauth)	/* mult auths */
					fldcat(fld[i], "~~");
				if (!endcomma(wd[n - 2])) {	/* surname */
					fldcat(fld[i], wd[n - 1]);
				} else {	/* jr. or ed. */
					fldcat(fld[i], wd[n - 2]);
					n--;
				}
				fldcat(fld[i], " ");
				for (j = 1; j < n - 1; j++)
					fldcat(fld[i], wd[j]);
				oneauth = 1;
			} else if (tag == 'D') {
				fldcat(fld[i], wd[n - 1]);	/* year */
				if (n > 2)
					fldcat(fld[i], wd[1]);	/* month */
			} else if (tag == 'T' || tag == 'J') {
				j = 1;
				if (article(wd[1]))	/* skip article */
					j++;
				for (; j < n; j++)
					fldcat(fld[i], wd[j]);
			} else {	/* any other field */
				for (j = 1; j < n; j++)
					fldcat(fld[i], wd[j]);
			}
		} else if (tag == 'Q' && keystr[i] == 'A') {
			/* %Q quorporate or queer author - unreversed %A */
			for (j = 1; j < n; j++)
				fldcat(fld[i], wd[j]);
		}
	}
}

/* return 1 if str is exactly one of the known leading articles, else 0 */
static int
article(char *str)
{
	static const char *const articles[] = {
		"The", "A", "An",	/* English */
		"Le", "La",		/* French */
		"Der", "Die", "Das",	/* German */
		"El",			/* Spanish */
		"Den"			/* Scandinavian */
	};
	size_t k;

	for (k = 0; k < sizeof (articles) / sizeof (articles[0]); k++) {
		if (strcmp(articles[k], str) == 0)
			return (1);
	}
	return (0);
}

/*
 * Evaluate a key string for A+ marking: every '+' is removed from keys
 * in place and, if one was present, multauth is set.
 */
static void
eval(char keys[])
{
	char *src, *dst;

	for (src = dst = keys; *src != '\0'; src++) {
		if (*src == '+')
			multauth = 1;
		else
			*dst++ = *src;
	}
	*dst = '\0';
}

/* report a system error for s with perror() and exit */
static void
error(const char *s)
{
	perror(s);
	exit(1);
}

/*
 * Like error(), but remove the temporary file first.  errno is saved
 * around unlink() so that perror() reports the original failure.
 */
static void
tmperror(const char *s)
{
	int saved = errno;

	(void) unlink(tempfile);
	errno = saved;
	error(s);
}

/* remove the temporary file, print msg on stderr and exit */
static void
fatal(const char *msg)
{
	(void) unlink(tempfile);
	(void) fprintf(stderr, "%s\n", msg);
	exit(1);
}

/*
 * SIGINT handler: remove tempfile and terminate.  Only write(), unlink()
 * and _exit() are called here because stdio and exit() are not safe to
 * use from a signal handler.
 */
static void
onintr(int sig)
{
	(void) sig;
	(void) write(STDERR_FILENO, intrmsg, intrlen);
	(void) unlink(tempfile);
	_exit(1);
}

/*
 * If str ends in a comma, strip the comma and return 1; otherwise leave
 * str alone and return 0.  An empty string has no comma.
 */
static int
endcomma(char *str)
{
	size_t len = strlen(str);

	if (len > 0 && str[len - 1] == ',') {
		str[len - 1] = '\0';
		return (1);
	}
	return (0);
}