1 /*
2 * Copyright 2005 Sun Microsystems, Inc. All rights reserved.
3 * Use is subject to license terms.
4 */
5
6 /* Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T */
7 /* All Rights Reserved */
8
9 /*
10 * Copyright (c) 1980 Regents of the University of California.
11 * All rights reserved. The Berkeley software License Agreement
12 * specifies the terms and conditions for redistribution.
13 */
14
15 #pragma ident "%Z%%M% %I% %E% SMI"
16
#include <locale.h>
#include <stdio.h>
#include <signal.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
21
#define	BUF	BUFSIZ		/* size of the line and sort-field buffers */
#define	MXFILES	16		/* most databases accepted in one run */

/* name of the temporary file that collects the sorting keys */
char tempfile[32];

/* descriptor handed back when the temporary file is created */
int tmpfd = -1;

/* key letters to sort on; the default is author and then date */
char *keystr = "AD";

/* nonzero: sort on every author; zero: sort on the senior author only */
int multauth = 0;

/* nonzero once an author has been seen in the current record */
int oneauth;

static int article(char *str);
static void deliver(FILE *fp[], FILE *tfp);
static int endcomma(char *str);
static void error(char *s);
static void eval(char keystr[]);
static void parse(char line[], char fld[][BUF]);
static void sortbib(FILE *fp, FILE *tfp, int i);
static void onintr(void);
39
40 /* sortbib: sort bibliographic database in place */
41 int
main(int argc,char * argv[])42 main(int argc, char *argv[])
43 {
44 FILE *fp[MXFILES], *tfp;
45 int i;
46
47 (void) setlocale(LC_ALL, "");
48
49 #if !defined(TEXT_DOMAIN)
50 #define TEXT_DOMAIN "SYS_TEST"
51 #endif
52 (void) textdomain(TEXT_DOMAIN);
53
54 if (argc == 1) { /* can't use stdin for seeking anyway */
55 puts(gettext("Usage: sortbib [-sKEYS] database [...]\n\
56 \t-s: sort by fields in KEYS (default is AD)"));
57 exit(1);
58 }
59 if (argc > 2 && argv[1][0] == '-' && argv[1][1] == 's') {
60 /* if a key is specified use it, otherwise use default key */
61 if (argv[1][2] != '\0')
62 keystr = argv[1] + 2;
63 eval(keystr); /* evaluate A+ for multiple authors */
64 argv++; argc--;
65 }
66 if (argc > MXFILES+1) { /* too many open file streams */
67 fprintf(stderr,
68 gettext("sortbib: More than %d databases specified\n"),
69 MXFILES);
70 exit(1);
71 }
72 for (i = 1; i < argc; i++) /* open files in arg list */
73 if ((fp[i-1] = fopen(argv[i], "r")) == NULL)
74 error(argv[i]);
75 strcpy(tempfile, "/tmp/SbibXXXXXX"); /* tempfile for sorting keys */
76 if ((tmpfd = mkstemp(tempfile)) == -1)
77 error(tempfile);
78
79 (void) close(tmpfd);
80 if (signal(SIGINT, SIG_IGN) != SIG_IGN) /* remove if interrupted */
81 signal(SIGINT, (void(*)())onintr);
82 if ((tfp = fopen(tempfile, "w")) == NULL) {
83 (void) unlink(tempfile);
84 error(tempfile);
85 }
86 for (i = 0; i < argc-1; i++) /* read keys from bib files */
87 sortbib(fp[i], tfp, i);
88 fclose(tfp);
89 deliver(fp, tfp); /* do disk seeks and read from biblio files */
90 (void) unlink(tempfile);
91 return (0);
92 }
93
94 int rsmode = 0; /* record separator: 1 = null line, 2 = bracket */
95
96 /* read records, prepare list for sorting */
97 static void
sortbib(FILE * fp,FILE * tfp,int i)98 sortbib(FILE *fp, FILE *tfp, int i)
99 {
100 long offset, lastoffset = 0, ftell(); /* byte offsets in file */
101 int length, newrec, recno = 0; /* reclen, new rec'd?, number */
102 char line[BUF], fld[4][BUF]; /* one line, the sort fields */
103
104 /* measure byte offset, then get new line */
105 while (offset = ftell(fp), fgets(line, BUF, fp)) {
106 if (recno == 0) /* accept record w/o initial newline */
107 newrec = 1;
108 if (line[0] == '\n') { /* accept null line record separator */
109 if (!rsmode)
110 rsmode = 1; /* null line mode */
111 if (rsmode == 1)
112 newrec = 1;
113 }
114 if (line[0] == '.' && line[1] == '[') { /* also accept .[ .] */
115 if (!rsmode)
116 rsmode = 2; /* bracket pair mode */
117 if (rsmode == 2)
118 newrec = 1;
119 }
120 if (newrec) { /* by whatever means above */
121 newrec = 0;
122 length = offset - lastoffset; /* measure rec len */
123 if (length > BUF*8) {
124 fprintf(stderr,
125 gettext("sortbib: record %d longer than %d "
126 "(%d)\n"), recno, BUF*8, length);
127 (void) unlink(tempfile);
128 exit(1);
129 }
130 if (recno++) { /* info for sorting */
131 fprintf(tfp, "%d %d %d : %s %s %s %s\n",
132 i, lastoffset, length,
133 fld[0], fld[1], fld[2], fld[3]);
134 if (ferror(tfp)) {
135 (void) unlink(tempfile);
136 error(tempfile);
137 }
138 }
139 *fld[0] = *fld[1] = *fld[2] = *fld[3] = NULL;
140 oneauth = 0; /* reset number of authors */
141 lastoffset = offset; /* save for next time */
142 }
143 if (line[0] == '%') /* parse out fields to be sorted */
144 parse(line, fld);
145 }
146 offset = ftell(fp); /* measure byte offset at EOF */
147 length = offset - lastoffset; /* measure final record length */
148 if (length > BUF*8) {
149 fprintf(stderr,
150 gettext("sortbib: record %d longer than %d (%d)\n"),
151 recno, BUF*8, length);
152 (void) unlink(tempfile);
153 exit(1);
154 }
155 if (line[0] != '\n') { /* ignore null line just before EOF */
156 fprintf(tfp, "%d %d %d : %s %s %s %s\n",
157 i, lastoffset, length, fld[0], fld[1], fld[2], fld[3]);
158 if (ferror(tfp)) {
159 (void) unlink(tempfile);
160 error(tempfile); /* disk error in /tmp */
161 }
162 }
163 }
164
165 /* deliver sorted entries out of database(s) */
166 static void
deliver(FILE * fp[],FILE * tfp)167 deliver(FILE *fp[], FILE *tfp)
168 {
169 char str[BUF], buff[BUF*8]; /* for tempfile & databases */
170 char cmd[80]; /* for using system sort command */
171 long int offset;
172 int i, length;
173
174 /* when sorting, ignore case distinctions; tab char is ':' */
175 sprintf(cmd, "sort +4f +0n +1n %s -o %s", tempfile, tempfile);
176 if (system(cmd) == 127) {
177 (void) unlink(tempfile);
178 error("sortbib");
179 }
180 tfp = fopen(tempfile, "r");
181 while (fgets(str, sizeof (str), tfp)) {
182 /* get file pointer, record offset, and length */
183 if (sscanf(str, "%d %d %d :", &i, &offset, &length) != 3)
184 error(gettext("sortbib: sorting error"));
185 /* seek to proper disk location in proper file */
186 if (fseek(fp[i], offset, 0) == -1) {
187 (void) unlink(tempfile);
188 error("sortbib");
189 }
190 /* read exactly one record from bibliography */
191 if (fread(buff, sizeof (*buff), length, fp[i]) == 0) {
192 (void) unlink(tempfile);
193 error("sortbib");
194 }
195 /* add newline between unseparated records */
196 if (buff[0] != '\n' && rsmode == 1)
197 putchar('\n');
198 /* write record buffer to standard output */
199 if (fwrite(buff, sizeof (*buff), length, stdout) == 0) {
200 (void) unlink(tempfile);
201 error("sortbib");
202 }
203 }
204 }
205
206 /* get fields out of line, prepare for sorting */
207 static void
parse(char line[],char fld[][BUF])208 parse(char line[], char fld[][BUF])
209 {
210 char wd[8][BUF/4], *strcat();
211 int n, i, j;
212
213 for (i = 0; i < 8; i++) /* zap out old strings */
214 *wd[i] = NULL;
215 n = sscanf(line, "%s %s %s %s %s %s %s %s",
216 wd[0], wd[1], wd[2], wd[3], wd[4], wd[5], wd[6], wd[7]);
217 for (i = 0; i < 4; i++) {
218 if (wd[0][1] == keystr[i]) {
219 if (wd[0][1] == 'A') {
220 if (oneauth && !multauth) /* no repeat */
221 break;
222 else if (oneauth) /* mult auths */
223 strcat(fld[i], "~~");
224 if (!endcomma(wd[n-2])) /* surname */
225 strcat(fld[i], wd[n-1]);
226 else { /* jr. or ed. */
227 strcat(fld[i], wd[n-2]);
228 n--;
229 }
230 strcat(fld[i], " ");
231 for (j = 1; j < n-1; j++)
232 strcat(fld[i], wd[j]);
233 oneauth = 1;
234 } else if (wd[0][1] == 'D') {
235 strcat(fld[i], wd[n-1]); /* year */
236 if (n > 2)
237 strcat(fld[i], wd[1]); /* month */
238 } else if (wd[0][1] == 'T' || wd[0][1] == 'J') {
239 j = 1;
240 if (article(wd[1])) /* skip article */
241 j++;
242 for (; j < n; j++)
243 strcat(fld[i], wd[j]);
244 } else /* any other field */
245 for (j = 1; j < n; j++)
246 strcat(fld[i], wd[j]);
247 }
248 /* %Q quorporate or queer author - unreversed %A */
249 else if (wd[0][1] == 'Q' && keystr[i] == 'A')
250 for (j = 1; j < n; j++)
251 strcat(fld[i], wd[j]);
252 }
253 }
254
/*
 * Tell whether str is, exactly and case-sensitively, one of the articles
 * that are skipped at the start of a title.  Returns 1 if so, 0 otherwise.
 */
static int
article(char *str)
{
	static const char *const known[] = {
		"The", "A", "An",	/* English */
		"Le", "La",		/* French */
		"Der", "Die", "Das",	/* German */
		"El",			/* Spanish */
		"Den"			/* Scandinavian */
	};
	size_t k;

	for (k = 0; k < sizeof (known) / sizeof (known[0]); k++) {
		if (strcmp(known[k], str) == 0)
			return (1);
	}
	return (0);
}
281
282 /* evaluate key string for A+ marking */
283 static void
eval(char keystr[])284 eval(char keystr[])
285 {
286 int i, j;
287
288 for (i = 0, j = 0; keystr[i]; i++, j++) {
289 if (keystr[i] == '+') {
290 multauth = 1;
291 i++;
292 }
293 if (keystr[i] == NULL)
294 break;
295 keystr[j] = keystr[i];
296 }
297 keystr[j] = NULL;
298 }
299
/*
 * Report a system error and give up: s is printed through perror(), which
 * follows it with the message for the current errno, and the program then
 * exits with status 1.
 */
static void
error(char *s)
{
	perror(s);
	exit(1);
}
307
308 /* remove tempfile in case of interrupt */
309 static void
onintr(void)310 onintr(void)
311 {
312 fprintf(stderr, gettext("\nInterrupt\n"));
313 unlink(tempfile);
314 exit(1);
315 }
316
/*
 * If str ends in a comma, strip that comma in place and return 1;
 * otherwise leave str alone and return 0.  An empty string has no last
 * character to inspect and yields 0.
 */
static int
endcomma(char *str)
{
	size_t len = strlen(str);

	if (len == 0 || str[len - 1] != ',')
		return (0);
	str[len - 1] = '\0';
	return (1);
}
329