xref: /illumos-gate/usr/src/cmd/refer/sortbib.c (revision 12042ab213b3af68474f48555504db816a449211)
1 /*
2  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 
6 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
7 /*	  All Rights Reserved	*/
8 
9 /*
10  * Copyright (c) 1980 Regents of the University of California.
11  * All rights reserved. The Berkeley software License Agreement
12  * specifies the terms and conditions for redistribution.
13  */
14 
#include <libintl.h>
#include <locale.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
19 
#define	BUF BUFSIZ		/* size of line, key-line and sort-field buffers */
#define	MXFILES 16		/* maximum number of databases on the command line */

char tempfile[32];		/* temporary file for sorting keys */
int tmpfd = -1;			/* descriptor returned by mkstemp() for tempfile */
char *keystr = "AD";		/* default sorting on author and date */
int multauth = 0;		/* by default sort on senior author only */
int oneauth;			/* has there been author in the record? */

/* forward declarations for the file-local helpers defined below */
static int article(char *);
static void deliver(FILE *[], FILE *);
static int endcomma(char *);
static void error(char *);
static void eval(char []);
static void parse(char [], char fld[][BUF]);
static void sortbib(FILE *, FILE *, int);
static void onintr(void);
37 
38 /* sortbib: sort bibliographic database in place */
39 int
40 main(int argc, char *argv[])
41 {
42 	FILE *fp[MXFILES], *tfp;
43 	int i;
44 
45 	(void) setlocale(LC_ALL, "");
46 
47 #if !defined(TEXT_DOMAIN)
48 #define	TEXT_DOMAIN "SYS_TEST"
49 #endif
50 	(void) textdomain(TEXT_DOMAIN);
51 
52 	if (argc == 1) {		/* can't use stdin for seeking anyway */
53 		puts(gettext("Usage:  sortbib [-sKEYS] database [...]\n\
54 \t-s: sort by fields in KEYS (default is AD)"));
55 		exit(1);
56 	}
57 	if (argc > 2 && argv[1][0] == '-' && argv[1][1] == 's') {
58 		/* if a key is specified use it, otherwise use default key */
59 		if (argv[1][2] != '\0')
60 			keystr = argv[1] + 2;
61 		eval(keystr);		/* evaluate A+ for multiple authors */
62 		argv++; argc--;
63 	}
64 	if (argc > MXFILES+1) {	/* too many open file streams */
65 		fprintf(stderr,
66 		gettext("sortbib: More than %d databases specified\n"),
67 		    MXFILES);
68 		exit(1);
69 	}
70 	for (i = 1; i < argc; i++)		/* open files in arg list */
71 		if ((fp[i-1] = fopen(argv[i], "r")) == NULL)
72 			error(argv[i]);
73 	strcpy(tempfile, "/tmp/SbibXXXXXX");	/* tempfile for sorting keys */
74 	if ((tmpfd = mkstemp(tempfile)) == -1)
75 		error(tempfile);
76 
77 	(void) close(tmpfd);
78 	if (signal(SIGINT, SIG_IGN) != SIG_IGN)	/* remove if interrupted */
79 		signal(SIGINT, (void(*)())onintr);
80 	if ((tfp = fopen(tempfile, "w")) == NULL) {
81 		(void) unlink(tempfile);
82 		error(tempfile);
83 	}
84 	for (i = 0; i < argc-1; i++)		/* read keys from bib files */
85 		sortbib(fp[i], tfp, i);
86 	fclose(tfp);
87 	deliver(fp, tfp);	/* do disk seeks and read from biblio files */
88 	(void) unlink(tempfile);
89 	return (0);
90 }
91 
92 int rsmode = 0;		/* record separator: 1 = null line, 2 = bracket */
93 
94 /* read records, prepare list for sorting */
95 static void
96 sortbib(FILE *fp, FILE *tfp, int i)
97 {
98 	long offset, lastoffset = 0, ftell();	/* byte offsets in file */
99 	int length, newrec, recno = 0;		/* reclen, new rec'd?, number */
100 	char line[BUF], fld[4][BUF];		/* one line, the sort fields */
101 
102 	/* measure byte offset, then get new line */
103 	while (offset = ftell(fp), fgets(line, BUF, fp)) {
104 		if (recno == 0)		/* accept record w/o initial newline */
105 			newrec = 1;
106 		if (line[0] == '\n') {	/* accept null line record separator */
107 			if (!rsmode)
108 				rsmode = 1;	/* null line mode */
109 			if (rsmode == 1)
110 				newrec = 1;
111 		}
112 		if (line[0] == '.' && line[1] == '[') {	/* also accept .[ .] */
113 			if (!rsmode)
114 				rsmode = 2;	/* bracket pair mode */
115 			if (rsmode == 2)
116 				newrec = 1;
117 		}
118 		if (newrec) {		/* by whatever means above */
119 			newrec = 0;
120 			length = offset - lastoffset;	/* measure rec len */
121 			if (length > BUF*8) {
122 				fprintf(stderr,
123 				gettext("sortbib: record %d longer than %d "
124 				    "(%d)\n"), recno, BUF*8, length);
125 				(void) unlink(tempfile);
126 				exit(1);
127 			}
128 			if (recno++) {			/* info for sorting */
129 				fprintf(tfp, "%d %d %d : %s %s %s %s\n",
130 				    i, lastoffset, length,
131 				    fld[0], fld[1], fld[2], fld[3]);
132 				if (ferror(tfp)) {
133 					(void) unlink(tempfile);
134 					error(tempfile);
135 				}
136 			}
137 			*fld[0] = *fld[1] = *fld[2] = *fld[3] = '\0';
138 			oneauth = 0;		/* reset number of authors */
139 			lastoffset = offset;	/* save for next time */
140 		}
141 		if (line[0] == '%')	/* parse out fields to be sorted */
142 			parse(line, fld);
143 	}
144 	offset = ftell(fp);		/* measure byte offset at EOF */
145 	length = offset - lastoffset;	/* measure final record length */
146 	if (length > BUF*8) {
147 		fprintf(stderr,
148 		    gettext("sortbib: record %d longer than %d (%d)\n"),
149 		    recno, BUF*8, length);
150 		(void) unlink(tempfile);
151 		exit(1);
152 	}
153 	if (line[0] != '\n') {		/* ignore null line just before EOF */
154 		fprintf(tfp, "%d %d %d : %s %s %s %s\n",
155 		    i, lastoffset, length, fld[0], fld[1], fld[2], fld[3]);
156 		if (ferror(tfp)) {
157 			(void) unlink(tempfile);
158 			error(tempfile);	/* disk error in /tmp */
159 		}
160 	}
161 }
162 
163 /* deliver sorted entries out of database(s) */
164 static void
165 deliver(FILE *fp[], FILE *tfp)
166 {
167 	char str[BUF], buff[BUF*8];	/* for tempfile & databases */
168 	char cmd[80];			/* for using system sort command */
169 	long int offset;
170 	int i, length;
171 
172 	/* when sorting, ignore case distinctions; tab char is ':' */
173 	sprintf(cmd, "sort +4f +0n +1n %s -o %s", tempfile, tempfile);
174 	if (system(cmd) == 127) {
175 		(void) unlink(tempfile);
176 		error("sortbib");
177 	}
178 	tfp = fopen(tempfile, "r");
179 	while (fgets(str, sizeof (str), tfp)) {
180 		/* get file pointer, record offset, and length */
181 		if (sscanf(str, "%d %d %d :", &i, &offset, &length) != 3)
182 			error(gettext("sortbib: sorting error"));
183 		/* seek to proper disk location in proper file */
184 		if (fseek(fp[i], offset, 0) == -1) {
185 			(void) unlink(tempfile);
186 			error("sortbib");
187 		}
188 		/* read exactly one record from bibliography */
189 		if (fread(buff, sizeof (*buff), length, fp[i]) == 0) {
190 			(void) unlink(tempfile);
191 			error("sortbib");
192 		}
193 		/* add newline between unseparated records */
194 		if (buff[0] != '\n' && rsmode == 1)
195 			putchar('\n');
196 		/* write record buffer to standard output */
197 		if (fwrite(buff, sizeof (*buff), length, stdout) == 0) {
198 			(void) unlink(tempfile);
199 			error("sortbib");
200 		}
201 	}
202 }
203 
204 /* get fields out of line, prepare for sorting */
205 static void
206 parse(char line[], char fld[][BUF])
207 {
208 	char wd[8][BUF/4], *strcat();
209 	int n, i, j;
210 
211 	for (i = 0; i < 8; i++)		/* zap out old strings */
212 		*wd[i] = '\0';
213 	n = sscanf(line, "%s %s %s %s %s %s %s %s",
214 	    wd[0], wd[1], wd[2], wd[3], wd[4], wd[5], wd[6], wd[7]);
215 	for (i = 0; i < 4; i++) {
216 		if (wd[0][1] == keystr[i]) {
217 			if (wd[0][1] == 'A') {
218 				if (oneauth && !multauth)	/* no repeat */
219 					break;
220 				else if (oneauth)		/* mult auths */
221 					strcat(fld[i], "~~");
222 				if (!endcomma(wd[n-2]))		/* surname */
223 					strcat(fld[i], wd[n-1]);
224 				else {				/* jr. or ed. */
225 					strcat(fld[i], wd[n-2]);
226 					n--;
227 				}
228 				strcat(fld[i], " ");
229 				for (j = 1; j < n-1; j++)
230 					strcat(fld[i], wd[j]);
231 				oneauth = 1;
232 			} else if (wd[0][1] == 'D') {
233 				strcat(fld[i], wd[n-1]);	/* year */
234 				if (n > 2)
235 					strcat(fld[i], wd[1]);	/* month */
236 			} else if (wd[0][1] == 'T' || wd[0][1] == 'J') {
237 				j = 1;
238 				if (article(wd[1]))	/* skip article */
239 					j++;
240 				for (; j < n; j++)
241 					strcat(fld[i], wd[j]);
242 			} else  /* any other field */
243 				for (j = 1; j < n; j++)
244 					strcat(fld[i], wd[j]);
245 		}
246 		/* %Q quorporate or queer author - unreversed %A */
247 		else if (wd[0][1] == 'Q' && keystr[i] == 'A')
248 			for (j = 1; j < n; j++)
249 				strcat(fld[i], wd[j]);
250 	}
251 }
252 
/*
 * Tell whether str is exactly one of the articles sortbib skips at the
 * start of a title.  The comparison is case-sensitive.
 * Returns 1 for an article, 0 for anything else.
 */
static int
article(char *str)
{
	static const char *const articles[] = {
		"The", "A", "An",	/* English */
		"Le", "La",		/* French */
		"Der", "Die", "Das",	/* German */
		"El",			/* Spanish */
		"Den"			/* Scandinavian */
	};
	size_t k;

	for (k = 0; k < sizeof (articles) / sizeof (articles[0]); k++) {
		if (strcmp(articles[k], str) == 0)
			return (1);
	}
	return (0);
}
279 
280 /* evaluate key string for A+ marking */
281 static void
282 eval(char keystr[])
283 {
284 	int i, j;
285 
286 	for (i = 0, j = 0; keystr[i]; i++, j++) {
287 		if (keystr[i] == '+') {
288 			multauth = 1;
289 			i++;
290 		}
291 		if (keystr[i] == '\0')
292 			break;
293 		keystr[j] = keystr[i];
294 	}
295 	keystr[j] = '\0';
296 }
297 
/*
 * exit in case of various system errors
 *
 * Prints s followed by the message for the current errno on standard
 * error (via perror), then terminates the program with status 1.
 * Does not return.  Callers that own the temporary file remove it first.
 */
static void
error(char *s)
{
	perror(s);
	exit(1);
}
305 
306 /* remove tempfile in case of interrupt */
307 static void
308 onintr(void)
309 {
310 	fprintf(stderr, gettext("\nInterrupt\n"));
311 	unlink(tempfile);
312 	exit(1);
313 }
314 
/*
 * Strip a trailing comma from str, if there is one.
 *
 * Returns 1 when str ended in ',' (the comma is overwritten with NUL),
 * 0 otherwise.  An empty string is left alone and yields 0.
 */
static int
endcomma(char *str)
{
	size_t len = strlen(str);

	/* len > 0 keeps the index from running off the front of str */
	if (len > 0 && str[len - 1] == ',') {
		str[len - 1] = '\0';
		return (1);
	}
	return (0);
}
327