xref: /illumos-gate/usr/src/cmd/refer/sortbib.c (revision a38ddfee9c8c6b6c5a2947ff52fd2338362a4444)
1 /*
2  * Copyright 2005 Sun Microsystems, Inc.  All rights reserved.
3  * Use is subject to license terms.
4  */
5 
6 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
7 /*	  All Rights Reserved  	*/
8 
9 /*
10  * Copyright (c) 1980 Regents of the University of California.
11  * All rights reserved. The Berkeley software License Agreement
12  * specifies the terms and conditions for redistribution.
13  */
14 
15 #pragma ident	"%Z%%M%	%I%	%E% SMI"
16 
#include <ctype.h>
#include <libintl.h>
#include <locale.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
21 
#define	BUF BUFSIZ		/* size of one input line / one sort field */
#define	MXFILES 16		/* most databases accepted on the command line */

/* name of the scratch file that collects one key line per record */
char tempfile[32];
/* descriptor handed back by mkstemp() for tempfile; -1 until created */
int tmpfd = -1;
/* field letters to sort on, in priority order (author, then date) */
char *keystr = "AD";
/* nonzero when every author, not just the first, takes part in the key */
int multauth = 0;
/* nonzero once an author line has been seen in the current record */
int oneauth = 0;

static int article(char *str);
static void deliver(FILE *fp[], FILE *tfp);
static int endcomma(char *str);
static void error(char *s);
static void eval(char keystr[]);
static void parse(char line[], char fld[][BUF]);
static void sortbib(FILE *fp, FILE *tfp, int i);
static void onintr(void);
39 
40 /* sortbib: sort bibliographic database in place */
41 int
42 main(int argc, char *argv[])
43 {
44 	FILE *fp[MXFILES], *tfp;
45 	int i;
46 
47 	(void) setlocale(LC_ALL, "");
48 
49 #if !defined(TEXT_DOMAIN)
50 #define	TEXT_DOMAIN "SYS_TEST"
51 #endif
52 	(void) textdomain(TEXT_DOMAIN);
53 
54 	if (argc == 1) {		/* can't use stdin for seeking anyway */
55 		puts(gettext("Usage:  sortbib [-sKEYS] database [...]\n\
56 \t-s: sort by fields in KEYS (default is AD)"));
57 		exit(1);
58 	}
59 	if (argc > 2 && argv[1][0] == '-' && argv[1][1] == 's') {
60 		/* if a key is specified use it, otherwise use default key */
61 		if (argv[1][2] != '\0')
62 			keystr = argv[1] + 2;
63 		eval(keystr);		/* evaluate A+ for multiple authors */
64 		argv++; argc--;
65 	}
66 	if (argc > MXFILES+1) {	/* too many open file streams */
67 		fprintf(stderr,
68 		gettext("sortbib: More than %d databases specified\n"),
69 		    MXFILES);
70 		exit(1);
71 	}
72 	for (i = 1; i < argc; i++)		/* open files in arg list */
73 		if ((fp[i-1] = fopen(argv[i], "r")) == NULL)
74 			error(argv[i]);
75 	strcpy(tempfile, "/tmp/SbibXXXXXX");	/* tempfile for sorting keys */
76 	if ((tmpfd = mkstemp(tempfile)) == -1)
77 		error(tempfile);
78 
79 	(void) close(tmpfd);
80 	if (signal(SIGINT, SIG_IGN) != SIG_IGN)	/* remove if interrupted */
81 		signal(SIGINT, (void(*)())onintr);
82 	if ((tfp = fopen(tempfile, "w")) == NULL) {
83 		(void) unlink(tempfile);
84 		error(tempfile);
85 	}
86 	for (i = 0; i < argc-1; i++)		/* read keys from bib files */
87 		sortbib(fp[i], tfp, i);
88 	fclose(tfp);
89 	deliver(fp, tfp);	/* do disk seeks and read from biblio files */
90 	(void) unlink(tempfile);
91 	return (0);
92 }
93 
94 int rsmode = 0;		/* record separator: 1 = null line, 2 = bracket */
95 
96 /* read records, prepare list for sorting */
97 static void
98 sortbib(FILE *fp, FILE *tfp, int i)
99 {
100 	long offset, lastoffset = 0, ftell();	/* byte offsets in file */
101 	int length, newrec, recno = 0;		/* reclen, new rec'd?, number */
102 	char line[BUF], fld[4][BUF];		/* one line, the sort fields */
103 
104 	/* measure byte offset, then get new line */
105 	while (offset = ftell(fp), fgets(line, BUF, fp)) {
106 		if (recno == 0)		/* accept record w/o initial newline */
107 			newrec = 1;
108 		if (line[0] == '\n') {	/* accept null line record separator */
109 			if (!rsmode)
110 				rsmode = 1;	/* null line mode */
111 			if (rsmode == 1)
112 				newrec = 1;
113 		}
114 		if (line[0] == '.' && line[1] == '[') {	/* also accept .[ .] */
115 			if (!rsmode)
116 				rsmode = 2;	/* bracket pair mode */
117 			if (rsmode == 2)
118 				newrec = 1;
119 		}
120 		if (newrec) {		/* by whatever means above */
121 			newrec = 0;
122 			length = offset - lastoffset;	/* measure rec len */
123 			if (length > BUF*8) {
124 				fprintf(stderr,
125 				gettext("sortbib: record %d longer than %d "
126 				    "(%d)\n"), recno, BUF*8, length);
127 				(void) unlink(tempfile);
128 				exit(1);
129 			}
130 			if (recno++) {			/* info for sorting */
131 				fprintf(tfp, "%d %d %d : %s %s %s %s\n",
132 				    i, lastoffset, length,
133 				    fld[0], fld[1], fld[2], fld[3]);
134 				if (ferror(tfp)) {
135 					(void) unlink(tempfile);
136 					error(tempfile);
137 				}
138 			}
139 			*fld[0] = *fld[1] = *fld[2] = *fld[3] = NULL;
140 			oneauth = 0;		/* reset number of authors */
141 			lastoffset = offset;	/* save for next time */
142 		}
143 		if (line[0] == '%')	/* parse out fields to be sorted */
144 			parse(line, fld);
145 	}
146 	offset = ftell(fp);		/* measure byte offset at EOF */
147 	length = offset - lastoffset;	/* measure final record length */
148 	if (length > BUF*8) {
149 		fprintf(stderr,
150 		    gettext("sortbib: record %d longer than %d (%d)\n"),
151 		    recno, BUF*8, length);
152 		(void) unlink(tempfile);
153 		exit(1);
154 	}
155 	if (line[0] != '\n') {		/* ignore null line just before EOF */
156 		fprintf(tfp, "%d %d %d : %s %s %s %s\n",
157 		    i, lastoffset, length, fld[0], fld[1], fld[2], fld[3]);
158 		if (ferror(tfp)) {
159 			(void) unlink(tempfile);
160 			error(tempfile);	/* disk error in /tmp */
161 		}
162 	}
163 }
164 
165 /* deliver sorted entries out of database(s) */
166 static void
167 deliver(FILE *fp[], FILE *tfp)
168 {
169 	char str[BUF], buff[BUF*8];	/* for tempfile & databases */
170 	char cmd[80];			/* for using system sort command */
171 	long int offset;
172 	int i, length;
173 
174 	/* when sorting, ignore case distinctions; tab char is ':' */
175 	sprintf(cmd, "sort +4f +0n +1n %s -o %s", tempfile, tempfile);
176 	if (system(cmd) == 127) {
177 		(void) unlink(tempfile);
178 		error("sortbib");
179 	}
180 	tfp = fopen(tempfile, "r");
181 	while (fgets(str, sizeof (str), tfp)) {
182 		/* get file pointer, record offset, and length */
183 		if (sscanf(str, "%d %d %d :", &i, &offset, &length) != 3)
184 			error(gettext("sortbib: sorting error"));
185 		/* seek to proper disk location in proper file */
186 		if (fseek(fp[i], offset, 0) == -1) {
187 			(void) unlink(tempfile);
188 			error("sortbib");
189 		}
190 		/* read exactly one record from bibliography */
191 		if (fread(buff, sizeof (*buff), length, fp[i]) == 0) {
192 			(void) unlink(tempfile);
193 			error("sortbib");
194 		}
195 		/* add newline between unseparated records */
196 		if (buff[0] != '\n' && rsmode == 1)
197 			putchar('\n');
198 		/* write record buffer to standard output */
199 		if (fwrite(buff, sizeof (*buff), length, stdout) == 0) {
200 			(void) unlink(tempfile);
201 			error("sortbib");
202 		}
203 	}
204 }
205 
206 /* get fields out of line, prepare for sorting */
207 static void
208 parse(char line[], char fld[][BUF])
209 {
210 	char wd[8][BUF/4], *strcat();
211 	int n, i, j;
212 
213 	for (i = 0; i < 8; i++)		/* zap out old strings */
214 		*wd[i] = NULL;
215 	n = sscanf(line, "%s %s %s %s %s %s %s %s",
216 	    wd[0], wd[1], wd[2], wd[3], wd[4], wd[5], wd[6], wd[7]);
217 	for (i = 0; i < 4; i++) {
218 		if (wd[0][1] == keystr[i]) {
219 			if (wd[0][1] == 'A') {
220 				if (oneauth && !multauth)	/* no repeat */
221 					break;
222 				else if (oneauth)		/* mult auths */
223 					strcat(fld[i], "~~");
224 				if (!endcomma(wd[n-2]))		/* surname */
225 					strcat(fld[i], wd[n-1]);
226 				else {				/* jr. or ed. */
227 					strcat(fld[i], wd[n-2]);
228 					n--;
229 				}
230 				strcat(fld[i], " ");
231 				for (j = 1; j < n-1; j++)
232 					strcat(fld[i], wd[j]);
233 				oneauth = 1;
234 			} else if (wd[0][1] == 'D') {
235 				strcat(fld[i], wd[n-1]);	/* year */
236 				if (n > 2)
237 					strcat(fld[i], wd[1]);	/* month */
238 			} else if (wd[0][1] == 'T' || wd[0][1] == 'J') {
239 				j = 1;
240 				if (article(wd[1]))	/* skip article */
241 					j++;
242 				for (; j < n; j++)
243 					strcat(fld[i], wd[j]);
244 			} else  /* any other field */
245 				for (j = 1; j < n; j++)
246 					strcat(fld[i], wd[j]);
247 		}
248 		/* %Q quorporate or queer author - unreversed %A */
249 		else if (wd[0][1] == 'Q' && keystr[i] == 'A')
250 			for (j = 1; j < n; j++)
251 				strcat(fld[i], wd[j]);
252 	}
253 }
254 
/*
 * Report whether str is exactly one of the known leading articles.
 * The match is case-sensitive and covers the whole string.
 * Returns 1 for an article, 0 otherwise.
 */
static int
article(char *str)
{
	static const char *const known[] = {
		"The", "A", "An",		/* English */
		"Le", "La",			/* French */
		"Der", "Die", "Das",		/* German */
		"El",				/* Spanish */
		"Den"				/* Scandinavian */
	};
	size_t k;

	for (k = 0; k < sizeof (known) / sizeof (known[0]); k++) {
		if (strcmp(known[k], str) == 0)
			return (1);
	}
	return (0);
}
281 
282 /* evaluate key string for A+ marking */
283 static void
284 eval(char keystr[])
285 {
286 	int i, j;
287 
288 	for (i = 0, j = 0; keystr[i]; i++, j++) {
289 		if (keystr[i] == '+') {
290 			multauth = 1;
291 			i++;
292 		}
293 		if (keystr[i] == NULL)
294 			break;
295 		keystr[j] = keystr[i];
296 	}
297 	keystr[j] = NULL;
298 }
299 
/*
 * Fatal system error: print s followed by the message for the current
 * errno on standard error, then terminate with exit status 1.
 * Does not return.
 */
static void
error(char *s)
{
	perror(s);

	exit(1);
}
307 
308 /* remove tempfile in case of interrupt */
309 static void
310 onintr(void)
311 {
312 	fprintf(stderr, gettext("\nInterrupt\n"));
313 	unlink(tempfile);
314 	exit(1);
315 }
316 
/*
 * If str ends in a comma, remove that comma in place and return 1;
 * otherwise leave str alone and return 0.  An empty string has no last
 * character and yields 0.
 */
static int
endcomma(char *str)
{
	size_t len = strlen(str);

	if (len == 0)		/* no last character to look at */
		return (0);
	if (str[len - 1] == ',') {
		str[len - 1] = '\0';
		return (1);
	}
	return (0);
}
329