xref: /titanic_44/usr/src/cmd/refer/sortbib.c (revision 09f67678c27dda8a89f87f1f408a87dd49ceb0e1)
1 /*	Copyright (c) 1984, 1986, 1987, 1988, 1989 AT&T	*/
2 /*	  All Rights Reserved  	*/
3 
4 
5 /*
6  * Copyright (c) 1980 Regents of the University of California.
7  * All rights reserved. The Berkeley software License Agreement
8  * specifies the terms and conditions for redistribution.
9  */
10 
11 /*
12  * Copyright (c) 1983-1988 by Sun Microsystems, Inc.
13  * All Rights Reserved.
14  */
15 
16 #pragma ident	"%Z%%M%	%I%	%E% SMI"
17 
#include <ctype.h>
#include <libintl.h>
#include <locale.h>
#include <signal.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>
#include <unistd.h>
22 
#define	BUF BUFSIZ		/* size of line and sort-field buffers */
#define	MXFILES 16		/* most databases accepted on command line */

char tempfile[32];		/* temporary file for sorting keys */
int tmpfd = -1;			/* descriptor of tempfile, -1 if none yet */

/*
 * The default key list lives in a writable array rather than a string
 * literal because eval() edits the key string in place.
 */
static char defkeys[] = "AD";
char *keystr = defkeys;		/* default sorting on author and date */
int multauth = 0;		/* by default sort on senior author only */
int oneauth;			/* has there been author in the record? */
31 
32 main(argc, argv)	/* sortbib: sort bibliographic database in place */
33 int argc;
34 char *argv[];
35 {
36 	FILE *fp[MXFILES], *tfp;
37 	int i;
38 	void onintr();
39 
40 	(void) setlocale(LC_ALL, "");
41 
42 #if !defined(TEXT_DOMAIN)
43 #define	TEXT_DOMAIN "SYS_TEST"
44 #endif
45 	(void) textdomain(TEXT_DOMAIN);
46 
47 	if (argc == 1)		/* can't use stdin for seeking anyway */
48 	{
49 		puts(gettext("Usage:  sortbib [-sKEYS] database [...]\n\
50 \t-s: sort by fields in KEYS (default is AD)"));
51 		exit(1);
52 	}
53 	if (argc > 2 && argv[1][0] == '-' && argv[1][1] == 's')
54 	{
55 		/* if a key is specified use it, otherwise use default key */
56 		if (argv[1][2] != '\0')
57 			keystr = argv[1] + 2;
58 		eval(keystr);		/* evaluate A+ for multiple authors */
59 		argv++; argc--;
60 	}
61 	if (argc > MXFILES+1)	/* too many open file streams */
62 	{
63 		fprintf(stderr,
64 		gettext("sortbib: More than %d databases specified\n"),
65 		    MXFILES);
66 		exit(1);
67 	}
68 	for (i = 1; i < argc; i++)		/* open files in arg list */
69 		if ((fp[i-1] = fopen(argv[i], "r")) == NULL)
70 			error(argv[i]);
71 	strcpy(tempfile, "/tmp/SbibXXXXXX");	/* tempfile for sorting keys */
72 	if ((tmpfd = mkstemp(tempfile)) == -1)
73 		error(tempfile);
74 
75 	(void) close(tmpfd);
76 	if (signal(SIGINT, SIG_IGN) != SIG_IGN)	/* remove if interrupted */
77 		signal(SIGINT, onintr);
78 	if ((tfp = fopen(tempfile, "w")) == NULL) {
79 		(void) unlink(tempfile);
80 		error(tempfile);
81 	}
82 	for (i = 0; i < argc-1; i++)		/* read keys from bib files */
83 		sortbib(fp[i], tfp, i);
84 	fclose(tfp);
85 	deliver(fp, tfp);	/* do disk seeks and read from biblio files */
86 	(void) unlink(tempfile);
87 	exit(0);
88 	/* NOTREACHED */
89 }
90 
91 int rsmode = 0;		/* record separator: 1 = null line, 2 = bracket */
92 
93 sortbib(fp, tfp, i)	/* read records, prepare list for sorting */
94 FILE *fp, *tfp;
95 int i;
96 {
97 	long offset, lastoffset = 0, ftell();	/* byte offsets in file */
98 	int length, newrec, recno = 0;		/* reclen, new rec'd?, number */
99 	char line[BUF], fld[4][BUF];		/* one line, the sort fields */
100 
101 	/* measure byte offset, then get new line */
102 	while (offset = ftell(fp), fgets(line, BUF, fp))
103 	{
104 		if (recno == 0)		/* accept record w/o initial newline */
105 			newrec = 1;
106 		if (line[0] == '\n')	/* accept null line record separator */
107 		{
108 			if (!rsmode)
109 				rsmode = 1;	/* null line mode */
110 			if (rsmode == 1)
111 				newrec = 1;
112 		}
113 		if (line[0] == '.' && line[1] == '[')	/* also accept .[ .] */
114 		{
115 			if (!rsmode)
116 				rsmode = 2;	/* bracket pair mode */
117 			if (rsmode == 2)
118 				newrec = 1;
119 		}
120 		if (newrec)		/* by whatever means above */
121 		{
122 			newrec = 0;
123 			length = offset - lastoffset;	/* measure rec len */
124 			if (length > BUF*8) {
125 				fprintf(stderr,
126 				gettext("sortbib: record %d longer than %d (%d)\n"),
127 					recno, BUF*8, length);
128 				(void) unlink(tempfile);
129 				exit(1);
130 			}
131 			if (recno++)			/* info for sorting */
132 			{
133 				fprintf(tfp, "%d %d %d : %s %s %s %s\n",
134 					i, lastoffset, length,
135 					fld[0], fld[1], fld[2], fld[3]);
136 				if (ferror(tfp)) {
137 					(void) unlink(tempfile);
138 					error(tempfile);
139 				}
140 			}
141 			*fld[0] = *fld[1] = *fld[2] = *fld[3] = NULL;
142 			oneauth = 0;		/* reset number of authors */
143 			lastoffset = offset;	/* save for next time */
144 		}
145 		if (line[0] == '%')	/* parse out fields to be sorted */
146 			parse(line, fld);
147 	}
148 	offset = ftell(fp);		/* measure byte offset at EOF */
149 	length = offset - lastoffset;	/* measure final record length */
150 	if (length > BUF*8)
151 	{
152 		fprintf(stderr,
153 		    gettext("sortbib: record %d longer than %d (%d)\n"),
154 		    recno, BUF*8, length);
155 		(void) unlink(tempfile);
156 		exit(1);
157 	}
158 	if (line[0] != '\n')		/* ignore null line just before EOF */
159 	{
160 		fprintf(tfp, "%d %d %d : %s %s %s %s\n",
161 			i, lastoffset, length,
162 			fld[0], fld[1], fld[2], fld[3]);
163 		if (ferror(tfp)) {
164 			(void) unlink(tempfile);
165 			error(tempfile);	/* disk error in /tmp */
166 		}
167 	}
168 }
169 
170 deliver(fp, tfp)	/* deliver sorted entries out of database(s) */
171 FILE *fp[], *tfp;
172 {
173 	char str[BUF], buff[BUF*8];	/* for tempfile & databases */
174 	char cmd[80];			/* for using system sort command */
175 	long int offset;
176 	int i, length;
177 
178 	/* when sorting, ignore case distinctions; tab char is ':' */
179 	sprintf(cmd, "sort +4f +0n +1n %s -o %s", tempfile, tempfile);
180 	if (system(cmd) == 127) {
181 		(void) unlink(tempfile);
182 		error("sortbib");
183 	}
184 	tfp = fopen(tempfile, "r");
185 	while (fgets(str, sizeof (str), tfp))
186 	{
187 		/* get file pointer, record offset, and length */
188 		if (sscanf(str, "%d %d %d :", &i, &offset, &length) != 3)
189 			error(gettext("sortbib: sorting error"));
190 		/* seek to proper disk location in proper file */
191 		if (fseek(fp[i], offset, 0) == -1) {
192 			(void) unlink(tempfile);
193 			error("sortbib");
194 		}
195 		/* read exactly one record from bibliography */
196 		if (fread(buff, sizeof (*buff), length, fp[i]) == 0) {
197 			(void) unlink(tempfile);
198 			error("sortbib");
199 		}
200 		/* add newline between unseparated records */
201 		if (buff[0] != '\n' && rsmode == 1)
202 			putchar('\n');
203 		/* write record buffer to standard output */
204 		if (fwrite(buff, sizeof (*buff), length, stdout) == 0) {
205 			(void) unlink(tempfile);
206 			error("sortbib");
207 		}
208 	}
209 }
210 
211 parse(line, fld)	/* get fields out of line, prepare for sorting */
212 char line[];
213 char fld[][BUF];
214 {
215 	char wd[8][BUF/4], *strcat();
216 	int n, i, j;
217 
218 	for (i = 0; i < 8; i++)		/* zap out old strings */
219 		*wd[i] = NULL;
220 	n = sscanf(line, "%s %s %s %s %s %s %s %s",
221 		wd[0], wd[1], wd[2], wd[3], wd[4], wd[5], wd[6], wd[7]);
222 	for (i = 0; i < 4; i++)
223 	{
224 		if (wd[0][1] == keystr[i])
225 		{
226 			if (wd[0][1] == 'A')
227 			{
228 				if (oneauth && !multauth)	/* no repeat */
229 					break;
230 				else if (oneauth)		/* mult auths */
231 					strcat(fld[i], "~~");
232 				if (!endcomma(wd[n-2]))		/* surname */
233 					strcat(fld[i], wd[n-1]);
234 				else {				/* jr. or ed. */
235 					strcat(fld[i], wd[n-2]);
236 					n--;
237 				}
238 				strcat(fld[i], " ");
239 				for (j = 1; j < n-1; j++)
240 					strcat(fld[i], wd[j]);
241 				oneauth = 1;
242 			} else if (wd[0][1] == 'D') {
243 				strcat(fld[i], wd[n-1]);	/* year */
244 				if (n > 2)
245 					strcat(fld[i], wd[1]);	/* month */
246 			} else if (wd[0][1] == 'T' || wd[0][1] == 'J') {
247 				j = 1;
248 				if (article(wd[1]))	/* skip article */
249 					j++;
250 				for (; j < n; j++)
251 					strcat(fld[i], wd[j]);
252 			} else  /* any other field */
253 				for (j = 1; j < n; j++)
254 					strcat(fld[i], wd[j]);
255 		}
256 		/* %Q quorporate or queer author - unreversed %A */
257 		else if (wd[0][1] == 'Q' && keystr[i] == 'A')
258 			for (j = 1; j < n; j++)
259 				strcat(fld[i], wd[j]);
260 	}
261 }
262 
/*
 * article: see if string is an article.
 *
 * Returns 1 when str is exactly (case-sensitively) one of the articles
 * listed below, 0 otherwise, including when str is NULL.
 */
int
article(char *str)
{
	static const char *const articles[] = {
		"The", "A", "An",	/* English */
		"Le", "La",		/* French */
		"Der", "Die", "Das",	/* German */
		"El",			/* Spanish */
		"Den"			/* Scandinavian */
	};
	size_t k;

	if (str == NULL)
		return (0);
	for (k = 0; k < sizeof (articles) / sizeof (articles[0]); k++) {
		if (strcmp(articles[k], str) == 0)
			return (1);
	}
	return (0);
}
288 
289 eval(keystr)		/* evaluate key string for A+ marking */
290 char keystr[];
291 {
292 	int i, j;
293 
294 	for (i = 0, j = 0; keystr[i]; i++, j++)
295 	{
296 		if (keystr[i] == '+')
297 		{
298 			multauth = 1;
299 			i++;
300 		}
301 		if (keystr[i] == NULL)
302 			break;
303 		keystr[j] = keystr[i];
304 	}
305 	keystr[j] = NULL;
306 }
307 
/*
 * error: exit in case of various system errors.
 *
 * Prints s followed by the message for the current errno on standard
 * error (perror) and exits with status 1.  Never returns; the int return
 * type only matches how the rest of the file declares and calls it.
 */
int
error(char *s)
{
	perror(s);
	exit(1);
	/* NOTREACHED */
}
314 
315 void
316 onintr()		/* remove tempfile in case of interrupt */
317 {
318 	fprintf(stderr, gettext("\nInterrupt\n"));
319 	unlink(tempfile);
320 	exit(1);
321 }
322 
/*
 * endcomma: test whether str ends in a comma.
 *
 * If it does, the comma is removed in place and 1 is returned; otherwise
 * str is left untouched and 0 is returned.  An empty string returns 0.
 */
int
endcomma(char *str)
{
	size_t len = strlen(str);

	if (len > 0 && str[len - 1] == ',') {
		str[len - 1] = '\0';
		return (1);
	}
	return (0);
}
336