xref: /titanic_50/usr/src/lib/libcmd/common/join.c (revision edcc07547a39d6570197493a9836083bd6b2a197)
1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *           Copyright (c) 1992-2007 AT&T Knowledge Ventures            *
5 *                      and is licensed under the                       *
6 *                  Common Public License, Version 1.0                  *
7 *                      by AT&T Knowledge Ventures                      *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *            http://www.opensource.org/licenses/cpl1.0.txt             *
11 *         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                  David Korn <dgk@research.att.com>                   *
19 *                                                                      *
20 ***********************************************************************/
21 #pragma prototyped
22 /*
23  * David Korn
24  * Glenn Fowler
25  * AT&T Research
26  *
27  * join
28  */
29 
30 static const char usage[] =
31 "[-?\n@(#)$Id: join (AT&T Research) 2006-10-31 $\n]"
32 USAGE_LICENSE
33 "[+NAME?join - relational database operator]"
34 "[+DESCRIPTION?\bjoin\b performs an \aequality join\a on the files \afile1\a "
35 	"and \afile2\a and writes the resulting joined files to standard "
36 	"output.  By default, a field is delimited by one or more spaces "
37 	"and tabs with leading spaces and/or tabs ignored.  The \b-t\b option "
38 	"can be used to change the field delimiter.]"
39 "[+?The \ajoin field\a is a field in each file on which files are compared. "
40 	"By default \bjoin\b writes one line in the output for each pair "
41 	"of lines in \afiles1\a and \afiles2\a that have identical join "
42 	"fields.  The default output line consists of the join field, "
43 	"then the remaining fields from \afile1\a, then the remaining "
44 	"fields from \afile2\a, but this can be changed with the \b-o\b "
45 	"option.  The \b-a\b option can be used to add unmatched lines "
46 	"to the output.  The \b-v\b option can be used to output only "
47 	"unmatched lines.]"
48 "[+?The files \afile1\a and \afile2\a must be ordered in the collating "
49 	"sequence of \bsort -b\b on the fields on which they are to be "
50 	"joined otherwise the results are unspecified.]"
51 "[+?If either \afile1\a or \afile2\a is \b-\b, \bjoin\b "
52         "uses standard input starting at the current location.]"
53 
54 "[e:empty]:[string?Replace empty output fields in the list selected with"
55 "	\b-o\b with \astring\a.]"
56 "[o:output]:[list?Construct the output line to comprise the fields specified "
57 	"in a blank or comma separated list \alist\a.  Each element in "
58 	"\alist\a consists of a file number (either 1 or 2), a period, "
59 	"and a field number or \b0\b representing the join field.  "
60 	"As an obsolete feature multiple occurrences of \b-o\b can "
61 	"be specified.]"
62 "[t:separator|tabs]:[delim?Use \adelim\a as the field separator for both input"
63 "	and output.]"
64 "[1:j1]#[field?Join on field \afield\a of \afile1\a.  Fields start at 1.]"
65 "[2:j2]#[field?Join on field \afield\a of \afile2\a.  Fields start at 1.]"
66 "[j:join]#[field?Equivalent to \b-1\b \afield\a \b-2\b \afield\a.]"
67 "[a:unpairable]#[fileno?Write a line for each unpairable line in file"
68 "	\afileno\a, where \afileno\a is either 1 or 2, in addition to the"
69 "	normal output.  If \b-a\b options appear for both 1 and 2, then "
70 	"all unpairable lines will be output.]"
71 "[v:suppress]#[fileno?Write a line for each unpairable line in file"
72 "	\afileno\a, where \afileno\a is either 1 or 2, instead of the normal "
73 	"output.  If \b-v\b options appear for both 1 and 2, then "
74 	"all unpairable lines will be output.] ]"
75 "[i:ignorecase?Ignore case in field comparisons.]"
76 "[B!:mmap?Enable memory mapped reads instead of buffered.]"
77 
78 "[+?The following obsolete option forms are also recognized: \b-j\b \afield\a"
79 "	is equivalent to \b-1\b \afield\a \b-2\b \afield\a, \b-j1\b \afield\a"
80 "	is equivalent to \b-1\b \afield\a, and \b-j2\b \afield\a is"
81 "	equivalent to \b-2\b \afield\a.]"
82 
83 "\n"
84 "\nfile1 file2\n"
85 "\n"
86 "[+EXIT STATUS?]{"
87 	"[+0?Both files processed successfully.]"
88 	"[+>0?An error occurred.]"
89 "}"
90 "[+SEE ALSO?\bcut\b(1), \bcomm\b(1), \bpaste\b(1), \bsort\b(1), \buniq\b(1)]"
91 ;
92 
93 #include <cmd.h>
94 #include <sfdisc.h>
95 
/*
 * output selection bits kept in Join_t.outmode and Join_t.ooutmode
 * C_FILE1 and C_FILE2 are set by -a/-v for file 1 and file 2 (bit
 * fileno-1), C_COMMON selects the paired lines and is cleared by -v
 */
96 #define C_FILE1		001
97 #define C_FILE2		002
98 #define C_COMMON	004
99 #define C_ALL		(C_FILE1|C_FILE2|C_COMMON)
100 
/*
 * NFIELD is the initial capacity of the field pointer tables and of
 * the -o list; both are doubled on demand
 * JOINFIELD is the -o list code for the join field ("0"); codes for
 * real fields are ((field-1)<<2)|(file-1) and so never equal it
 */
101 #define NFIELD		10
102 #define JOINFIELD	2
103 
/*
 * character classes stored in Join_t.state[]; 0 is ordinary data
 */
104 #define S_DELIM		1
105 #define S_SPACE		2
106 #define S_NL		3
107 
/*
 * state of one input file and of the record most recently read from
 * it by getrec()
 */
108 typedef struct
109 {
110 	Sfio_t*		iop;		/* stream the records are read from */
111 	char*		name;		/* file operand, used in messages */
112 	char*		recptr;		/* start of the current record */
113 	int		reclen;		/* sfvalue() after reading the record */
114 	int		field;		/* join field index, 0 based */
115 	int		fieldlen;	/* join field length in current record */
116 	int		nfields;	/* fields in the current record */
117 	int		maxfields;	/* capacity of fieldlist */
118 	int		spaces;		/* a field of the record starts with a blank */
119 	int		hit;		/* cleared per record; outrec() counts unpaired writes */
120 	int		discard;	/* set in b_join() for stdin when sfseek() fails and sfdcseekable() returns 0 */
121 	char**		fieldlist;	/* start of each field; [nfields] is the record end */
122 } File_t;
123 
/*
 * state of one join invocation, allocated by init() and released
 * by done()
 */
124 typedef struct
125 {
126 	unsigned char	state[1<<CHAR_BIT];	/* S_* class of each byte value */
127 	Sfio_t*		outfile;	/* output stream */
128 	int*		outlist;	/* -o list terminated by -1, 0 if no -o */
129 	int		outmode;	/* C_* bits; getrec() clears a file bit at end of file */
130 	int		ooutmode;	/* outmode as set by the options */
131 	char*		nullfield;	/* -e string, 0 if none */
132 	int		delim;		/* -t character, -1 for blank separated */
133 	int		buffered;	/* set from the -B option */
134 	int		ignorecase;	/* set from the -i option */
135 	char*		same;		/* copy of a file 2 join field made in join() */
136 	int		samesize;	/* allocated size of same */
137 	File_t		file[2];	/* file1 and file2 */
138 } Join_t;
139 
140 static void
141 done(register Join_t* jp)
142 {
143 	if (jp->file[0].iop && jp->file[0].iop != sfstdin)
144 		sfclose(jp->file[0].iop);
145 	if (jp->file[1].iop && jp->file[1].iop != sfstdin)
146 		sfclose(jp->file[1].iop);
147 	if (jp->outlist)
148 		free(jp->outlist);
149 	if (jp->file[0].fieldlist)
150 		free(jp->file[0].fieldlist);
151 	if (jp->file[1].fieldlist)
152 		free(jp->file[1].fieldlist);
153 	if (jp->same)
154 		free(jp->same);
155 	free(jp);
156 }
157 
158 static Join_t*
159 init(void)
160 {
161 	register Join_t*	jp;
162 
163 	if (jp = newof(0, Join_t, 1, 0))
164 	{
165 		jp->state[' '] = jp->state['\t'] = S_SPACE;
166 		jp->delim = -1;
167 		jp->nullfield = 0;
168 		if (!(jp->file[0].fieldlist = newof(0, char*, NFIELD + 1, 0)) ||
169 		    !(jp->file[1].fieldlist = newof(0, char*, NFIELD + 1, 0)))
170 		{
171 			done(jp);
172 			return 0;
173 		}
174 		jp->file[0].maxfields = NFIELD;
175 		jp->file[1].maxfields = NFIELD;
176 		jp->outmode = C_COMMON;
177 	}
178 	return jp;
179 }
180 
181 static int
182 getolist(Join_t* jp, const char* first, char** arglist)
183 {
184 	register const char*	cp = first;
185 	char**			argv = arglist;
186 	register int		c;
187 	int*			outptr;
188 	int*			outmax;
189 	int			nfield = NFIELD;
190 	char*			str;
191 
192 	outptr = jp->outlist = newof(0, int, NFIELD + 1, 0);
193 	outmax = outptr + NFIELD;
194 	while (c = *cp++)
195 	{
196 		if (c==' ' || c=='\t' || c==',')
197 			continue;
198 		str = (char*)--cp;
199 		if (*cp=='0' && ((c=cp[1])==0 || c==' ' || c=='\t' || c==','))
200 		{
201 			str++;
202 			c = JOINFIELD;
203 			goto skip;
204 		}
205 		if (cp[1]!='.' || (*cp!='1' && *cp!='2') || (c=strtol(cp+2,&str,10)) <=0)
206 		{
207 			error(2,"%s: invalid field list",first);
208 			break;
209 		}
210 		c--;
211 		c <<=2;
212 		if (*cp=='2')
213 			c |=1;
214 	skip:
215 		if (outptr >= outmax)
216 		{
217 			jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0);
218 			outptr = jp->outlist + nfield;
219 			nfield *= 2;
220 			outmax = jp->outlist + nfield;
221 		}
222 		*outptr++ = c;
223 		cp = str;
224 	}
225 	/* need to accept obsolescent command syntax */
226 	while (1)
227 	{
228 		if (!(cp= *argv) || cp[1]!='.' || (*cp!='1' && *cp!='2'))
229 		{
230 			if (*cp=='0' && cp[1]==0)
231 			{
232 				c = JOINFIELD;
233 				goto skip2;
234 			}
235 			break;
236 		}
237 		str = (char*)cp;
238 		c = strtol(cp+2, &str,10);
239 		if (*str || --c<0)
240 			break;
241 		argv++;
242 		c <<= 2;
243 		if (*cp=='2')
244 			c |=1;
245 	skip2:
246 		if (outptr >= outmax)
247 		{
248 			jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0);
249 			outptr = jp->outlist + nfield;
250 			nfield *= 2;
251 			outmax = jp->outlist + nfield;
252 		}
253 		*outptr++ = c;
254 	}
255 	*outptr = -1;
256 	return argv-arglist;
257 }
258 
259 /*
260  * read in a record from file <index> and split into fields
261  */
262 static unsigned char*
263 getrec(Join_t* jp, int index, int discard)
264 {
265 	register unsigned char*	sp = jp->state;
266 	register File_t*	fp = &jp->file[index];
267 	register char**		ptr = fp->fieldlist;
268 	register char**		ptrmax = ptr + fp->maxfields;
269 	register char*		cp;
270 	register int		n = 0;
271 
272 	if (cmdquit())
273 		return 0;
274 	if (discard && fp->discard)
275 		sfraise(fp->iop, SFSK_DISCARD, NiL);
276 	fp->spaces = 0;
277 	fp->hit = 0;
278 	if (!(cp = sfgetr(fp->iop, '\n', 0)))
279 	{
280 		jp->outmode &= ~(1<<index);
281 		return 0;
282 	}
283 	fp->recptr = cp;
284 	fp->reclen = sfvalue(fp->iop);
285 	if (jp->delim=='\n')	/* handle new-line delimiter specially */
286 	{
287 		*ptr++ = cp;
288 		cp += fp->reclen;
289 	}
290 	else while (n!=S_NL) /* separate into fields */
291 	{
292 		if (ptr >= ptrmax)
293 		{
294 			n = 2*fp->maxfields;
295 			fp->fieldlist = newof(fp->fieldlist, char*, n + 1, 0);
296 			ptr = fp->fieldlist + fp->maxfields;
297 			fp->maxfields = n;
298 			ptrmax = fp->fieldlist+n;
299 		}
300 		*ptr++ = cp;
301 		if (jp->delim<=0 && sp[*(unsigned char*)cp]==S_SPACE)
302 		{
303 			fp->spaces = 1;
304 			while (sp[*(unsigned char*)cp++]==S_SPACE);
305 			cp--;
306 		}
307 		while ((n=sp[*(unsigned char*)cp++])==0);
308 	}
309 	*ptr = cp;
310 	fp->nfields = ptr - fp->fieldlist;
311 	if ((n=fp->field) < fp->nfields)
312 	{
313 		cp = fp->fieldlist[n];
314 		/* eliminate leading spaces */
315 		if (fp->spaces)
316 		{
317 			while (sp[*(unsigned char*)cp++]==S_SPACE);
318 			cp--;
319 		}
320 		fp->fieldlen = (fp->fieldlist[n+1]-cp)-1;
321 		return (unsigned char*)cp;
322 	}
323 	fp->fieldlen = 0;
324 	return (unsigned char*)"";
325 }
326 
327 #if DEBUG_TRACE
328 static unsigned char* u1,u2,u3;
329 #define getrec(p,n,d)	(u1 = getrec(p, n, d), sfprintf(sfstdout, "[G%d#%d@%I*d:%-.8s]", __LINE__, n, sizeof(Sfoff_t), sftell(p->file[n].iop), u1), u1)
330 #endif
331 
332 /*
333  * print field <n> from file <index>
334  */
335 static int
336 outfield(Join_t* jp, int index, register int n, int last)
337 {
338 	register File_t*	fp = &jp->file[index];
339 	register char*		cp;
340 	register char*		cpmax;
341 	register int		size;
342 	register Sfio_t*	iop = jp->outfile;
343 
344 	if (n < fp->nfields)
345 	{
346 		cp = fp->fieldlist[n];
347 		cpmax = fp->fieldlist[n+1];
348 	}
349 	else
350 		cp = 0;
351 	if ((n=jp->delim)<=0)
352 	{
353 		if (fp->spaces)
354 		{
355 			/*eliminate leading spaces */
356 			while (jp->state[*(unsigned char*)cp++]==S_SPACE);
357 			cp--;
358 		}
359 		n = ' ';
360 	}
361 	if (last)
362 		n = '\n';
363 	if (cp)
364 		size = cpmax-cp;
365 	else
366 		size = 0;
367 	if (size==0)
368 	{
369 		if (!jp->nullfield)
370 			sfputc(iop,n);
371 		else if (sfputr(iop,jp->nullfield,n) < 0)
372 			return -1;
373 	}
374 	else
375 	{
376 		last = cp[size-1];
377 		cp[size-1] = n;
378 		if (sfwrite(iop,cp,size) < 0)
379 			return -1;
380 		cp[size-1] = last;
381 	}
382 	return 0;
383 }
384 
385 #if DEBUG_TRACE
386 static int i1,i2,i3;
387 #define outfield(p,i,n,f)	(sfprintf(sfstdout, "[F%d#%d:%d,%d]", __LINE__, i1=i, i2=n, i3=f), outfield(p, i1, i2, i3))
388 #endif
389 
/*
 * write one output line from the current records
 *
 * <mode> is 0 for a pair of matching records, <0 for an unpaired
 * record of file[0] and >0 for an unpaired record of file[1]
 * an unpaired record is written only the first time: the hit counter
 * of its file is bumped here and cleared by getrec()
 *
 * with a -o list each list entry produces one field; entries that
 * name the file without a record are passed to outfield() with a
 * field index of nfields so that they are treated as missing
 * without a -o list the join field comes first, then the other
 * fields of file[0], then those of file[1]
 *
 * returns 0 on success and -1 if outfield() fails
 */
390 static int
391 outrec(register Join_t* jp, int mode)
392 {
393 	register File_t*	fp;
394 	register int		i;
395 	register int		j;
396 	register int		k;
397 	register int		n;
398 	int*			out;
399 
400 	if (mode < 0 && jp->file[0].hit++)
401 		return 0;
402 	if (mode > 0 && jp->file[1].hit++)
403 		return 0;
404 	if (out = jp->outlist)
405 	{
406 		while ((n = *out++) >= 0)
407 		{
408 			if (n == JOINFIELD)
409 			{
				/* the join field is taken from file[1] unless only file[0] has a record */
410 				i = mode >= 0;
411 				j = jp->file[i].field;
412 			}
413 			else
414 			{
				/* low bit is the file index, the field index is above the low two bits */
415 				i = n & 1;
416 				j = (mode<0 && i || mode>0 && !i) ?
417 					jp->file[i].nfields :
418 					n >> 2;
419 			}
			/* the entry before the -1 terminator ends the line */
420 			if (outfield(jp, i, j, *out < 0) < 0)
421 				return -1;
422 		}
423 		return 0;
424 	}
	/* k is the number of fields still to be written; the one that makes it 0 ends the line */
425 	k = jp->file[0].nfields;
426 	if (mode >= 0)
427 		k += jp->file[1].nfields - 1;
428 	for (i=0; i<2; i++)
429 	{
430 		fp = &jp->file[i];
431 		if (mode>0 && i==0)
432 		{
			/* only file[1] is written: drop the file[0] share of the count */
433 			k -= (fp->nfields - 1);
434 			continue;
435 		}
436 		n = fp->field;
437 		if (mode||i==0)
438 		{
439 			/* output join field first */
440 			if (outfield(jp,i,n,!--k) < 0)
441 				return -1;
442 			if (!k)
443 				return 0;
444 			for (j=0; j<n; j++)
445 			{
446 				if (outfield(jp,i,j,!--k) < 0)
447 					return -1;
448 				if (!k)
449 					return 0;
450 			}
451 			j = n + 1;
452 		}
453 		else
454 			j = 0;
455 		for (;j<fp->nfields; j++)
456 		{
457 			if (j!=n && outfield(jp,i,j,!--k) < 0)
458 				return -1;
459 			if (!k)
460 				return 0;
461 		}
462 	}
463 	return 0;
464 }
465 
/*
 * with DEBUG_TRACE each outrec() call also prints the calling line,
 * the mode, the lo and hi offsets of join() and both join fields with
 * their hit counters; it relies on the locals of join()
 */
466 #if DEBUG_TRACE
467 #define outrec(p,n)	(sfprintf(sfstdout, "[R#%d,%d,%lld,%lld:%-.*s{%d}:%-.*s{%d}]", __LINE__, i1=n, lo, hi, jp->file[0].fieldlen, cp1, jp->file[0].hit, jp->file[1].fieldlen, cp2, jp->file[1].hit), outrec(p, i1))
468 #endif
469 
/*
 * merge the two inputs and write the lines selected by jp->outmode
 *
 * cp1 and cp2 point to the join field of the current record of
 * file[0] and file[1], n1 and n2 are the field lengths
 * two join fields are equal when their first bytes match, the common
 * prefix compares equal (strncasecmp() with -i, memcmp() otherwise)
 * and the lengths are the same
 *
 * lo and hi are file[1] offsets, -1 when unset: lo is the start of the
 * first file[1] record that matched the current file[0] join field and
 * hi the position file[1] had reached when it was rewound to lo, so
 * that the next file[0] record sees the same file[1] records again
 * NOTE(review): the exact role of <same> on the -v path is not
 * obvious from the code alone - confirm before changing it
 *
 * after the main loop the records left in the input that is not
 * exhausted are written if unpairable lines of that file were asked for
 *
 * returns 0 on success and -1 after a failed seek, a failed allocation
 * or a failed outrec()
 */
470 static int
471 join(Join_t* jp)
472 {
473 	register unsigned char*	cp1;
474 	register unsigned char*	cp2;
475 	register int		n1;
476 	register int		n2;
477 	register int		n;
478 	register int		cmp;
479 	register int		same;
480 	int			o2;
481 	Sfoff_t			lo = -1;
482 	Sfoff_t			hi = -1;
483 
	/* cp2 ends up 0 unless both files delivered a first record */
484 	if ((cp1 = getrec(jp, 0, 0)) && (cp2 = getrec(jp, 1, 0)) || (cp2 = 0))
485 	{
486 		n1 = jp->file[0].fieldlen;
487 		n2 = jp->file[1].fieldlen;
488 		same = 0;
489 		for (;;)
490 		{
491 			n = n1 < n2 ? n1 : n2;
492 #if DEBUG_TRACE
493 			if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n)))
494 				cmp = n1 - n2;
495 sfprintf(sfstdout, "[C#%d:%d(%c-%c),%d,%lld,%lld%s]", __LINE__, cmp, *cp1, *cp2, same, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : "");
496 			if (!cmp)
497 #else
			/* taken when the join fields are equal; cmp holds the ordering otherwise */
498 			if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n)) && !(cmp = n1 - n2))
499 #endif
500 			{
				/* paired lines are not wanted: move on in file[0] */
501 				if (!(jp->outmode & C_COMMON))
502 				{
503 					if (cp1 = getrec(jp, 0, 1))
504 					{
505 						n1 = jp->file[0].fieldlen;
506 						same = 1;
507 						continue;
508 					}
509 					if ((jp->ooutmode & (C_FILE1|C_FILE2)) != C_FILE2)
510 						break;
					/* step back over the last file[0] record and read it again */
511 					if (sfseek(jp->file[0].iop, (Sfoff_t)-jp->file[0].reclen, SEEK_CUR) < 0 || !(cp1 = getrec(jp, 0, 0)))
512 					{
513 						error(ERROR_SYSTEM|2, "%s: seek error", jp->file[0].name);
514 						return -1;
515 					}
516 				}
517 				else if (outrec(jp, 0) < 0)
518 					return -1;
519 				else if (lo < 0 && (jp->outmode & C_COMMON))
520 				{
					/* remember where the first matching file[1] record starts */
521 					if ((lo = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0)
522 					{
523 						error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
524 						return -1;
525 					}
526 					lo -= jp->file[1].reclen;
527 				}
528 				if (cp2 = getrec(jp, 1, lo < 0))
529 				{
530 					n2 = jp->file[1].fieldlen;
531 					continue;
532 				}
533 #if DEBUG_TRACE
534 sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi);
535 #endif
536 			}
			/* the file[0] join field compares greater than the file[1] one */
537 			else if (cmp > 0)
538 			{
539 				if (same)
540 				{
541 					same = 0;
542 				next:
					/* keep a copy of the file[1] join field and skip the file[1] records that repeat it */
543 					if (n2 > jp->samesize)
544 					{
545 						jp->samesize = roundof(n2, 16);
546 						if (!(jp->same = newof(jp->same, char, jp->samesize, 0)))
547 						{
548 							error(ERROR_SYSTEM|2, "out of space");
549 							return -1;
550 						}
551 					}
552 					memcpy(jp->same, cp2, o2 = n2);
553 					if (!(cp2 = getrec(jp, 1, 0)))
554 						break;
555 					n2 = jp->file[1].fieldlen;
556 					if (n2 == o2 && *cp2 == *jp->same && !memcmp(cp2, jp->same, n2))
557 						goto next;
558 					continue;
559 				}
560 				if (hi >= 0)
561 				{
					/* return to where file[1] was before it was rewound */
562 					if (sfseek(jp->file[1].iop, hi, SEEK_SET) != hi)
563 					{
564 						error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
565 						return -1;
566 					}
567 					hi = -1;
568 				}
569 				else if ((jp->outmode & C_FILE2) && outrec(jp, 1) < 0)
570 					return -1;
571 				lo = -1;
572 				if (cp2 = getrec(jp, 1, 1))
573 				{
574 					n2 = jp->file[1].fieldlen;
575 					continue;
576 				}
577 #if DEBUG_TRACE
578 sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi);
579 #endif
580 			}
581 			else if (same)
582 			{
583 				same = 0;
584 				if (!(cp1 = getrec(jp, 0, 0)))
585 					break;
586 				n1 = jp->file[0].fieldlen;
587 				continue;
588 			}
			/* about to advance file[0]: first rewind file[1] to the start of the matched records */
589 			if (lo >= 0)
590 			{
591 				if ((hi = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0 ||
592 				    (hi -= jp->file[1].reclen) < 0 ||
593 				    sfseek(jp->file[1].iop, lo, SEEK_SET) != lo ||
594 				    !(cp2 = getrec(jp, 1, 0)))
595 				{
596 					error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
597 					return -1;
598 				}
599 				n2 = jp->file[1].fieldlen;
600 				lo = -1;
				/* NOTE(review): offset -1 looks like a signal to the discard discipline - confirm */
601 				if (jp->file[1].discard)
602 					sfseek(jp->file[1].iop, (Sfoff_t)-1, SEEK_SET);
603 			}
604 			else if (!cp2)
605 				break;
606 			else if ((jp->outmode & C_FILE1) && outrec(jp, -1) < 0)
607 				return -1;
608 			if (!(cp1 = getrec(jp, 0, 1)))
609 				break;
610 			n1 = jp->file[0].fieldlen;
611 		}
612 	}
613 #if DEBUG_TRACE
614 sfprintf(sfstdout, "[X#%d:?,%p,%p,%d%,%d,%d%s]", __LINE__, cp1, cp2, cmp, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : "");
615 #endif
	/* pick the file whose remaining records may still have to be written: n is its index */
616 	if (cp2)
617 	{
618 		if (hi >= 0 &&
619 		    sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR) < hi &&
620 		    sfseek(jp->file[1].iop, hi, SEEK_SET) != hi)
621 		{
622 			error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
623 			return -1;
624 		}
625 #if DEBUG_TRACE
626 sfprintf(sfstdout, "[O#%d:%02o:%02o]", __LINE__, jp->ooutmode, jp->outmode);
627 #endif
		/* either keep the current file[1] record or read the next one */
628 		cp1 = (!cp1 && cmp && hi < 0 && !jp->file[1].hit && ((jp->ooutmode ^ C_ALL) <= 1 || jp->outmode == 2)) ? cp2 : getrec(jp, 1, 0);
629 		cmp = 1;
630 		n = 1;
631 	}
632 	else
633 	{
634 		cmp = -1;
635 		n = 0;
636 	}
637 #if DEBUG_TRACE
638 sfprintf(sfstdout, "[X#%d:%d,%p,%p,%d,%02o,%02o%s]", __LINE__, n, cp1, cp2, cmp, jp->ooutmode, jp->outmode, (jp->outmode & C_COMMON) ? ",COMMON" : "");
639 #endif
	/* nothing left, or unpairable lines of file[n] were not requested */
640 	if (!cp1 || !(jp->outmode & (1<<n)))
641 	{
642 		if (cp1 && jp->file[n].iop == sfstdin)
643 			sfseek(sfstdin, (Sfoff_t)0, SEEK_END);
644 		return 0;
645 	}
	/* write the current record and every following one of file[n] as unpaired */
646 	if (outrec(jp, cmp) < 0)
647 		return -1;
648 	do
649 	{
650 		if (!getrec(jp, n, 1))
651 			return 0;
652 	} while (outrec(jp, cmp) >= 0);
653 	return -1;
654 }
655 
656 int
657 b_join(int argc, char** argv, void* context)
658 {
659 	register int		n;
660 	register char*		cp;
661 	register Join_t*	jp;
662 	char*			e;
663 
664 #if !DEBUG_TRACE
665 	cmdinit(argc, argv, context, ERROR_CATALOG, ERROR_NOTIFY);
666 #endif
667 	if (!(jp = init()))
668 		error(ERROR_system(1),"out of space");
669 	for (;;)
670 	{
671 		switch (n = optget(argv, usage))
672 		{
673 		case 0:
674 			break;
675  		case 'j':
676 			/*
677 			 * check for obsolete "-j1 field" and "-j2 field"
678 			 */
679 
680 			if (opt_info.offset == 0)
681 			{
682 				cp = argv[opt_info.index - 1];
683 				for (n = strlen(cp) - 1; n > 0 && cp[n] != 'j'; n--);
684 				n = cp[n] == 'j';
685 			}
686 			else
687 				n = 0;
688 			if (n)
689 			{
690 				if (opt_info.num!=1 && opt_info.num!=2)
691 					error(2,"-jfileno field: fileno must be 1 or 2");
692 				n = '0' + opt_info.num;
693 				if (!(cp = argv[opt_info.index]))
694 				{
695 					argc = 0;
696 					break;
697 				}
698 				opt_info.num = strtol(cp, &e, 10);
699 				if (*e)
700 				{
701 					argc = 0;
702 					break;
703 				}
704 				opt_info.index++;
705 			}
706 			else
707 			{
708 				jp->file[0].field = (int)(opt_info.num-1);
709 				n = '2';
710 			}
711 			/*FALLTHROUGH*/
712  		case '1':
713 		case '2':
714 			if (opt_info.num <=0)
715 				error(2,"field number must positive");
716 			jp->file[n-'1'].field = (int)(opt_info.num-1);
717 			continue;
718 		case 'v':
719 			jp->outmode &= ~C_COMMON;
720 			/*FALLTHROUGH*/
721 		case 'a':
722 			if (opt_info.num!=1 && opt_info.num!=2)
723 				error(2,"%s: file number must be 1 or 2", opt_info.name);
724 			jp->outmode |= 1<<(opt_info.num-1);
725 			continue;
726 		case 'e':
727 			jp->nullfield = opt_info.arg;
728 			continue;
729 		case 'o':
730 			/* need to accept obsolescent command syntax */
731 			n = getolist(jp, opt_info.arg, argv+opt_info.index);
732 			opt_info.index += n;
733 			continue;
734 		case 't':
735 			jp->state[' '] = jp->state['\t'] = 0;
736 			n= *(unsigned char*)opt_info.arg;
737 			jp->state[n] = S_DELIM;
738 			jp->delim = n;
739 			continue;
740 		case 'i':
741 			jp->ignorecase = !opt_info.num;
742 			continue;
743 		case 'B':
744 			jp->buffered = !opt_info.num;
745 			continue;
746 		case ':':
747 			error(2, "%s", opt_info.arg);
748 			break;
749 		case '?':
750 			done(jp);
751 			error(ERROR_usage(2), "%s", opt_info.arg);
752 			break;
753 		}
754 		break;
755 	}
756 	argv += opt_info.index;
757 	argc -= opt_info.index;
758 	if (error_info.errors || argc!=2)
759 	{
760 		done(jp);
761 		error(ERROR_usage(2),"%s", optusage(NiL));
762 	}
763 	jp->ooutmode = jp->outmode;
764 	jp->file[0].name = cp = *argv++;
765 	if (streq(cp,"-"))
766 	{
767 		if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0)
768 		{
769 			if (sfdcseekable(sfstdin))
770 				error(ERROR_warn(0),"%s: seek may fail",cp);
771 			else
772 				jp->file[0].discard = 1;
773 		}
774 		jp->file[0].iop = sfstdin;
775 	}
776 	else if (!(jp->file[0].iop = sfopen(NiL, cp, "r")))
777 	{
778 		done(jp);
779 		error(ERROR_system(1),"%s: cannot open",cp);
780 	}
781 	jp->file[1].name = cp = *argv;
782 	if (streq(cp,"-"))
783 	{
784 		if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0)
785 		{
786 			if (sfdcseekable(sfstdin))
787 				error(ERROR_warn(0),"%s: seek may fail",cp);
788 			else
789 				jp->file[1].discard = 1;
790 		}
791 		jp->file[1].iop = sfstdin;
792 	}
793 	else if (!(jp->file[1].iop = sfopen(NiL, cp, "r")))
794 	{
795 		done(jp);
796 		error(ERROR_system(1),"%s: cannot open",cp);
797 	}
798 	if (jp->buffered)
799 	{
800 		sfsetbuf(jp->file[0].iop, jp->file[0].iop, SF_UNBOUND);
801 		sfsetbuf(jp->file[1].iop, jp->file[0].iop, SF_UNBOUND);
802 	}
803 	jp->state['\n'] = S_NL;
804 	jp->outfile = sfstdout;
805 	if (!jp->outlist)
806 		jp->nullfield = 0;
807 	if (join(jp) < 0)
808 	{
809 		done(jp);
810 		error(ERROR_system(1),"write error");
811 	}
812 	else if (jp->file[0].iop==sfstdin || jp->file[1].iop==sfstdin)
813 		sfseek(sfstdin,(Sfoff_t)0,SEEK_END);
814 	done(jp);
815 	return error_info.errors;
816 }
817