xref: /titanic_50/usr/src/lib/libcmd/common/join.c (revision 4246c8e92ef9ad6ada2b992b7af02832ff071bf7)
1 /***********************************************************************
2 *                                                                      *
3 *               This software is part of the ast package               *
4 *          Copyright (c) 1992-2008 AT&T Intellectual Property          *
5 *                      and is licensed under the                       *
6 *                  Common Public License, Version 1.0                  *
7 *                    by AT&T Intellectual Property                     *
8 *                                                                      *
9 *                A copy of the License is available at                 *
10 *            http://www.opensource.org/licenses/cpl1.0.txt             *
11 *         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12 *                                                                      *
13 *              Information and Software Systems Research               *
14 *                            AT&T Research                             *
15 *                           Florham Park NJ                            *
16 *                                                                      *
17 *                 Glenn Fowler <gsf@research.att.com>                  *
18 *                  David Korn <dgk@research.att.com>                   *
19 *                                                                      *
20 ***********************************************************************/
21 #pragma prototyped
22 /*
23  * David Korn
24  * Glenn Fowler
25  * AT&T Research
26  *
27  * join
28  */
29 
30 static const char usage[] =
31 "[-?\n@(#)$Id: join (AT&T Research) 2006-10-31 $\n]"
32 USAGE_LICENSE
33 "[+NAME?join - relational database operator]"
34 "[+DESCRIPTION?\bjoin\b performs an \aequality join\a on the files \afile1\a "
35 	"and \afile2\a and writes the resulting joined files to standard "
36 	"output.  By default, a field is delimited by one or more spaces "
37 	"and tabs with leading spaces and/or tabs ignored.  The \b-t\b option "
38 	"can be used to change the field delimiter.]"
39 "[+?The \ajoin field\a is a field in each file on which files are compared. "
40 	"By default \bjoin\b writes one line in the output for each pair "
41 	"of lines in \afiles1\a and \afiles2\a that have identical join "
42 	"fields.  The default output line consists of the join field, "
43 	"then the remaining fields from \afile1\a, then the remaining "
44 	"fields from \afile2\a, but this can be changed with the \b-o\b "
45 	"option.  The \b-a\b option can be used to add unmatched lines "
46 	"to the output.  The \b-v\b option can be used to output only "
47 	"unmatched lines.]"
48 "[+?The files \afile1\a and \afile2\a must be ordered in the collating "
49 	"sequence of \bsort -b\b on the fields on which they are to be "
50 	"joined otherwise the results are unspecified.]"
51 "[+?If either \afile1\a or \afile2\a is \b-\b, \bjoin\b "
52         "uses standard input starting at the current location.]"
53 
54 "[e:empty]:[string?Replace empty output fields in the list selected with"
55 "	\b-o\b with \astring\a.]"
56 "[o:output]:[list?Construct the output line to comprise the fields specified "
57 	"in a blank or comma separated list \alist\a.  Each element in "
58 	"\alist\a consists of a file number (either 1 or 2), a period, "
59 	"and a field number or \b0\b representing the join field.  "
60 	"As an obsolete feature multiple occurrences of \b-o\b can "
61 	"be specified.]"
62 "[t:separator|tabs]:[delim?Use \adelim\a as the field separator for both input"
63 "	and output.]"
64 "[1:j1]#[field?Join on field \afield\a of \afile1\a.  Fields start at 1.]"
65 "[2:j2]#[field?Join on field \afield\a of \afile2\a.  Fields start at 1.]"
66 "[j:join]#[field?Equivalent to \b-1\b \afield\a \b-2\b \afield\a.]"
67 "[a:unpairable]#[fileno?Write a line for each unpairable line in file"
68 "	\afileno\a, where \afileno\a is either 1 or 2, in addition to the"
69 "	normal output.  If \b-a\b options appear for both 1 and 2, then "
70 	"all unpairable lines will be output.]"
71 "[v:suppress]#[fileno?Write a line for each unpairable line in file"
72 "	\afileno\a, where \afileno\a is either 1 or 2, instead of the normal "
73 	"output.  If \b-v\b options appear for both 1 and 2, then "
74 	"all unpairable lines will be output.] ]"
75 "[i:ignorecase?Ignore case in field comparisons.]"
76 "[B!:mmap?Enable memory mapped reads instead of buffered.]"
77 
78 "[+?The following obsolete option forms are also recognized: \b-j\b \afield\a"
79 "	is equivalent to \b-1\b \afield\a \b-2\b \afield\a, \b-j1\b \afield\a"
80 "	is equivalent to \b-1\b \afield\a, and \b-j2\b \afield\a is"
81 "	equivalent to \b-2\b \afield\a.]"
82 
83 "\n"
84 "\nfile1 file2\n"
85 "\n"
86 "[+EXIT STATUS?]{"
87 	"[+0?Both files processed successfully.]"
88 	"[+>0?An error occurred.]"
89 "}"
90 "[+SEE ALSO?\bcut\b(1), \bcomm\b(1), \bpaste\b(1), \bsort\b(1), \buniq\b(1)]"
91 ;
92 
93 #include <cmd.h>
94 #include <sfdisc.h>
95 
/*
 * Output selection bits kept in Join_t.outmode and Join_t.ooutmode.
 * C_FILE1 and C_FILE2 equal 1<<0 and 1<<1, which is how b_join() sets them
 * for -a/-v (1<<(fileno-1)) and how getrec() clears them when a file
 * is exhausted (~(1<<index)).  C_COMMON selects the paired lines and is
 * cleared by -v.
 */
96 #define C_FILE1		001
97 #define C_FILE2		002
98 #define C_COMMON	004
99 #define C_ALL		(C_FILE1|C_FILE2|C_COMMON)
100 
/*
 * NFIELD is the initial capacity of the per-file field pointer list and
 * of the -o list; both are doubled on demand.  JOINFIELD is the -o list
 * entry that stands for the join field ("0"); ordinary entries are encoded
 * as ((field-1)<<2)|(file-1) and therefore never have bit 1 set.
 */
101 #define NFIELD		10
102 #define JOINFIELD	2
103 
/*
 * Byte classes stored in Join_t.state[]; 0 means an ordinary field byte.
 */
104 #define S_DELIM		1
105 #define S_SPACE		2
106 #define S_NL		3
107 
/*
 * Per input file state.
 *
 *	iop		input stream (sfstdin or the result of sfopen())
 *	name		file operand as given, used in diagnostics
 *	recptr		start of the current record as returned by sfgetr()
 *	reclen		length of the current record as reported by sfvalue()
 *	field		zero based index of the join field
 *	fieldlen	length of the join field of the current record
 *	nfields		number of fields in the current record
 *	maxfields	capacity of fieldlist, not counting the extra end slot
 *	spaces		set by getrec() when a field of the current record
 *			starts with blanks in default delimiter mode
 *	hit		cleared by getrec(); incremented by outrec() so that a
 *			record is written as unpaired at most once
 *	discard		set by b_join() for standard input when sfseek() fails
 *			and sfdcseekable() returns 0; makes getrec() raise
 *			SFSK_DISCARD on request
 *	fieldlist	start of each field of the current record;
 *			fieldlist[nfields] points just past the record
 */
108 typedef struct
109 {
110 	Sfio_t*		iop;
111 	char*		name;
112 	char*		recptr;
113 	int		reclen;
114 	int		field;
115 	int		fieldlen;
116 	int		nfields;
117 	int		maxfields;
118 	int		spaces;
119 	int		hit;
120 	int		discard;
121 	char**		fieldlist;
122 } File_t;
123 
/*
 * State of one join invocation.
 *
 *	state		byte class table (0, S_DELIM, S_SPACE or S_NL),
 *			indexed by unsigned char
 *	outfile		output stream (b_join() sets it to sfstdout)
 *	outlist		-o field list built by getolist(), terminated by -1;
 *			0 when -o was not given
 *	outmode		C_* bits still in effect; getrec() clears a file bit
 *			when that file is exhausted
 *	ooutmode	copy of outmode taken by b_join() before joining
 *	nullfield	-e replacement string, or 0
 *	delim		-t delimiter byte, or -1 for blank separated fields
 *	buffered	set from the -B option; makes b_join() call sfsetbuf()
 *	ignorecase	set from the -i option
 *	same, samesize	scratch copy of a file 2 join key and its capacity,
 *			used by join()
 *	context		builtin context passed to sh_checksig()
 *	file		the two inputs, file1 at index 0 and file2 at index 1
 */
124 typedef struct
125 {
126 	unsigned char	state[1<<CHAR_BIT];
127 	Sfio_t*		outfile;
128 	int*		outlist;
129 	int		outmode;
130 	int		ooutmode;
131 	char*		nullfield;
132 	int		delim;
133 	int		buffered;
134 	int		ignorecase;
135 	char*		same;
136 	int		samesize;
137 	void*		context;
138 	File_t		file[2];
139 } Join_t;
140 
141 static void
142 done(register Join_t* jp)
143 {
144 	if (jp->file[0].iop && jp->file[0].iop != sfstdin)
145 		sfclose(jp->file[0].iop);
146 	if (jp->file[1].iop && jp->file[1].iop != sfstdin)
147 		sfclose(jp->file[1].iop);
148 	if (jp->outlist)
149 		free(jp->outlist);
150 	if (jp->file[0].fieldlist)
151 		free(jp->file[0].fieldlist);
152 	if (jp->file[1].fieldlist)
153 		free(jp->file[1].fieldlist);
154 	if (jp->same)
155 		free(jp->same);
156 	free(jp);
157 }
158 
159 static Join_t*
160 init(void)
161 {
162 	register Join_t*	jp;
163 
164 	if (jp = newof(0, Join_t, 1, 0))
165 	{
166 		jp->state[' '] = jp->state['\t'] = S_SPACE;
167 		jp->delim = -1;
168 		jp->nullfield = 0;
169 		if (!(jp->file[0].fieldlist = newof(0, char*, NFIELD + 1, 0)) ||
170 		    !(jp->file[1].fieldlist = newof(0, char*, NFIELD + 1, 0)))
171 		{
172 			done(jp);
173 			return 0;
174 		}
175 		jp->file[0].maxfields = NFIELD;
176 		jp->file[1].maxfields = NFIELD;
177 		jp->outmode = C_COMMON;
178 	}
179 	return jp;
180 }
181 
182 static int
183 getolist(Join_t* jp, const char* first, char** arglist)
184 {
185 	register const char*	cp = first;
186 	char**			argv = arglist;
187 	register int		c;
188 	int*			outptr;
189 	int*			outmax;
190 	int			nfield = NFIELD;
191 	char*			str;
192 
193 	outptr = jp->outlist = newof(0, int, NFIELD + 1, 0);
194 	outmax = outptr + NFIELD;
195 	while (c = *cp++)
196 	{
197 		if (c==' ' || c=='\t' || c==',')
198 			continue;
199 		str = (char*)--cp;
200 		if (*cp=='0' && ((c=cp[1])==0 || c==' ' || c=='\t' || c==','))
201 		{
202 			str++;
203 			c = JOINFIELD;
204 			goto skip;
205 		}
206 		if (cp[1]!='.' || (*cp!='1' && *cp!='2') || (c=strtol(cp+2,&str,10)) <=0)
207 		{
208 			error(2,"%s: invalid field list",first);
209 			break;
210 		}
211 		c--;
212 		c <<=2;
213 		if (*cp=='2')
214 			c |=1;
215 	skip:
216 		if (outptr >= outmax)
217 		{
218 			jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0);
219 			outptr = jp->outlist + nfield;
220 			nfield *= 2;
221 			outmax = jp->outlist + nfield;
222 		}
223 		*outptr++ = c;
224 		cp = str;
225 	}
226 	/* need to accept obsolescent command syntax */
227 	while (1)
228 	{
229 		if (!(cp= *argv) || cp[1]!='.' || (*cp!='1' && *cp!='2'))
230 		{
231 			if (*cp=='0' && cp[1]==0)
232 			{
233 				c = JOINFIELD;
234 				goto skip2;
235 			}
236 			break;
237 		}
238 		str = (char*)cp;
239 		c = strtol(cp+2, &str,10);
240 		if (*str || --c<0)
241 			break;
242 		argv++;
243 		c <<= 2;
244 		if (*cp=='2')
245 			c |=1;
246 	skip2:
247 		if (outptr >= outmax)
248 		{
249 			jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0);
250 			outptr = jp->outlist + nfield;
251 			nfield *= 2;
252 			outmax = jp->outlist + nfield;
253 		}
254 		*outptr++ = c;
255 	}
256 	*outptr = -1;
257 	return argv-arglist;
258 }
259 
260 /*
261  * read in a record from file <index> and split into fields
262  */
263 static unsigned char*
264 getrec(Join_t* jp, int index, int discard)
265 {
266 	register unsigned char*	sp = jp->state;
267 	register File_t*	fp = &jp->file[index];
268 	register char**		ptr = fp->fieldlist;
269 	register char**		ptrmax = ptr + fp->maxfields;
270 	register char*		cp;
271 	register int		n = 0;
272 
273 	if (sh_checksig(jp->context))
274 		return 0;
275 	if (discard && fp->discard)
276 		sfraise(fp->iop, SFSK_DISCARD, NiL);
277 	fp->spaces = 0;
278 	fp->hit = 0;
279 	if (!(cp = sfgetr(fp->iop, '\n', 0)))
280 	{
281 		jp->outmode &= ~(1<<index);
282 		return 0;
283 	}
284 	fp->recptr = cp;
285 	fp->reclen = sfvalue(fp->iop);
286 	if (jp->delim=='\n')	/* handle new-line delimiter specially */
287 	{
288 		*ptr++ = cp;
289 		cp += fp->reclen;
290 	}
291 	else while (n!=S_NL) /* separate into fields */
292 	{
293 		if (ptr >= ptrmax)
294 		{
295 			n = 2*fp->maxfields;
296 			fp->fieldlist = newof(fp->fieldlist, char*, n + 1, 0);
297 			ptr = fp->fieldlist + fp->maxfields;
298 			fp->maxfields = n;
299 			ptrmax = fp->fieldlist+n;
300 		}
301 		*ptr++ = cp;
302 		if (jp->delim<=0 && sp[*(unsigned char*)cp]==S_SPACE)
303 		{
304 			fp->spaces = 1;
305 			while (sp[*(unsigned char*)cp++]==S_SPACE);
306 			cp--;
307 		}
308 		while ((n=sp[*(unsigned char*)cp++])==0);
309 	}
310 	*ptr = cp;
311 	fp->nfields = ptr - fp->fieldlist;
312 	if ((n=fp->field) < fp->nfields)
313 	{
314 		cp = fp->fieldlist[n];
315 		/* eliminate leading spaces */
316 		if (fp->spaces)
317 		{
318 			while (sp[*(unsigned char*)cp++]==S_SPACE);
319 			cp--;
320 		}
321 		fp->fieldlen = (fp->fieldlist[n+1]-cp)-1;
322 		return (unsigned char*)cp;
323 	}
324 	fp->fieldlen = 0;
325 	return (unsigned char*)"";
326 }
327 
/*
 * With DEBUG_TRACE, every later getrec() call also prints a trace record
 * to sfstdout: the calling line, the file index, the stream offset from
 * sftell() and up to 8 bytes of the returned join field.  The result is
 * passed through u1.
 * NOTE(review): u2 and u3 are declared as plain unsigned char, not
 * pointers, and are not referenced in the visible source.
 */
328 #if DEBUG_TRACE
329 static unsigned char* u1,u2,u3;
330 #define getrec(p,n,d)	(u1 = getrec(p, n, d), sfprintf(sfstdout, "[G%d#%d@%I*d:%-.8s]", __LINE__, n, sizeof(Sfoff_t), sftell(p->file[n].iop), u1), u1)
331 #endif
332 
333 /*
334  * print field <n> from file <index>
335  */
336 static int
337 outfield(Join_t* jp, int index, register int n, int last)
338 {
339 	register File_t*	fp = &jp->file[index];
340 	register char*		cp;
341 	register char*		cpmax;
342 	register int		size;
343 	register Sfio_t*	iop = jp->outfile;
344 
345 	if (n < fp->nfields)
346 	{
347 		cp = fp->fieldlist[n];
348 		cpmax = fp->fieldlist[n+1];
349 	}
350 	else
351 		cp = 0;
352 	if ((n=jp->delim)<=0)
353 	{
354 		if (fp->spaces)
355 		{
356 			/*eliminate leading spaces */
357 			while (jp->state[*(unsigned char*)cp++]==S_SPACE);
358 			cp--;
359 		}
360 		n = ' ';
361 	}
362 	if (last)
363 		n = '\n';
364 	if (cp)
365 		size = cpmax-cp;
366 	else
367 		size = 0;
368 	if (size==0)
369 	{
370 		if (!jp->nullfield)
371 			sfputc(iop,n);
372 		else if (sfputr(iop,jp->nullfield,n) < 0)
373 			return -1;
374 	}
375 	else
376 	{
377 		last = cp[size-1];
378 		cp[size-1] = n;
379 		if (sfwrite(iop,cp,size) < 0)
380 			return -1;
381 		cp[size-1] = last;
382 	}
383 	return 0;
384 }
385 
/*
 * With DEBUG_TRACE, every later outfield() call first prints the calling
 * line and its file index, field index and last flag to sfstdout.  The
 * arguments are latched in i1, i2 and i3 so each is evaluated only once.
 */
386 #if DEBUG_TRACE
387 static int i1,i2,i3;
388 #define outfield(p,i,n,f)	(sfprintf(sfstdout, "[F%d#%d:%d,%d]", __LINE__, i1=i, i2=n, i3=f), outfield(p, i1, i2, i3))
389 #endif
390 
391 static int
392 outrec(register Join_t* jp, int mode)
393 {
394 	register File_t*	fp;
395 	register int		i;
396 	register int		j;
397 	register int		k;
398 	register int		n;
399 	int*			out;
400 
401 	if (mode < 0 && jp->file[0].hit++)
402 		return 0;
403 	if (mode > 0 && jp->file[1].hit++)
404 		return 0;
405 	if (out = jp->outlist)
406 	{
407 		while ((n = *out++) >= 0)
408 		{
409 			if (n == JOINFIELD)
410 			{
411 				i = mode >= 0;
412 				j = jp->file[i].field;
413 			}
414 			else
415 			{
416 				i = n & 1;
417 				j = (mode<0 && i || mode>0 && !i) ?
418 					jp->file[i].nfields :
419 					n >> 2;
420 			}
421 			if (outfield(jp, i, j, *out < 0) < 0)
422 				return -1;
423 		}
424 		return 0;
425 	}
426 	k = jp->file[0].nfields;
427 	if (mode >= 0)
428 		k += jp->file[1].nfields - 1;
429 	for (i=0; i<2; i++)
430 	{
431 		fp = &jp->file[i];
432 		if (mode>0 && i==0)
433 		{
434 			k -= (fp->nfields - 1);
435 			continue;
436 		}
437 		n = fp->field;
438 		if (mode||i==0)
439 		{
440 			/* output join field first */
441 			if (outfield(jp,i,n,!--k) < 0)
442 				return -1;
443 			if (!k)
444 				return 0;
445 			for (j=0; j<n; j++)
446 			{
447 				if (outfield(jp,i,j,!--k) < 0)
448 					return -1;
449 				if (!k)
450 					return 0;
451 			}
452 			j = n + 1;
453 		}
454 		else
455 			j = 0;
456 		for (;j<fp->nfields; j++)
457 		{
458 			if (j!=n && outfield(jp,i,j,!--k) < 0)
459 				return -1;
460 			if (!k)
461 				return 0;
462 		}
463 	}
464 	return 0;
465 }
466 
/*
 * With DEBUG_TRACE, every later outrec() call first prints the calling
 * line, the mode, lo, hi and both join keys with their hit counters to
 * sfstdout.  The expansion refers to jp, lo, hi, cp1 and cp2 by name, so
 * it only works where those are in scope, as they are in join().
 */
467 #if DEBUG_TRACE
468 #define outrec(p,n)	(sfprintf(sfstdout, "[R#%d,%d,%lld,%lld:%-.*s{%d}:%-.*s{%d}]", __LINE__, i1=n, lo, hi, jp->file[0].fieldlen, cp1, jp->file[0].hit, jp->file[1].fieldlen, cp2, jp->file[1].hit), outrec(p, i1))
469 #endif
470 
/*
 * Merge the two input files on their join fields and write the selected
 * lines with outrec().
 *
 * cp1/n1 and cp2/n2 are the join field and its length for the current
 * record of file 1 and file 2.  Keys are compared over the shorter
 * length with memcmp(), or strncasecmp() when jp->ignorecase is set,
 * and then by length.
 *
 * lo and hi are file 2 offsets, -1 when unset.  lo is the start of the
 * first file 2 record of a run that matched the current file 1 key; hi
 * is the start of the file 2 record that was current when the run was
 * left.  The run is replayed from lo for the next file 1 record and
 * skipped by seeking to hi once the file 1 key has moved past it.
 *
 * Returns 0 when the inputs are exhausted (or getrec() stops early) and
 * -1 on a seek error, an allocation failure or an outrec() failure.
 *
 * NOTE(review): if the very first read of file 1 yields nothing, cp2 is
 * forced to 0, file 2 is never read and the tail returns 0 because cp1
 * is null, so unpaired file 2 lines requested with -a 2 or -v 2 do not
 * appear to be written for an empty file 1 -- confirm this is intended.
 */
471 static int
472 join(Join_t* jp)
473 {
474 	register unsigned char*	cp1;
475 	register unsigned char*	cp2;
476 	register int		n1;
477 	register int		n2;
478 	register int		n;
479 	register int		cmp;
480 	register int		same;
481 	int			o2;
482 	Sfoff_t			lo = -1;
483 	Sfoff_t			hi = -1;
484 
	/* prime both files; cp2 is set to 0 if either first read fails */
485 	if ((cp1 = getrec(jp, 0, 0)) && (cp2 = getrec(jp, 1, 0)) || (cp2 = 0))
486 	{
487 		n1 = jp->file[0].fieldlen;
488 		n2 = jp->file[1].fieldlen;
489 		same = 0;
490 		for (;;)
491 		{
492 			n = n1 < n2 ? n1 : n2;
493 #if DEBUG_TRACE
494 			if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n)))
495 				cmp = n1 - n2;
496 sfprintf(sfstdout, "[C#%d:%d(%c-%c),%d,%lld,%lld%s]", __LINE__, cmp, *cp1, *cp2, same, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : "");
497 			if (!cmp)
498 #else
499 			if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n)) && !(cmp = n1 - n2))
500 #endif
501 			{
				/* the join keys are equal */
502 				if (!(jp->outmode & C_COMMON))
503 				{
					/* paired lines are not wanted: step file 1 and note the match in <same> */
504 					if (cp1 = getrec(jp, 0, 1))
505 					{
506 						n1 = jp->file[0].fieldlen;
507 						same = 1;
508 						continue;
509 					}
					/* file 1 is exhausted */
510 					if ((jp->ooutmode & (C_FILE1|C_FILE2)) != C_FILE2)
511 						break;
					/* only file 2 lines were requested: step back one record in file 1 and read it again */
512 					if (sfseek(jp->file[0].iop, (Sfoff_t)-jp->file[0].reclen, SEEK_CUR) < 0 || !(cp1 = getrec(jp, 0, 0)))
513 					{
514 						error(ERROR_SYSTEM|2, "%s: seek error", jp->file[0].name);
515 						return -1;
516 					}
517 				}
518 				else if (outrec(jp, 0) < 0)
519 					return -1;
520 				else if (lo < 0 && (jp->outmode & C_COMMON))
521 				{
					/* first match of a run: remember where this file 2 record starts */
522 					if ((lo = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0)
523 					{
524 						error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
525 						return -1;
526 					}
527 					lo -= jp->file[1].reclen;
528 				}
529 				if (cp2 = getrec(jp, 1, lo < 0))
530 				{
531 					n2 = jp->file[1].fieldlen;
532 					continue;
533 				}
534 #if DEBUG_TRACE
535 sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi);
536 #endif
537 			}
538 			else if (cmp > 0)
539 			{
				/* the file 1 key is greater than the file 2 key */
540 				if (same)
541 				{
542 					same = 0;
543 				next:
					/* skip the file 2 records that repeat the key just matched, using jp->same as the reference copy */
544 					if (n2 > jp->samesize)
545 					{
546 						jp->samesize = roundof(n2, 16);
547 						if (!(jp->same = newof(jp->same, char, jp->samesize, 0)))
548 						{
549 							error(ERROR_SYSTEM|2, "out of space");
550 							return -1;
551 						}
552 					}
553 					memcpy(jp->same, cp2, o2 = n2);
554 					if (!(cp2 = getrec(jp, 1, 0)))
555 						break;
556 					n2 = jp->file[1].fieldlen;
557 					if (n2 == o2 && *cp2 == *jp->same && !memcmp(cp2, jp->same, n2))
558 						goto next;
559 					continue;
560 				}
561 				if (hi >= 0)
562 				{
					/* a replayed run is behind us: jump to the record that ended it */
563 					if (sfseek(jp->file[1].iop, hi, SEEK_SET) != hi)
564 					{
565 						error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
566 						return -1;
567 					}
568 					hi = -1;
569 				}
570 				else if ((jp->outmode & C_FILE2) && outrec(jp, 1) < 0)
571 					return -1;
572 				lo = -1;
573 				if (cp2 = getrec(jp, 1, 1))
574 				{
575 					n2 = jp->file[1].fieldlen;
576 					continue;
577 				}
578 #if DEBUG_TRACE
579 sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi);
580 #endif
581 			}
582 			else if (same)
583 			{
				/* the file 1 key is smaller and <same> is set: clear it and read the next file 1 record */
584 				same = 0;
585 				if (!(cp1 = getrec(jp, 0, 0)))
586 					break;
587 				n1 = jp->file[0].fieldlen;
588 				continue;
589 			}
			/* reached when the file 1 key is smaller, or when file 2 returned no record above */
590 			if (lo >= 0)
591 			{
				/* a run is pending: save where it ended in hi, rewind file 2 to lo and reread its first record */
592 				if ((hi = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0 ||
593 				    (hi -= jp->file[1].reclen) < 0 ||
594 				    sfseek(jp->file[1].iop, lo, SEEK_SET) != lo ||
595 				    !(cp2 = getrec(jp, 1, 0)))
596 				{
597 					error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
598 					return -1;
599 				}
600 				n2 = jp->file[1].fieldlen;
601 				lo = -1;
602 				if (jp->file[1].discard)
603 					sfseek(jp->file[1].iop, (Sfoff_t)-1, SEEK_SET);
604 			}
605 			else if (!cp2)
606 				break;
607 			else if ((jp->outmode & C_FILE1) && outrec(jp, -1) < 0)
608 				return -1;
609 			if (!(cp1 = getrec(jp, 0, 1)))
610 				break;
611 			n1 = jp->file[0].fieldlen;
612 		}
613 	}
614 #if DEBUG_TRACE
615 sfprintf(sfstdout, "[X#%d:?,%p,%p,%d%,%d,%d%s]", __LINE__, cp1, cp2, cmp, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : "");
616 #endif
	/*
	 * At most one file still has records.  From here on n is the index
	 * of that file and cmp the outrec() mode for its unpaired lines.
	 */
617 	if (cp2)
618 	{
619 		if (hi >= 0 &&
620 		    sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR) < hi &&
621 		    sfseek(jp->file[1].iop, hi, SEEK_SET) != hi)
622 		{
623 			error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
624 			return -1;
625 		}
626 #if DEBUG_TRACE
627 sfprintf(sfstdout, "[O#%d:%02o:%02o]", __LINE__, jp->ooutmode, jp->outmode);
628 #endif
		/* either keep the current file 2 record or read the next one */
629 		cp1 = (!cp1 && cmp && hi < 0 && !jp->file[1].hit && ((jp->ooutmode ^ C_ALL) <= 1 || jp->outmode == 2)) ? cp2 : getrec(jp, 1, 0);
630 		cmp = 1;
631 		n = 1;
632 	}
633 	else
634 	{
635 		cmp = -1;
636 		n = 0;
637 	}
638 #if DEBUG_TRACE
639 sfprintf(sfstdout, "[X#%d:%d,%p,%p,%d,%02o,%02o%s]", __LINE__, n, cp1, cp2, cmp, jp->ooutmode, jp->outmode, (jp->outmode & C_COMMON) ? ",COMMON" : "");
640 #endif
641 	if (!cp1 || !(jp->outmode & (1<<n)))
642 	{
		/* nothing more to write: move standard input to its end when a record is still pending on it */
643 		if (cp1 && jp->file[n].iop == sfstdin)
644 			sfseek(sfstdin, (Sfoff_t)0, SEEK_END);
645 		return 0;
646 	}
	/* write the pending record and every remaining record of file n as unpaired */
647 	if (outrec(jp, cmp) < 0)
648 		return -1;
649 	do
650 	{
651 		if (!getrec(jp, n, 1))
652 			return 0;
653 	} while (outrec(jp, cmp) >= 0);
654 	return -1;
655 }
656 
657 int
658 b_join(int argc, char** argv, void* context)
659 {
660 	register int		n;
661 	register char*		cp;
662 	register Join_t*	jp;
663 	char*			e;
664 
665 #if !DEBUG_TRACE
666 	cmdinit(argc, argv, context, ERROR_CATALOG, ERROR_NOTIFY);
667 #endif
668 	if (!(jp = init()))
669 		error(ERROR_system(1),"out of space");
670 	jp->context = context;
671 	for (;;)
672 	{
673 		switch (n = optget(argv, usage))
674 		{
675 		case 0:
676 			break;
677  		case 'j':
678 			/*
679 			 * check for obsolete "-j1 field" and "-j2 field"
680 			 */
681 
682 			if (opt_info.offset == 0)
683 			{
684 				cp = argv[opt_info.index - 1];
685 				for (n = strlen(cp) - 1; n > 0 && cp[n] != 'j'; n--);
686 				n = cp[n] == 'j';
687 			}
688 			else
689 				n = 0;
690 			if (n)
691 			{
692 				if (opt_info.num!=1 && opt_info.num!=2)
693 					error(2,"-jfileno field: fileno must be 1 or 2");
694 				n = '0' + opt_info.num;
695 				if (!(cp = argv[opt_info.index]))
696 				{
697 					argc = 0;
698 					break;
699 				}
700 				opt_info.num = strtol(cp, &e, 10);
701 				if (*e)
702 				{
703 					argc = 0;
704 					break;
705 				}
706 				opt_info.index++;
707 			}
708 			else
709 			{
710 				jp->file[0].field = (int)(opt_info.num-1);
711 				n = '2';
712 			}
713 			/*FALLTHROUGH*/
714  		case '1':
715 		case '2':
716 			if (opt_info.num <=0)
717 				error(2,"field number must positive");
718 			jp->file[n-'1'].field = (int)(opt_info.num-1);
719 			continue;
720 		case 'v':
721 			jp->outmode &= ~C_COMMON;
722 			/*FALLTHROUGH*/
723 		case 'a':
724 			if (opt_info.num!=1 && opt_info.num!=2)
725 				error(2,"%s: file number must be 1 or 2", opt_info.name);
726 			jp->outmode |= 1<<(opt_info.num-1);
727 			continue;
728 		case 'e':
729 			jp->nullfield = opt_info.arg;
730 			continue;
731 		case 'o':
732 			/* need to accept obsolescent command syntax */
733 			n = getolist(jp, opt_info.arg, argv+opt_info.index);
734 			opt_info.index += n;
735 			continue;
736 		case 't':
737 			jp->state[' '] = jp->state['\t'] = 0;
738 			n= *(unsigned char*)opt_info.arg;
739 			jp->state[n] = S_DELIM;
740 			jp->delim = n;
741 			continue;
742 		case 'i':
743 			jp->ignorecase = !opt_info.num;
744 			continue;
745 		case 'B':
746 			jp->buffered = !opt_info.num;
747 			continue;
748 		case ':':
749 			error(2, "%s", opt_info.arg);
750 			break;
751 		case '?':
752 			done(jp);
753 			error(ERROR_usage(2), "%s", opt_info.arg);
754 			break;
755 		}
756 		break;
757 	}
758 	argv += opt_info.index;
759 	argc -= opt_info.index;
760 	if (error_info.errors || argc!=2)
761 	{
762 		done(jp);
763 		error(ERROR_usage(2),"%s", optusage(NiL));
764 	}
765 	jp->ooutmode = jp->outmode;
766 	jp->file[0].name = cp = *argv++;
767 	if (streq(cp,"-"))
768 	{
769 		if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0)
770 		{
771 			if (sfdcseekable(sfstdin))
772 				error(ERROR_warn(0),"%s: seek may fail",cp);
773 			else
774 				jp->file[0].discard = 1;
775 		}
776 		jp->file[0].iop = sfstdin;
777 	}
778 	else if (!(jp->file[0].iop = sfopen(NiL, cp, "r")))
779 	{
780 		done(jp);
781 		error(ERROR_system(1),"%s: cannot open",cp);
782 	}
783 	jp->file[1].name = cp = *argv;
784 	if (streq(cp,"-"))
785 	{
786 		if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0)
787 		{
788 			if (sfdcseekable(sfstdin))
789 				error(ERROR_warn(0),"%s: seek may fail",cp);
790 			else
791 				jp->file[1].discard = 1;
792 		}
793 		jp->file[1].iop = sfstdin;
794 	}
795 	else if (!(jp->file[1].iop = sfopen(NiL, cp, "r")))
796 	{
797 		done(jp);
798 		error(ERROR_system(1),"%s: cannot open",cp);
799 	}
800 	if (jp->buffered)
801 	{
802 		sfsetbuf(jp->file[0].iop, jp->file[0].iop, SF_UNBOUND);
803 		sfsetbuf(jp->file[1].iop, jp->file[0].iop, SF_UNBOUND);
804 	}
805 	jp->state['\n'] = S_NL;
806 	jp->outfile = sfstdout;
807 	if (!jp->outlist)
808 		jp->nullfield = 0;
809 	if (join(jp) < 0)
810 	{
811 		done(jp);
812 		error(ERROR_system(1),"write error");
813 	}
814 	else if (jp->file[0].iop==sfstdin || jp->file[1].iop==sfstdin)
815 		sfseek(sfstdin,(Sfoff_t)0,SEEK_END);
816 	done(jp);
817 	return error_info.errors;
818 }
819