1 /***********************************************************************
2 * *
3 * This software is part of the ast package *
4 * Copyright (c) 1992-2010 AT&T Intellectual Property *
5 * and is licensed under the *
6 * Common Public License, Version 1.0 *
7 * by AT&T Intellectual Property *
8 * *
9 * A copy of the License is available at *
10 * http://www.opensource.org/licenses/cpl1.0.txt *
11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) *
12 * *
13 * Information and Software Systems Research *
14 * AT&T Research *
15 * Florham Park NJ *
16 * *
17 * Glenn Fowler <gsf@research.att.com> *
18 * David Korn <dgk@research.att.com> *
19 * *
20 ***********************************************************************/
21 #pragma prototyped
22 /*
23 * David Korn
24 * Glenn Fowler
25 * AT&T Research
26 *
27 * join
28 */
29
30 static const char usage[] =
31 "[-?\n@(#)$Id: join (AT&T Research) 2009-12-10 $\n]"
32 USAGE_LICENSE
33 "[+NAME?join - relational database operator]"
34 "[+DESCRIPTION?\bjoin\b performs an \aequality join\a on the files \afile1\a "
35 "and \afile2\a and writes the resulting joined files to standard "
36 "output. By default, a field is delimited by one or more spaces "
37 "and tabs with leading spaces and/or tabs ignored. The \b-t\b option "
38 "can be used to change the field delimiter.]"
39 "[+?The \ajoin field\a is a field in each file on which files are compared. "
40 "By default \bjoin\b writes one line in the output for each pair "
41 "of lines in \afiles1\a and \afiles2\a that have identical join "
42 "fields. The default output line consists of the join field, "
43 "then the remaining fields from \afile1\a, then the remaining "
44 "fields from \afile2\a, but this can be changed with the \b-o\b "
45 "option. The \b-a\b option can be used to add unmatched lines "
46 "to the output. The \b-v\b option can be used to output only "
47 "unmatched lines.]"
48 "[+?The files \afile1\a and \afile2\a must be ordered in the collating "
49 "sequence of \bsort -b\b on the fields on which they are to be "
50 "joined otherwise the results are unspecified.]"
51 "[+?If either \afile1\a or \afile2\a is \b-\b, \bjoin\b "
52 "uses standard input starting at the current location.]"
53
54 "[e:empty]:[string?Replace empty output fields in the list selected with"
55 " \b-o\b with \astring\a.]"
56 "[o:output]:[list?Construct the output line to comprise the fields specified "
57 "in a blank or comma separated list \alist\a. Each element in "
58 "\alist\a consists of a file number (either 1 or 2), a period, "
59 "and a field number or \b0\b representing the join field. "
60 "As an obsolete feature multiple occurrences of \b-o\b can "
61 "be specified.]"
62 "[t:separator|tabs]:[delim?Use \adelim\a as the field separator for both input"
63 " and output.]"
64 "[1:j1]#[field?Join on field \afield\a of \afile1\a. Fields start at 1.]"
65 "[2:j2]#[field?Join on field \afield\a of \afile2\a. Fields start at 1.]"
66 "[j:join]#[field?Equivalent to \b-1\b \afield\a \b-2\b \afield\a.]"
67 "[a:unpairable]#[fileno?Write a line for each unpairable line in file"
68 " \afileno\a, where \afileno\a is either 1 or 2, in addition to the"
69 " normal output. If \b-a\b options appear for both 1 and 2, then "
70 "all unpairable lines will be output.]"
71 "[v:suppress]#[fileno?Write a line for each unpairable line in file"
72 " \afileno\a, where \afileno\a is either 1 or 2, instead of the normal "
73 "output. If \b-v\b options appear for both 1 and 2, then "
74 "all unpairable lines will be output.] ]"
75 "[i:ignorecase?Ignore case in field comparisons.]"
76 "[B!:mmap?Enable memory mapped reads instead of buffered.]"
77
78 "[+?The following obsolete option forms are also recognized: \b-j\b \afield\a"
79 " is equivalent to \b-1\b \afield\a \b-2\b \afield\a, \b-j1\b \afield\a"
80 " is equivalent to \b-1\b \afield\a, and \b-j2\b \afield\a is"
81 " equivalent to \b-2\b \afield\a.]"
82
83 "\n"
84 "\nfile1 file2\n"
85 "\n"
86 "[+EXIT STATUS?]{"
87 "[+0?Both files processed successfully.]"
88 "[+>0?An error occurred.]"
89 "}"
90 "[+SEE ALSO?\bcut\b(1), \bcomm\b(1), \bpaste\b(1), \bsort\b(1), \buniq\b(1)]"
91 ;
92
93 #include <cmd.h>
94 #include <sfdisc.h>
95
/*
 * use the wide character headers when the build configuration reports
 * them; otherwise fall back to <ctype.h> and, unless iswspace is already
 * a macro, map iswspace() onto isspace()
 */
#if _hdr_wchar && _hdr_wctype && _lib_iswctype

#include <wchar.h>
#include <wctype.h>

#else

#include <ctype.h>

#ifndef iswspace
#define iswspace(x)	isspace(x)
#endif

#endif
110
/*
 * Join_t.outmode bits: -a and -v set bit (fileno-1), i.e. C_FILE1 or
 * C_FILE2; C_COMMON is the initial mode and is cleared by -v
 */
#define C_FILE1		001
#define C_FILE2		002
#define C_COMMON	004
#define C_ALL		(C_FILE1|C_FILE2|C_COMMON)

/*
 * NFIELD is the initial capacity of the per-file field arrays and of the
 * -o output list; both are doubled on demand
 * JOINFIELD is the output list entry that stands for the join field;
 * ordinary entries are encoded as ((field-1)<<2)|(file==2) and so never
 * have this bit set
 */
#define NFIELD		10
#define JOINFIELD	2

/*
 * byte classes stored in Join_t.state; 0 means an ordinary data byte
 */
#define S_DELIM		1	/* the single byte -t delimiter */
#define S_SPACE		2	/* space or tab while no -t delimiter is set */
#define S_NL		3	/* new-line */
#define S_WIDE		4	/* byte >= 0x80 when mbwide() is true */
123
/*
 * one field of the current record, delimited by two pointers into the
 * record buffer
 */
typedef struct Field_s
{
	char*		beg;	/* first byte of the field as scanned (leading blanks not yet skipped) */
	char*		end;	/* the byte that terminated the field (separator or new-line) */
} Field_t;
129
/*
 * per input file state
 */
typedef struct File_s
{
	Sfio_t*		iop;		/* input stream (sfstdin for "-") */
	char*		name;		/* operand as given, used in diagnostics */
	char*		recptr;		/* current record as returned by sfgetr() */
	int		reclen;		/* sfvalue() of the current record */
	int		field;		/* zero based join field index */
	int		fieldlen;	/* length of the join field in the current record */
	int		nfields;	/* number of fields in the current record */
	int		maxfields;	/* capacity of fields[], not counting one spare slot */
	int		spaces;		/* set by getrec() when a field started with blanks */
	int		hit;		/* cleared by getrec(); bumped by outrec() so a record is written unpaired once */
	int		discard;	/* set for "-" when sfseek() failed and sfdcseekable() returned 0; getrec() then may raise SFSK_DISCARD */
	Field_t*	fields;		/* field boundaries of the current record */
} File_t;
145
/*
 * state of one join invocation
 */
typedef struct Join_s
{
	unsigned char	state[1<<CHAR_BIT];	/* S_* class of every byte value */
	Sfio_t*		outfile;	/* output stream */
	int*		outlist;	/* -o field list terminated by -1, 0 if no -o */
	int		outmode;	/* C_* bits; getrec() clears a file's bit at end of input */
	int		ooutmode;	/* outmode as it was after option parsing */
	char*		nullfield;	/* -e replacement for empty fields, 0 if none */
	char*		delimstr;	/* multibyte -t delimiter bytes, 0 otherwise */
	int		delim;		/* -t delimiter character, -1 for blank separation */
	int		delimlen;	/* byte length of delimstr */
	int		buffered;	/* set from the -B option; selects sfsetbuf(SF_UNBOUND) on the inputs */
	int		ignorecase;	/* set from the -i option */
	int		mb;		/* result of mbwide() */
	char*		same;		/* copy of a file 2 join field, used by join() to skip repeats */
	int		samesize;	/* allocated size of same */
	void*		context;	/* caller context handed to sh_checksig() */
	File_t		file[2];	/* file1 and file2 */
} Join_t;
165
166 static void
done(register Join_t * jp)167 done(register Join_t* jp)
168 {
169 if (jp->file[0].iop && jp->file[0].iop != sfstdin)
170 sfclose(jp->file[0].iop);
171 if (jp->file[1].iop && jp->file[1].iop != sfstdin)
172 sfclose(jp->file[1].iop);
173 if (jp->outlist)
174 free(jp->outlist);
175 if (jp->file[0].fields)
176 free(jp->file[0].fields);
177 if (jp->file[1].fields)
178 free(jp->file[1].fields);
179 if (jp->same)
180 free(jp->same);
181 free(jp);
182 }
183
184 static Join_t*
init(void)185 init(void)
186 {
187 register Join_t* jp;
188 register int i;
189
190 setlocale(LC_ALL, "");
191 if (jp = newof(0, Join_t, 1, 0))
192 {
193 if (jp->mb = mbwide())
194 for (i = 0x80; i <= 0xff; i++)
195 jp->state[i] = S_WIDE;
196 jp->state[' '] = jp->state['\t'] = S_SPACE;
197 jp->state['\n'] = S_NL;
198 jp->delim = -1;
199 jp->nullfield = 0;
200 if (!(jp->file[0].fields = newof(0, Field_t, NFIELD + 1, 0)) ||
201 !(jp->file[1].fields = newof(0, Field_t, NFIELD + 1, 0)))
202 {
203 done(jp);
204 return 0;
205 }
206 jp->file[0].maxfields = NFIELD;
207 jp->file[1].maxfields = NFIELD;
208 jp->outmode = C_COMMON;
209 }
210 return jp;
211 }
212
213 static int
getolist(Join_t * jp,const char * first,char ** arglist)214 getolist(Join_t* jp, const char* first, char** arglist)
215 {
216 register const char* cp = first;
217 char** argv = arglist;
218 register int c;
219 int* outptr;
220 int* outmax;
221 int nfield = NFIELD;
222 char* str;
223
224 outptr = jp->outlist = newof(0, int, NFIELD + 1, 0);
225 outmax = outptr + NFIELD;
226 while (c = *cp++)
227 {
228 if (c==' ' || c=='\t' || c==',')
229 continue;
230 str = (char*)--cp;
231 if (*cp=='0' && ((c=cp[1])==0 || c==' ' || c=='\t' || c==','))
232 {
233 str++;
234 c = JOINFIELD;
235 goto skip;
236 }
237 if (cp[1]!='.' || (*cp!='1' && *cp!='2') || (c=strtol(cp+2,&str,10)) <=0)
238 {
239 error(2,"%s: invalid field list",first);
240 break;
241 }
242 c--;
243 c <<=2;
244 if (*cp=='2')
245 c |=1;
246 skip:
247 if (outptr >= outmax)
248 {
249 jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0);
250 outptr = jp->outlist + nfield;
251 nfield *= 2;
252 outmax = jp->outlist + nfield;
253 }
254 *outptr++ = c;
255 cp = str;
256 }
257 /* need to accept obsolescent command syntax */
258 while (1)
259 {
260 if (!(cp= *argv) || cp[1]!='.' || (*cp!='1' && *cp!='2'))
261 {
262 if (*cp=='0' && cp[1]==0)
263 {
264 c = JOINFIELD;
265 goto skip2;
266 }
267 break;
268 }
269 str = (char*)cp;
270 c = strtol(cp+2, &str,10);
271 if (*str || --c<0)
272 break;
273 argv++;
274 c <<= 2;
275 if (*cp=='2')
276 c |=1;
277 skip2:
278 if (outptr >= outmax)
279 {
280 jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0);
281 outptr = jp->outlist + nfield;
282 nfield *= 2;
283 outmax = jp->outlist + nfield;
284 }
285 *outptr++ = c;
286 }
287 *outptr = -1;
288 return argv-arglist;
289 }
290
/*
 * read in a record from file <index> and split into fields
 *
 * on success fp->recptr, fp->reclen, fp->fields[], fp->nfields and
 * fp->fieldlen describe the record and the return value points to the
 * join field inside it (past leading blanks if fp->spaces was set);
 * "" is returned with fieldlen 0 if the record has no such field
 *
 * returns 0 when sh_checksig() returns nonzero or when no record could
 * be read; in the latter case this file's bit is cleared in jp->outmode
 *
 * if <discard> is set and the file is marked discard, SFSK_DISCARD is
 * raised on the stream before reading
 */
static unsigned char*
getrec(Join_t* jp, int index, int discard)
{
	register unsigned char*	sp = jp->state;
	register File_t*	fp = &jp->file[index];
	register Field_t*	field = fp->fields;
	register Field_t*	fieldmax = field + fp->maxfields;
	register char*		cp;
	register int		n;
	char*			tp;

	if (sh_checksig(jp->context))
		return 0;
	if (discard && fp->discard)
		sfraise(fp->iop, SFSK_DISCARD, NiL);
	fp->spaces = 0;
	fp->hit = 0;
	if (!(cp = sfgetr(fp->iop, '\n', 0)))
	{
		jp->outmode &= ~(1<<index);
		return 0;
	}
	fp->recptr = cp;
	fp->reclen = sfvalue(fp->iop);
	if (jp->delim == '\n')	/* handle new-line delimiter specially */
	{
		/* the whole record is a single field ending at its last byte */
		field->beg = cp;
		cp += fp->reclen;
		field->end = cp - 1;
		field++;
	}
	else
		do /* separate into fields */
		{
			if (field >= fieldmax)
			{
				/* NOTE(review): the newof() result is not checked here */
				n = 2 * fp->maxfields;
				fp->fields = newof(fp->fields, Field_t, n + 1, 0);
				field = fp->fields + fp->maxfields;
				fp->maxfields = n;
				fieldmax = fp->fields + n;
			}
			field->beg = cp;
			if (jp->delim == -1)
			{
				/* blank separation: step over blanks that start the field */
				switch (sp[*(unsigned char*)cp])
				{
				case S_SPACE:
					cp++;
					break;
				case S_WIDE:
					tp = cp;
					if (iswspace(mbchar(tp)))
					{
						cp = tp;
						break;
					}
					/*FALLTHROUGH*/
				default:
					goto next;
				}
				fp->spaces = 1;
				if (jp->mb)
					for (;;)
					{
						switch (sp[*(unsigned char*)cp++])
						{
						case S_SPACE:
							continue;
						case S_WIDE:
							tp = cp - 1;
							if (iswspace(mbchar(tp)))
							{
								cp = tp;
								continue;
							}
							break;
						}
						break;
					}
				else
					while (sp[*(unsigned char*)cp++]==S_SPACE);
				/* both loops read one byte past the last blank */
				cp--;
			}
		next:
			/* scan to the byte that terminates this field */
			if (jp->mb)
			{
				for (;;)
				{
					tp = cp;
					switch (n = sp[*(unsigned char*)cp++])
					{
					case 0:
						continue;
					case S_WIDE:
						cp--;
						n = mbchar(cp);
						if (n == jp->delim)
						{
							n = S_DELIM;
							break;
						}
						if (jp->delim == -1 && iswspace(n))
						{
							n = S_SPACE;
							break;
						}
						continue;
					}
					break;
				}
				/* tp is the first byte of the terminating character */
				field->end = tp;
			}
			else
			{
				while (!(n = sp[*(unsigned char*)cp++]));
				field->end = cp - 1;
			}
			field++;
		} while (n != S_NL);
	fp->nfields = field - fp->fields;
	if ((n = fp->field) < fp->nfields)
	{
		cp = fp->fields[n].beg;
		/* eliminate leading spaces */
		if (fp->spaces)
		{
			if (jp->mb)
				for (;;)
				{
					switch (sp[*(unsigned char*)cp++])
					{
					case S_SPACE:
						continue;
					case S_WIDE:
						tp = cp - 1;
						if (iswspace(mbchar(tp)))
						{
							cp = tp;
							continue;
						}
						break;
					}
					break;
				}
			else
				while (sp[*(unsigned char*)cp++]==S_SPACE);
			cp--;
		}
		fp->fieldlen = fp->fields[n].end - cp;
		return (unsigned char*)cp;
	}
	/* the record has fewer fields than the join field index */
	fp->fieldlen = 0;
	return (unsigned char*)"";
}
449
450 static unsigned char*
_trace_getrec(Join_t * jp,int index,int discard)451 _trace_getrec(Join_t* jp, int index, int discard)
452 {
453 unsigned char* r;
454
455 r = getrec(jp, index, discard);
456 return r;
457 }
458 #define getrec _trace_getrec
459
#if DEBUG_TRACE
/*
 * debug builds only: log every getrec() call to sfstdout with the calling
 * line, the file index, the stream offset and up to 8 bytes of the result
 * NOTE(review): only u1 is a pointer in this declaration (u2 and u3 are
 * plain unsigned char) and only u1 is referenced in the visible code
 */
static unsigned char* u1,u2,u3;
#define getrec(p,n,d)	(u1 = getrec(p, n, d), sfprintf(sfstdout, "[G%d#%d@%I*d:%-.8s]", __LINE__, n, sizeof(Sfoff_t), sftell(p->file[n].iop), u1), u1)
#endif
464
465 /*
466 * print field <n> from file <index>
467 */
468 static int
outfield(Join_t * jp,int index,register int n,int last)469 outfield(Join_t* jp, int index, register int n, int last)
470 {
471 register File_t* fp = &jp->file[index];
472 register char* cp;
473 register char* cpmax;
474 register int size;
475 register Sfio_t* iop = jp->outfile;
476 char* tp;
477
478 if (n < fp->nfields)
479 {
480 cp = fp->fields[n].beg;
481 cpmax = fp->fields[n].end + 1;
482 }
483 else
484 cp = 0;
485 if ((n = jp->delim) == -1)
486 {
487 if (cp && fp->spaces)
488 {
489 register unsigned char* sp = jp->state;
490
491 /*eliminate leading spaces */
492 if (jp->mb)
493 for (;;)
494 {
495 switch (sp[*(unsigned char*)cp++])
496 {
497 case S_SPACE:
498 continue;
499 case S_WIDE:
500 tp = cp - 1;
501 if (iswspace(mbchar(tp)))
502 {
503 cp = tp;
504 continue;
505 }
506 break;
507 }
508 break;
509 }
510 else
511 while (sp[*(unsigned char*)cp++]==S_SPACE);
512 cp--;
513 }
514 n = ' ';
515 }
516 else if (jp->delimstr)
517 n = -1;
518 if (last)
519 n = '\n';
520 if (cp)
521 size = cpmax - cp;
522 else
523 size = 0;
524 if (n == -1)
525 {
526 if (size<=1)
527 {
528 if (jp->nullfield && sfputr(iop, jp->nullfield, -1) < 0)
529 return -1;
530 }
531 else if (sfwrite(iop, cp, size) < 0)
532 return -1;
533 if (sfwrite(iop, jp->delimstr, jp->delimlen) < 0)
534 return -1;
535 }
536 else if (size <= 1)
537 {
538 if (!jp->nullfield)
539 sfputc(iop, n);
540 else if (sfputr(iop, jp->nullfield, n) < 0)
541 return -1;
542 }
543 else
544 {
545 last = cp[size-1];
546 cp[size-1] = n;
547 if (sfwrite(iop, cp, size) < 0)
548 return -1;
549 cp[size-1] = last;
550 }
551 return 0;
552 }
553
#if DEBUG_TRACE
/*
 * debug builds only: log every outfield() call to sfstdout with the
 * calling line and its file, field and last arguments; the arguments are
 * first stored in i1, i2 and i3 so each is evaluated once
 */
static int i1,i2,i3;
#define outfield(p,i,n,f)	(sfprintf(sfstdout, "[F%d#%d:%d,%d]", __LINE__, i1=i, i2=n, i3=f), outfield(p, i1, i2, i3))
#endif
558
559 static int
outrec(register Join_t * jp,int mode)560 outrec(register Join_t* jp, int mode)
561 {
562 register File_t* fp;
563 register int i;
564 register int j;
565 register int k;
566 register int n;
567 int* out;
568
569 if (mode < 0 && jp->file[0].hit++)
570 return 0;
571 if (mode > 0 && jp->file[1].hit++)
572 return 0;
573 if (out = jp->outlist)
574 {
575 while ((n = *out++) >= 0)
576 {
577 if (n == JOINFIELD)
578 {
579 i = mode >= 0;
580 j = jp->file[i].field;
581 }
582 else
583 {
584 i = n & 1;
585 j = (mode<0 && i || mode>0 && !i) ?
586 jp->file[i].nfields :
587 n >> 2;
588 }
589 if (outfield(jp, i, j, *out < 0) < 0)
590 return -1;
591 }
592 return 0;
593 }
594 k = jp->file[0].nfields;
595 if (mode >= 0)
596 k += jp->file[1].nfields - 1;
597 for (i=0; i<2; i++)
598 {
599 fp = &jp->file[i];
600 if (mode>0 && i==0)
601 {
602 k -= (fp->nfields - 1);
603 continue;
604 }
605 n = fp->field;
606 if (mode||i==0)
607 {
608 /* output join field first */
609 if (outfield(jp,i,n,!--k) < 0)
610 return -1;
611 if (!k)
612 return 0;
613 for (j=0; j<n; j++)
614 {
615 if (outfield(jp,i,j,!--k) < 0)
616 return -1;
617 if (!k)
618 return 0;
619 }
620 j = n + 1;
621 }
622 else
623 j = 0;
624 for (;j<fp->nfields; j++)
625 {
626 if (j!=n && outfield(jp,i,j,!--k) < 0)
627 return -1;
628 if (!k)
629 return 0;
630 }
631 }
632 return 0;
633 }
634
#if DEBUG_TRACE
/*
 * debug builds only: log every outrec() call to sfstdout; the expansion
 * refers to jp, lo, hi, cp1 and cp2 and so only works where those names
 * are in scope, as they are inside join()
 */
#define outrec(p,n)	(sfprintf(sfstdout, "[R#%d,%d,%lld,%lld:%-.*s{%d}:%-.*s{%d}]", __LINE__, i1=n, lo, hi, jp->file[0].fieldlen, cp1, jp->file[0].hit, jp->file[1].fieldlen, cp2, jp->file[1].hit), outrec(p, i1))
#endif
638
/*
 * merge the two inputs and write paired and/or unpaired records as
 * selected by jp->outmode
 *
 * lo and hi are file 2 offsets, -1 when unset: lo is where the current
 * run of file 2 records matching the file 1 record begins, so that the
 * run can be read again for the next file 1 record; hi is where reading
 * had got to before seeking back to lo
 *
 * returns 0 when done and -1 after an output, seek or allocation failure
 */
static int
join(Join_t* jp)
{
	register unsigned char*	cp1;
	register unsigned char*	cp2;
	register int		n1;
	register int		n2;
	register int		n;
	register int		cmp;
	register int		same;
	int			o2;
	Sfoff_t			lo = -1;
	Sfoff_t			hi = -1;

	/* cp2 is forced to 0 when either initial read fails */
	if ((cp1 = getrec(jp, 0, 0)) && (cp2 = getrec(jp, 1, 0)) || (cp2 = 0))
	{
		n1 = jp->file[0].fieldlen;
		n2 = jp->file[1].fieldlen;
		same = 0;
		for (;;)
		{
			/*
			 * compare the join fields: by length alone if either is
			 * empty, otherwise first byte, then the common prefix,
			 * then length
			 */
			n = n1 < n2 ? n1 : n2;
#if DEBUG_TRACE
			if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n)))
				cmp = n1 - n2;
sfprintf(sfstdout, "[C#%d:%d(%c-%c),%d,%lld,%lld%s]", __LINE__, cmp, *cp1, *cp2, same, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : "");
			if (!cmp)
#else
			if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n)) && !(cmp = n1 - n2))
#endif
			{
				/* the join fields are equal */
				if (!(jp->outmode & C_COMMON))
				{
					/* pairs are not written: move on in file 1 */
					if (cp1 = getrec(jp, 0, 1))
					{
						n1 = jp->file[0].fieldlen;
						same = 1;
						continue;
					}
					if ((jp->ooutmode & (C_FILE1|C_FILE2)) != C_FILE2)
						break;
					/* file 1 ended: step back and read its last record again */
					if (sfseek(jp->file[0].iop, (Sfoff_t)-jp->file[0].reclen, SEEK_CUR) < 0 || !(cp1 = getrec(jp, 0, 0)))
					{
						error(ERROR_SYSTEM|2, "%s: seek error", jp->file[0].name);
						return -1;
					}
				}
				else if (outrec(jp, 0) < 0)
					return -1;
				else if (lo < 0 && (jp->outmode & C_COMMON))
				{
					/* first match of a run: remember where this file 2 record starts */
					if ((lo = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0)
					{
						error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
						return -1;
					}
					lo -= jp->file[1].reclen;
				}
				if (cp2 = getrec(jp, 1, lo < 0))
				{
					n2 = jp->file[1].fieldlen;
					continue;
				}
#if DEBUG_TRACE
sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi);
#endif
			}
			else if (cmp > 0)
			{
				/* the file 1 field compares greater: advance file 2 */
				if (same)
				{
					same = 0;
				next:
					/* skip following file 2 records with this same join field */
					if (n2 > jp->samesize)
					{
						jp->samesize = roundof(n2, 16);
						if (!(jp->same = newof(jp->same, char, jp->samesize, 0)))
						{
							error(ERROR_SYSTEM|2, "out of space");
							return -1;
						}
					}
					memcpy(jp->same, cp2, o2 = n2);
					if (!(cp2 = getrec(jp, 1, 0)))
						break;
					n2 = jp->file[1].fieldlen;
					if (n2 == o2 && *cp2 == *jp->same && !memcmp(cp2, jp->same, n2))
						goto next;
					continue;
				}
				if (hi >= 0)
				{
					/* a run was replayed: resume where reading had got to */
					if (sfseek(jp->file[1].iop, hi, SEEK_SET) != hi)
					{
						error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
						return -1;
					}
					hi = -1;
				}
				else if ((jp->outmode & C_FILE2) && outrec(jp, 1) < 0)
					return -1;
				lo = -1;
				if (cp2 = getrec(jp, 1, 1))
				{
					n2 = jp->file[1].fieldlen;
					continue;
				}
#if DEBUG_TRACE
sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi);
#endif
			}
			else if (same)
			{
				same = 0;
				if (!(cp1 = getrec(jp, 0, 0)))
					break;
				n1 = jp->file[0].fieldlen;
				continue;
			}
			/*
			 * reached when the file 1 field compares less, or when
			 * file 2 ran out in one of the branches above
			 */
			if (lo >= 0)
			{
				/* rewind file 2 to the start of the run for the next file 1 record */
				if ((hi = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0 ||
				    (hi -= jp->file[1].reclen) < 0 ||
				    sfseek(jp->file[1].iop, lo, SEEK_SET) != lo ||
				    !(cp2 = getrec(jp, 1, 0)))
				{
					error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
					return -1;
				}
				n2 = jp->file[1].fieldlen;
				lo = -1;
				if (jp->file[1].discard)
					sfseek(jp->file[1].iop, (Sfoff_t)-1, SEEK_SET);
			}
			else if (!cp2)
				break;
			else if ((jp->outmode & C_FILE1) && outrec(jp, -1) < 0)
				return -1;
			if (!(cp1 = getrec(jp, 0, 1)))
				break;
			n1 = jp->file[0].fieldlen;
		}
	}
#if DEBUG_TRACE
sfprintf(sfstdout, "[X#%d:?,%p,%p,%d%,%d,%d%s]", __LINE__, cp1, cp2, cmp, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : "");
#endif
	/*
	 * the merge is over: select the input that may still hold unpaired
	 * records (n is its index, cmp the matching outrec() mode)
	 */
	if (cp2)
	{
		if (hi >= 0 &&
		    sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR) < hi &&
		    sfseek(jp->file[1].iop, hi, SEEK_SET) != hi)
		{
			error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
			return -1;
		}
#if DEBUG_TRACE
sfprintf(sfstdout, "[O#%d:%02o:%02o]", __LINE__, jp->ooutmode, jp->outmode);
#endif
		/* cp1 is reused from here on as the current file 2 record */
		cp1 = (!cp1 && cmp && hi < 0 && !jp->file[1].hit && ((jp->ooutmode ^ C_ALL) <= 1 || jp->outmode == 2)) ? cp2 : getrec(jp, 1, 0);
		cmp = 1;
		n = 1;
	}
	else
	{
		cmp = -1;
		n = 0;
	}
#if DEBUG_TRACE
sfprintf(sfstdout, "[X#%d:%d,%p,%p,%d,%02o,%02o%s]", __LINE__, n, cp1, cp2, cmp, jp->ooutmode, jp->outmode, (jp->outmode & C_COMMON) ? ",COMMON" : "");
#endif
	if (!cp1 || !(jp->outmode & (1<<n)))
	{
		/* nothing more to write: position standard input at its end */
		if (cp1 && jp->file[n].iop == sfstdin)
			sfseek(sfstdin, (Sfoff_t)0, SEEK_END);
		return 0;
	}
	if (outrec(jp, cmp) < 0)
		return -1;
	do
	{
		if (!getrec(jp, n, 1))
			return 0;
	} while (outrec(jp, cmp) >= 0);
	return -1;
}
824
825 int
b_join(int argc,char ** argv,void * context)826 b_join(int argc, char** argv, void* context)
827 {
828 register int n;
829 register char* cp;
830 register Join_t* jp;
831 char* e;
832
833 #if !DEBUG_TRACE
834 cmdinit(argc, argv, context, ERROR_CATALOG, ERROR_NOTIFY);
835 #endif
836 if (!(jp = init()))
837 error(ERROR_system(1),"out of space");
838 jp->context = context;
839 for (;;)
840 {
841 switch (n = optget(argv, usage))
842 {
843 case 0:
844 break;
845 case 'j':
846 /*
847 * check for obsolete "-j1 field" and "-j2 field"
848 */
849
850 if (opt_info.offset == 0)
851 {
852 cp = argv[opt_info.index - 1];
853 for (n = strlen(cp) - 1; n > 0 && cp[n] != 'j'; n--);
854 n = cp[n] == 'j';
855 }
856 else
857 n = 0;
858 if (n)
859 {
860 if (opt_info.num!=1 && opt_info.num!=2)
861 error(2,"-jfileno field: fileno must be 1 or 2");
862 n = '0' + opt_info.num;
863 if (!(cp = argv[opt_info.index]))
864 {
865 argc = 0;
866 break;
867 }
868 opt_info.num = strtol(cp, &e, 10);
869 if (*e)
870 {
871 argc = 0;
872 break;
873 }
874 opt_info.index++;
875 }
876 else
877 {
878 jp->file[0].field = (int)(opt_info.num-1);
879 n = '2';
880 }
881 /*FALLTHROUGH*/
882 case '1':
883 case '2':
884 if (opt_info.num <=0)
885 error(2,"field number must positive");
886 jp->file[n-'1'].field = (int)(opt_info.num-1);
887 continue;
888 case 'v':
889 jp->outmode &= ~C_COMMON;
890 /*FALLTHROUGH*/
891 case 'a':
892 if (opt_info.num!=1 && opt_info.num!=2)
893 error(2,"%s: file number must be 1 or 2", opt_info.name);
894 jp->outmode |= 1<<(opt_info.num-1);
895 continue;
896 case 'e':
897 jp->nullfield = opt_info.arg;
898 continue;
899 case 'o':
900 /* need to accept obsolescent command syntax */
901 n = getolist(jp, opt_info.arg, argv+opt_info.index);
902 opt_info.index += n;
903 continue;
904 case 't':
905 jp->state[' '] = jp->state['\t'] = 0;
906 if (jp->mb)
907 {
908 cp = opt_info.arg;
909 jp->delim = mbchar(cp);
910 if ((n = cp - opt_info.arg) > 1)
911 {
912 jp->delimlen = n;
913 jp->delimstr = opt_info.arg;
914 continue;
915 }
916 }
917 n = *(unsigned char*)opt_info.arg;
918 jp->state[n] = S_DELIM;
919 jp->delim = n;
920 continue;
921 case 'i':
922 jp->ignorecase = !opt_info.num;
923 continue;
924 case 'B':
925 jp->buffered = !opt_info.num;
926 continue;
927 case ':':
928 error(2, "%s", opt_info.arg);
929 break;
930 case '?':
931 done(jp);
932 error(ERROR_usage(2), "%s", opt_info.arg);
933 break;
934 }
935 break;
936 }
937 argv += opt_info.index;
938 argc -= opt_info.index;
939 if (error_info.errors || argc!=2)
940 {
941 done(jp);
942 error(ERROR_usage(2),"%s", optusage(NiL));
943 }
944 jp->ooutmode = jp->outmode;
945 jp->file[0].name = cp = *argv++;
946 if (streq(cp,"-"))
947 {
948 if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0)
949 {
950 if (sfdcseekable(sfstdin))
951 error(ERROR_warn(0),"%s: seek may fail",cp);
952 else
953 jp->file[0].discard = 1;
954 }
955 jp->file[0].iop = sfstdin;
956 }
957 else if (!(jp->file[0].iop = sfopen(NiL, cp, "r")))
958 {
959 done(jp);
960 error(ERROR_system(1),"%s: cannot open",cp);
961 }
962 jp->file[1].name = cp = *argv;
963 if (streq(cp,"-"))
964 {
965 if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0)
966 {
967 if (sfdcseekable(sfstdin))
968 error(ERROR_warn(0),"%s: seek may fail",cp);
969 else
970 jp->file[1].discard = 1;
971 }
972 jp->file[1].iop = sfstdin;
973 }
974 else if (!(jp->file[1].iop = sfopen(NiL, cp, "r")))
975 {
976 done(jp);
977 error(ERROR_system(1),"%s: cannot open",cp);
978 }
979 if (jp->buffered)
980 {
981 sfsetbuf(jp->file[0].iop, jp->file[0].iop, SF_UNBOUND);
982 sfsetbuf(jp->file[1].iop, jp->file[1].iop, SF_UNBOUND);
983 }
984 jp->outfile = sfstdout;
985 if (!jp->outlist)
986 jp->nullfield = 0;
987 if (join(jp) < 0)
988 {
989 done(jp);
990 error(ERROR_system(1),"write error");
991 }
992 else if (jp->file[0].iop==sfstdin || jp->file[1].iop==sfstdin)
993 sfseek(sfstdin,(Sfoff_t)0,SEEK_END);
994 done(jp);
995 return error_info.errors;
996 }
997