1 /***********************************************************************
2 * *
3 * This software is part of the ast package *
4 * Copyright (c) 1992-2012 AT&T Intellectual Property *
5 * and is licensed under the *
6 * Eclipse Public License, Version 1.0 *
7 * by AT&T Intellectual Property *
8 * *
9 * A copy of the License is available at *
10 * http://www.eclipse.org/org/documents/epl-v10.html *
11 * (with md5 checksum b35adb5213ca9657e911e9befb180842) *
12 * *
13 * Information and Software Systems Research *
14 * AT&T Research *
15 * Florham Park NJ *
16 * *
17 * Glenn Fowler <gsf@research.att.com> *
18 * David Korn <dgk@research.att.com> *
19 * *
20 ***********************************************************************/
21 #pragma prototyped
22 /*
23 * David Korn
24 * Glenn Fowler
25 * AT&T Research
26 *
27 * join
28 */
29
30 static const char usage[] =
31 "[-?\n@(#)$Id: join (AT&T Research) 2009-12-10 $\n]"
32 USAGE_LICENSE
33 "[+NAME?join - relational database operator]"
34 "[+DESCRIPTION?\bjoin\b performs an \aequality join\a on the files \afile1\a "
35 "and \afile2\a and writes the resulting joined files to standard "
36 "output. By default, a field is delimited by one or more spaces "
37 "and tabs with leading spaces and/or tabs ignored. The \b-t\b option "
38 "can be used to change the field delimiter.]"
39 "[+?The \ajoin field\a is a field in each file on which files are compared. "
40 "By default \bjoin\b writes one line in the output for each pair "
41 "of lines in \afiles1\a and \afiles2\a that have identical join "
42 "fields. The default output line consists of the join field, "
43 "then the remaining fields from \afile1\a, then the remaining "
44 "fields from \afile2\a, but this can be changed with the \b-o\b "
45 "option. The \b-a\b option can be used to add unmatched lines "
46 "to the output. The \b-v\b option can be used to output only "
47 "unmatched lines.]"
48 "[+?The files \afile1\a and \afile2\a must be ordered in the collating "
49 "sequence of \bsort -b\b on the fields on which they are to be "
50 "joined otherwise the results are unspecified.]"
51 "[+?If either \afile1\a or \afile2\a is \b-\b, \bjoin\b "
52 "uses standard input starting at the current location.]"
53
54 "[e:empty]:[string?Replace empty output fields in the list selected with"
55 " \b-o\b with \astring\a.]"
56 "[o:output]:[list?Construct the output line to comprise the fields specified "
57 "in a blank or comma separated list \alist\a. Each element in "
58 "\alist\a consists of a file number (either 1 or 2), a period, "
59 "and a field number or \b0\b representing the join field. "
60 "As an obsolete feature multiple occurrences of \b-o\b can "
61 "be specified.]"
62 "[t:separator|tabs]:[delim?Use \adelim\a as the field separator for both input"
63 " and output.]"
64 "[1:j1]#[field?Join on field \afield\a of \afile1\a. Fields start at 1.]"
65 "[2:j2]#[field?Join on field \afield\a of \afile2\a. Fields start at 1.]"
66 "[j:join]#[field?Equivalent to \b-1\b \afield\a \b-2\b \afield\a.]"
67 "[a:unpairable]#[fileno?Write a line for each unpairable line in file"
68 " \afileno\a, where \afileno\a is either 1 or 2, in addition to the"
69 " normal output. If \b-a\b options appear for both 1 and 2, then "
70 "all unpairable lines will be output.]"
71 "[v:suppress]#[fileno?Write a line for each unpairable line in file"
72 " \afileno\a, where \afileno\a is either 1 or 2, instead of the normal "
73 "output. If \b-v\b options appear for both 1 and 2, then "
74 "all unpairable lines will be output.] ]"
75 "[i:ignorecase?Ignore case in field comparisons.]"
76 "[B!:mmap?Enable memory mapped reads instead of buffered.]"
77
78 "[+?The following obsolete option forms are also recognized: \b-j\b \afield\a"
79 " is equivalent to \b-1\b \afield\a \b-2\b \afield\a, \b-j1\b \afield\a"
80 " is equivalent to \b-1\b \afield\a, and \b-j2\b \afield\a is"
81 " equivalent to \b-2\b \afield\a.]"
82
83 "\n"
84 "\nfile1 file2\n"
85 "\n"
86 "[+EXIT STATUS?]{"
87 "[+0?Both files processed successfully.]"
88 "[+>0?An error occurred.]"
89 "}"
90 "[+SEE ALSO?\bcut\b(1), \bcomm\b(1), \bpaste\b(1), \bsort\b(1), \buniq\b(1)]"
91 ;
92
93 #include <cmd.h>
94 #include <sfdisc.h>
95
96 #if _hdr_wchar && _hdr_wctype && _lib_iswctype
97
98 #include <wchar.h>
99 #include <wctype.h>
100
101 #else
102
103 #include <ctype.h>
104
105 #ifndef iswspace
106 #define iswspace(x) isspace(x)
107 #endif
108
109 #endif
110
/*
 * output selection bits kept in Join_t.outmode and Join_t.ooutmode;
 * the bit for file <index> is 1<<index
 */
#define C_FILE1		0x1	/* write unpairable lines of file 1	*/
#define C_FILE2		0x2	/* write unpairable lines of file 2	*/
#define C_COMMON	0x4	/* write the joined (paired) lines	*/
#define C_ALL		(C_FILE1|C_FILE2|C_COMMON)

#define NFIELD		10	/* initial size of the field and -o tables */

/*
 * -o list entries are (field-1)<<2 | (file==2), so the low two bits
 * are 0 or 1; the value 2 marks "the join field"
 */
#define JOINFIELD	2

/*
 * byte classes stored in Join_t.state[]; 0 is an ordinary data byte
 */
#define S_DELIM		1	/* the -t delimiter byte		*/
#define S_SPACE		2	/* blank or tab (default delimiter)	*/
#define S_NL		3	/* new-line, ends the record		*/
#define S_WIDE		4	/* byte >= 0x80 in a multibyte locale	*/
123
/*
 * one field of the current record, as pointers into the record buffer
 */
typedef struct Field_s
{
	char	*beg;	/* where the field scan started; in default-delimiter mode this may still include leading blanks */
	char	*end;	/* the byte that terminated the field (delimiter or new-line) */
} Field_t;
129
130 typedef struct File_s
131 {
132 Sfio_t* iop;
133 char* name;
134 char* recptr;
135 int reclen;
136 int field;
137 int fieldlen;
138 int nfields;
139 int maxfields;
140 int spaces;
141 int hit;
142 int discard;
143 Field_t* fields;
144 } File_t;
145
146 typedef struct Join_s
147 {
148 unsigned char state[1<<CHAR_BIT];
149 Sfio_t* outfile;
150 int* outlist;
151 int outmode;
152 int ooutmode;
153 char* nullfield;
154 char* delimstr;
155 int delim;
156 int delimlen;
157 int buffered;
158 int ignorecase;
159 int mb;
160 char* same;
161 int samesize;
162 Shbltin_t* context;
163 File_t file[2];
164 } Join_t;
165
166 static void
done(register Join_t * jp)167 done(register Join_t* jp)
168 {
169 if (jp->file[0].iop && jp->file[0].iop != sfstdin)
170 sfclose(jp->file[0].iop);
171 if (jp->file[1].iop && jp->file[1].iop != sfstdin)
172 sfclose(jp->file[1].iop);
173 if (jp->outlist)
174 free(jp->outlist);
175 if (jp->file[0].fields)
176 free(jp->file[0].fields);
177 if (jp->file[1].fields)
178 free(jp->file[1].fields);
179 if (jp->same)
180 free(jp->same);
181 free(jp);
182 }
183
184 static Join_t*
init(void)185 init(void)
186 {
187 register Join_t* jp;
188 register int i;
189
190 setlocale(LC_ALL, "");
191 if (jp = newof(0, Join_t, 1, 0))
192 {
193 if (jp->mb = mbwide())
194 for (i = 0x80; i <= 0xff; i++)
195 jp->state[i] = S_WIDE;
196 jp->state[' '] = jp->state['\t'] = S_SPACE;
197 jp->state['\n'] = S_NL;
198 jp->delim = -1;
199 jp->nullfield = 0;
200 if (!(jp->file[0].fields = newof(0, Field_t, NFIELD + 1, 0)) ||
201 !(jp->file[1].fields = newof(0, Field_t, NFIELD + 1, 0)))
202 {
203 done(jp);
204 return 0;
205 }
206 jp->file[0].maxfields = NFIELD;
207 jp->file[1].maxfields = NFIELD;
208 jp->outmode = C_COMMON;
209 }
210 return jp;
211 }
212
213 static int
getolist(Join_t * jp,const char * first,char ** arglist)214 getolist(Join_t* jp, const char* first, char** arglist)
215 {
216 register const char* cp = first;
217 char** argv = arglist;
218 register int c;
219 int* outptr;
220 int* outmax;
221 int nfield = NFIELD;
222 char* str;
223
224 outptr = jp->outlist = newof(0, int, NFIELD + 1, 0);
225 outmax = outptr + NFIELD;
226 while (c = *cp++)
227 {
228 if (c==' ' || c=='\t' || c==',')
229 continue;
230 str = (char*)--cp;
231 if (*cp=='0' && ((c=cp[1])==0 || c==' ' || c=='\t' || c==','))
232 {
233 str++;
234 c = JOINFIELD;
235 goto skip;
236 }
237 if (cp[1]!='.' || (*cp!='1' && *cp!='2') || (c=strtol(cp+2,&str,10)) <=0)
238 {
239 error(2,"%s: invalid field list",first);
240 break;
241 }
242 c--;
243 c <<=2;
244 if (*cp=='2')
245 c |=1;
246 skip:
247 if (outptr >= outmax)
248 {
249 jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0);
250 outptr = jp->outlist + nfield;
251 nfield *= 2;
252 outmax = jp->outlist + nfield;
253 }
254 *outptr++ = c;
255 cp = str;
256 }
257 /* need to accept obsolescent command syntax */
258 while (cp = *argv)
259 {
260 if (cp[1]!='.' || (*cp!='1' && *cp!='2'))
261 {
262 if (*cp=='0' && cp[1]==0)
263 {
264 c = JOINFIELD;
265 goto skip2;
266 }
267 break;
268 }
269 str = (char*)cp;
270 c = strtol(cp+2, &str,10);
271 if (*str || --c<0)
272 break;
273 argv++;
274 c <<= 2;
275 if (*cp=='2')
276 c |=1;
277 skip2:
278 if (outptr >= outmax)
279 {
280 jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0);
281 outptr = jp->outlist + nfield;
282 nfield *= 2;
283 outmax = jp->outlist + nfield;
284 }
285 *outptr++ = c;
286 }
287 *outptr = -1;
288 return argv-arglist;
289 }
290
291 /*
292 * read in a record from file <index> and split into fields
293 */
294 static unsigned char*
getrec(Join_t * jp,int index,int discard)295 getrec(Join_t* jp, int index, int discard)
296 {
297 register unsigned char* sp = jp->state;
298 register File_t* fp = &jp->file[index];
299 register Field_t* field = fp->fields;
300 register Field_t* fieldmax = field + fp->maxfields;
301 register char* cp;
302 register int n;
303 char* tp;
304
305 if (sh_checksig(jp->context))
306 return 0;
307 if (discard && fp->discard)
308 sfraise(fp->iop, SFSK_DISCARD, NiL);
309 fp->spaces = 0;
310 fp->hit = 0;
311 if (!(cp = sfgetr(fp->iop, '\n', 0)))
312 {
313 jp->outmode &= ~(1<<index);
314 return 0;
315 }
316 fp->recptr = cp;
317 fp->reclen = sfvalue(fp->iop);
318 if (jp->delim == '\n') /* handle new-line delimiter specially */
319 {
320 field->beg = cp;
321 cp += fp->reclen;
322 field->end = cp - 1;
323 field++;
324 }
325 else
326 do /* separate into fields */
327 {
328 if (field >= fieldmax)
329 {
330 n = 2 * fp->maxfields;
331 fp->fields = newof(fp->fields, Field_t, n + 1, 0);
332 field = fp->fields + fp->maxfields;
333 fp->maxfields = n;
334 fieldmax = fp->fields + n;
335 }
336 field->beg = cp;
337 if (jp->delim == -1)
338 {
339 switch (sp[*(unsigned char*)cp])
340 {
341 case S_SPACE:
342 cp++;
343 break;
344 case S_WIDE:
345 tp = cp;
346 if (iswspace(mbchar(tp)))
347 {
348 cp = tp;
349 break;
350 }
351 /*FALLTHROUGH*/
352 default:
353 goto next;
354 }
355 fp->spaces = 1;
356 if (jp->mb)
357 for (;;)
358 {
359 switch (sp[*(unsigned char*)cp++])
360 {
361 case S_SPACE:
362 continue;
363 case S_WIDE:
364 tp = cp - 1;
365 if (iswspace(mbchar(tp)))
366 {
367 cp = tp;
368 continue;
369 }
370 break;
371 }
372 break;
373 }
374 else
375 while (sp[*(unsigned char*)cp++]==S_SPACE);
376 cp--;
377 }
378 next:
379 if (jp->mb)
380 {
381 for (;;)
382 {
383 tp = cp;
384 switch (n = sp[*(unsigned char*)cp++])
385 {
386 case 0:
387 continue;
388 case S_WIDE:
389 cp--;
390 n = mbchar(cp);
391 if (n == jp->delim)
392 {
393 n = S_DELIM;
394 break;
395 }
396 if (jp->delim == -1 && iswspace(n))
397 {
398 n = S_SPACE;
399 break;
400 }
401 continue;
402 }
403 break;
404 }
405 field->end = tp;
406 }
407 else
408 {
409 while (!(n = sp[*(unsigned char*)cp++]));
410 field->end = cp - 1;
411 }
412 field++;
413 } while (n != S_NL);
414 fp->nfields = field - fp->fields;
415 if ((n = fp->field) < fp->nfields)
416 {
417 cp = fp->fields[n].beg;
418 /* eliminate leading spaces */
419 if (fp->spaces)
420 {
421 if (jp->mb)
422 for (;;)
423 {
424 switch (sp[*(unsigned char*)cp++])
425 {
426 case S_SPACE:
427 continue;
428 case S_WIDE:
429 tp = cp - 1;
430 if (iswspace(mbchar(tp)))
431 {
432 cp = tp;
433 continue;
434 }
435 break;
436 }
437 break;
438 }
439 else
440 while (sp[*(unsigned char*)cp++]==S_SPACE);
441 cp--;
442 }
443 fp->fieldlen = fp->fields[n].end - cp;
444 return (unsigned char*)cp;
445 }
446 fp->fieldlen = 0;
447 return (unsigned char*)"";
448 }
449
450 #if DEBUG_TRACE
451 static unsigned char* u1;
452 #define getrec(p,n,d) (u1 = getrec(p, n, d), sfprintf(sfstdout, "[G%d#%d@%I*d:%-.8s]", __LINE__, n, sizeof(Sfoff_t), sftell(p->file[n].iop), u1), u1)
453 #endif
454
455 /*
456 * print field <n> from file <index>
457 */
458 static int
outfield(Join_t * jp,int index,register int n,int last)459 outfield(Join_t* jp, int index, register int n, int last)
460 {
461 register File_t* fp = &jp->file[index];
462 register char* cp;
463 register char* cpmax;
464 register int size;
465 register Sfio_t* iop = jp->outfile;
466 char* tp;
467
468 if (n < fp->nfields)
469 {
470 cp = fp->fields[n].beg;
471 cpmax = fp->fields[n].end + 1;
472 }
473 else
474 cp = 0;
475 if ((n = jp->delim) == -1)
476 {
477 if (cp && fp->spaces)
478 {
479 register unsigned char* sp = jp->state;
480
481 /*eliminate leading spaces */
482 if (jp->mb)
483 for (;;)
484 {
485 switch (sp[*(unsigned char*)cp++])
486 {
487 case S_SPACE:
488 continue;
489 case S_WIDE:
490 tp = cp - 1;
491 if (iswspace(mbchar(tp)))
492 {
493 cp = tp;
494 continue;
495 }
496 break;
497 }
498 break;
499 }
500 else
501 while (sp[*(unsigned char*)cp++]==S_SPACE);
502 cp--;
503 }
504 n = ' ';
505 }
506 else if (jp->delimstr)
507 n = -1;
508 if (last)
509 n = '\n';
510 if (cp)
511 size = cpmax - cp;
512 else
513 size = 0;
514 if (n == -1)
515 {
516 if (size<=1)
517 {
518 if (jp->nullfield && sfputr(iop, jp->nullfield, -1) < 0)
519 return -1;
520 }
521 else if (sfwrite(iop, cp, size) < 0)
522 return -1;
523 if (sfwrite(iop, jp->delimstr, jp->delimlen) < 0)
524 return -1;
525 }
526 else if (size <= 1)
527 {
528 if (!jp->nullfield)
529 sfputc(iop, n);
530 else if (sfputr(iop, jp->nullfield, n) < 0)
531 return -1;
532 }
533 else
534 {
535 last = cp[size-1];
536 cp[size-1] = n;
537 if (sfwrite(iop, cp, size) < 0)
538 return -1;
539 cp[size-1] = last;
540 }
541 return 0;
542 }
543
544 #if DEBUG_TRACE
545 static int i1,i2,i3;
546 #define outfield(p,i,n,f) (sfprintf(sfstdout, "[F%d#%d:%d,%d]", __LINE__, i1=i, i2=n, i3=f), outfield(p, i1, i2, i3))
547 #endif
548
549 static int
outrec(register Join_t * jp,int mode)550 outrec(register Join_t* jp, int mode)
551 {
552 register File_t* fp;
553 register int i;
554 register int j;
555 register int k;
556 register int n;
557 int* out;
558
559 if (mode < 0 && jp->file[0].hit++)
560 return 0;
561 if (mode > 0 && jp->file[1].hit++)
562 return 0;
563 if (out = jp->outlist)
564 {
565 while ((n = *out++) >= 0)
566 {
567 if (n == JOINFIELD)
568 {
569 i = mode >= 0;
570 j = jp->file[i].field;
571 }
572 else
573 {
574 i = n & 1;
575 j = (mode<0 && i || mode>0 && !i) ?
576 jp->file[i].nfields :
577 n >> 2;
578 }
579 if (outfield(jp, i, j, *out < 0) < 0)
580 return -1;
581 }
582 return 0;
583 }
584 k = jp->file[0].nfields;
585 if (mode >= 0)
586 k += jp->file[1].nfields - 1;
587 for (i=0; i<2; i++)
588 {
589 fp = &jp->file[i];
590 if (mode>0 && i==0)
591 {
592 k -= (fp->nfields - 1);
593 continue;
594 }
595 n = fp->field;
596 if (mode||i==0)
597 {
598 /* output join field first */
599 if (outfield(jp,i,n,!--k) < 0)
600 return -1;
601 if (!k)
602 return 0;
603 for (j=0; j<n; j++)
604 {
605 if (outfield(jp,i,j,!--k) < 0)
606 return -1;
607 if (!k)
608 return 0;
609 }
610 j = n + 1;
611 }
612 else
613 j = 0;
614 for (;j<fp->nfields; j++)
615 {
616 if (j!=n && outfield(jp,i,j,!--k) < 0)
617 return -1;
618 if (!k)
619 return 0;
620 }
621 }
622 return 0;
623 }
624
625 #if DEBUG_TRACE
626 #define outrec(p,n) (sfprintf(sfstdout, "[R#%d,%d,%lld,%lld:%-.*s{%d}:%-.*s{%d}]", __LINE__, i1=n, lo, hi, jp->file[0].fieldlen, cp1, jp->file[0].hit, jp->file[1].fieldlen, cp2, jp->file[1].hit), outrec(p, i1))
627 #endif
628
629 static int
join(Join_t * jp)630 join(Join_t* jp)
631 {
632 register unsigned char* cp1;
633 register unsigned char* cp2;
634 register int n1;
635 register int n2;
636 register int n;
637 register int cmp;
638 register int same;
639 int o2;
640 Sfoff_t lo = -1;
641 Sfoff_t hi = -1;
642
643 if ((cp1 = getrec(jp, 0, 0)) && (cp2 = getrec(jp, 1, 0)) || (cp2 = 0))
644 {
645 n1 = jp->file[0].fieldlen;
646 n2 = jp->file[1].fieldlen;
647 same = 0;
648 for (;;)
649 {
650 n = n1 < n2 ? n1 : n2;
651 #if DEBUG_TRACE
652 if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n)))
653 cmp = n1 - n2;
654 sfprintf(sfstdout, "[C#%d:%d(%c-%c),%d,%lld,%lld%s]", __LINE__, cmp, *cp1, *cp2, same, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : "");
655 if (!cmp)
656 #else
657 if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n)) && !(cmp = n1 - n2))
658 #endif
659 {
660 if (!(jp->outmode & C_COMMON))
661 {
662 if (cp1 = getrec(jp, 0, 1))
663 {
664 n1 = jp->file[0].fieldlen;
665 same = 1;
666 continue;
667 }
668 if ((jp->ooutmode & (C_FILE1|C_FILE2)) != C_FILE2)
669 break;
670 if (sfseek(jp->file[0].iop, (Sfoff_t)-jp->file[0].reclen, SEEK_CUR) < 0 || !(cp1 = getrec(jp, 0, 0)))
671 {
672 error(ERROR_SYSTEM|2, "%s: seek error", jp->file[0].name);
673 return -1;
674 }
675 }
676 else if (outrec(jp, 0) < 0)
677 return -1;
678 else if (lo < 0 && (jp->outmode & C_COMMON))
679 {
680 if ((lo = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0)
681 {
682 error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
683 return -1;
684 }
685 lo -= jp->file[1].reclen;
686 }
687 if (cp2 = getrec(jp, 1, lo < 0))
688 {
689 n2 = jp->file[1].fieldlen;
690 continue;
691 }
692 #if DEBUG_TRACE
693 sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi);
694 #endif
695 }
696 else if (cmp > 0)
697 {
698 if (same)
699 {
700 same = 0;
701 next:
702 if (n2 > jp->samesize)
703 {
704 jp->samesize = roundof(n2, 16);
705 if (!(jp->same = newof(jp->same, char, jp->samesize, 0)))
706 {
707 error(ERROR_SYSTEM|2, "out of space");
708 return -1;
709 }
710 }
711 memcpy(jp->same, cp2, o2 = n2);
712 if (!(cp2 = getrec(jp, 1, 0)))
713 break;
714 n2 = jp->file[1].fieldlen;
715 if (n2 == o2 && *cp2 == *jp->same && !memcmp(cp2, jp->same, n2))
716 goto next;
717 continue;
718 }
719 if (hi >= 0)
720 {
721 if (sfseek(jp->file[1].iop, hi, SEEK_SET) != hi)
722 {
723 error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
724 return -1;
725 }
726 hi = -1;
727 }
728 else if ((jp->outmode & C_FILE2) && outrec(jp, 1) < 0)
729 return -1;
730 lo = -1;
731 if (cp2 = getrec(jp, 1, 1))
732 {
733 n2 = jp->file[1].fieldlen;
734 continue;
735 }
736 #if DEBUG_TRACE
737 sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi);
738 #endif
739 }
740 else if (same)
741 {
742 same = 0;
743 if (!(cp1 = getrec(jp, 0, 0)))
744 break;
745 n1 = jp->file[0].fieldlen;
746 continue;
747 }
748 if (lo >= 0)
749 {
750 if ((hi = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0 ||
751 (hi -= jp->file[1].reclen) < 0 ||
752 sfseek(jp->file[1].iop, lo, SEEK_SET) != lo ||
753 !(cp2 = getrec(jp, 1, 0)))
754 {
755 error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
756 return -1;
757 }
758 n2 = jp->file[1].fieldlen;
759 lo = -1;
760 if (jp->file[1].discard)
761 sfseek(jp->file[1].iop, (Sfoff_t)-1, SEEK_SET);
762 }
763 else if (!cp2)
764 break;
765 else if ((jp->outmode & C_FILE1) && outrec(jp, -1) < 0)
766 return -1;
767 if (!(cp1 = getrec(jp, 0, 1)))
768 break;
769 n1 = jp->file[0].fieldlen;
770 }
771 }
772 #if DEBUG_TRACE
773 sfprintf(sfstdout, "[X#%d:?,%p,%p,%d,%d,%d%s]", __LINE__, cp1, cp2, cmp, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : "");
774 #endif
775 if (cp2)
776 {
777 if (hi >= 0 &&
778 sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR) < hi &&
779 sfseek(jp->file[1].iop, hi, SEEK_SET) != hi)
780 {
781 error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name);
782 return -1;
783 }
784 #if DEBUG_TRACE
785 sfprintf(sfstdout, "[O#%d:%02o:%02o]", __LINE__, jp->ooutmode, jp->outmode);
786 #endif
787 cp1 = (!cp1 && cmp && hi < 0 && !jp->file[1].hit && ((jp->ooutmode ^ C_ALL) <= 1 || jp->outmode == 2)) ? cp2 : getrec(jp, 1, 0);
788 cmp = 1;
789 n = 1;
790 }
791 else
792 {
793 cmp = -1;
794 n = 0;
795 }
796 #if DEBUG_TRACE
797 sfprintf(sfstdout, "[X#%d:%d,%p,%p,%d,%02o,%02o%s]", __LINE__, n, cp1, cp2, cmp, jp->ooutmode, jp->outmode, (jp->outmode & C_COMMON) ? ",COMMON" : "");
798 #endif
799 if (!cp1 || !(jp->outmode & (1<<n)))
800 {
801 if (cp1 && jp->file[n].iop == sfstdin)
802 sfseek(sfstdin, (Sfoff_t)0, SEEK_END);
803 return 0;
804 }
805 if (outrec(jp, cmp) < 0)
806 return -1;
807 do
808 {
809 if (!getrec(jp, n, 1))
810 return 0;
811 } while (outrec(jp, cmp) >= 0);
812 return -1;
813 }
814
815 int
b_join(int argc,char ** argv,Shbltin_t * context)816 b_join(int argc, char** argv, Shbltin_t* context)
817 {
818 register int n;
819 register char* cp;
820 register Join_t* jp;
821 char* e;
822
823 #if !DEBUG_TRACE
824 cmdinit(argc, argv, context, ERROR_CATALOG, ERROR_NOTIFY);
825 #endif
826 if (!(jp = init()))
827 error(ERROR_system(1),"out of space");
828 jp->context = context;
829 for (;;)
830 {
831 switch (n = optget(argv, usage))
832 {
833 case 'j':
834 /*
835 * check for obsolete "-j1 field" and "-j2 field"
836 */
837
838 if (opt_info.offset == 0)
839 {
840 cp = argv[opt_info.index - 1];
841 for (n = strlen(cp) - 1; n > 0 && cp[n] != 'j'; n--);
842 n = cp[n] == 'j';
843 }
844 else
845 n = 0;
846 if (n)
847 {
848 if (opt_info.num!=1 && opt_info.num!=2)
849 error(2,"-jfileno field: fileno must be 1 or 2");
850 n = '0' + opt_info.num;
851 if (!(cp = argv[opt_info.index]))
852 {
853 argc = 0;
854 break;
855 }
856 opt_info.num = strtol(cp, &e, 10);
857 if (*e)
858 {
859 argc = 0;
860 break;
861 }
862 opt_info.index++;
863 }
864 else
865 {
866 jp->file[0].field = (int)(opt_info.num-1);
867 n = '2';
868 }
869 /*FALLTHROUGH*/
870 case '1':
871 case '2':
872 if (opt_info.num <=0)
873 error(2,"field number must positive");
874 jp->file[n-'1'].field = (int)(opt_info.num-1);
875 continue;
876 case 'v':
877 jp->outmode &= ~C_COMMON;
878 /*FALLTHROUGH*/
879 case 'a':
880 if (opt_info.num!=1 && opt_info.num!=2)
881 error(2,"%s: file number must be 1 or 2", opt_info.name);
882 jp->outmode |= 1<<(opt_info.num-1);
883 continue;
884 case 'e':
885 jp->nullfield = opt_info.arg;
886 continue;
887 case 'o':
888 /* need to accept obsolescent command syntax */
889 n = getolist(jp, opt_info.arg, argv+opt_info.index);
890 opt_info.index += n;
891 continue;
892 case 't':
893 jp->state[' '] = jp->state['\t'] = 0;
894 if (jp->mb)
895 {
896 cp = opt_info.arg;
897 jp->delim = mbchar(cp);
898 if ((n = cp - opt_info.arg) > 1)
899 {
900 jp->delimlen = n;
901 jp->delimstr = opt_info.arg;
902 continue;
903 }
904 }
905 n = *(unsigned char*)opt_info.arg;
906 jp->state[n] = S_DELIM;
907 jp->delim = n;
908 continue;
909 case 'i':
910 jp->ignorecase = !opt_info.num;
911 continue;
912 case 'B':
913 jp->buffered = !opt_info.num;
914 continue;
915 case ':':
916 error(2, "%s", opt_info.arg);
917 break;
918 case '?':
919 done(jp);
920 error(ERROR_usage(2), "%s", opt_info.arg);
921 break;
922 }
923 break;
924 }
925 argv += opt_info.index;
926 argc -= opt_info.index;
927 if (error_info.errors || argc!=2)
928 {
929 done(jp);
930 error(ERROR_usage(2),"%s", optusage(NiL));
931 }
932 jp->ooutmode = jp->outmode;
933 jp->file[0].name = cp = *argv++;
934 if (streq(cp,"-"))
935 {
936 if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0)
937 {
938 if (sfdcseekable(sfstdin))
939 error(ERROR_warn(0),"%s: seek may fail",cp);
940 else
941 jp->file[0].discard = 1;
942 }
943 jp->file[0].iop = sfstdin;
944 }
945 else if (!(jp->file[0].iop = sfopen(NiL, cp, "r")))
946 {
947 done(jp);
948 error(ERROR_system(1),"%s: cannot open",cp);
949 }
950 jp->file[1].name = cp = *argv;
951 if (streq(cp,"-"))
952 {
953 if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0)
954 {
955 if (sfdcseekable(sfstdin))
956 error(ERROR_warn(0),"%s: seek may fail",cp);
957 else
958 jp->file[1].discard = 1;
959 }
960 jp->file[1].iop = sfstdin;
961 }
962 else if (!(jp->file[1].iop = sfopen(NiL, cp, "r")))
963 {
964 done(jp);
965 error(ERROR_system(1),"%s: cannot open",cp);
966 }
967 if (jp->buffered)
968 {
969 sfsetbuf(jp->file[0].iop, jp->file[0].iop, SF_UNBOUND);
970 sfsetbuf(jp->file[1].iop, jp->file[1].iop, SF_UNBOUND);
971 }
972 jp->outfile = sfstdout;
973 if (!jp->outlist)
974 jp->nullfield = 0;
975 if (join(jp) < 0)
976 {
977 done(jp);
978 error(ERROR_system(1),"write error");
979 }
980 else if (jp->file[0].iop==sfstdin || jp->file[1].iop==sfstdin)
981 sfseek(sfstdin,(Sfoff_t)0,SEEK_END);
982 done(jp);
983 return error_info.errors;
984 }
985