1 /*********************************************************************** 2 * * 3 * This software is part of the ast package * 4 * Copyright (c) 1992-2012 AT&T Intellectual Property * 5 * and is licensed under the * 6 * Eclipse Public License, Version 1.0 * 7 * by AT&T Intellectual Property * 8 * * 9 * A copy of the License is available at * 10 * http://www.eclipse.org/org/documents/epl-v10.html * 11 * (with md5 checksum b35adb5213ca9657e911e9befb180842) * 12 * * 13 * Information and Software Systems Research * 14 * AT&T Research * 15 * Florham Park NJ * 16 * * 17 * Glenn Fowler <gsf@research.att.com> * 18 * David Korn <dgk@research.att.com> * 19 * * 20 ***********************************************************************/ 21 #pragma prototyped 22 /* 23 * David Korn 24 * Glenn Fowler 25 * AT&T Research 26 * 27 * join 28 */ 29 30 static const char usage[] = 31 "[-?\n@(#)$Id: join (AT&T Research) 2009-12-10 $\n]" 32 USAGE_LICENSE 33 "[+NAME?join - relational database operator]" 34 "[+DESCRIPTION?\bjoin\b performs an \aequality join\a on the files \afile1\a " 35 "and \afile2\a and writes the resulting joined files to standard " 36 "output. By default, a field is delimited by one or more spaces " 37 "and tabs with leading spaces and/or tabs ignored. The \b-t\b option " 38 "can be used to change the field delimiter.]" 39 "[+?The \ajoin field\a is a field in each file on which files are compared. " 40 "By default \bjoin\b writes one line in the output for each pair " 41 "of lines in \afiles1\a and \afiles2\a that have identical join " 42 "fields. The default output line consists of the join field, " 43 "then the remaining fields from \afile1\a, then the remaining " 44 "fields from \afile2\a, but this can be changed with the \b-o\b " 45 "option. The \b-a\b option can be used to add unmatched lines " 46 "to the output. The \b-v\b option can be used to output only " 47 "unmatched lines.]" 48 "[+?The files \afile1\a and \afile2\a must be ordered in the collating " 49 "sequence of \bsort -b\b on the fields on which they are to be " 50 "joined otherwise the results are unspecified.]" 51 "[+?If either \afile1\a or \afile2\a is \b-\b, \bjoin\b " 52 "uses standard input starting at the current location.]" 53 54 "[e:empty]:[string?Replace empty output fields in the list selected with" 55 " \b-o\b with \astring\a.]" 56 "[o:output]:[list?Construct the output line to comprise the fields specified " 57 "in a blank or comma separated list \alist\a. Each element in " 58 "\alist\a consists of a file number (either 1 or 2), a period, " 59 "and a field number or \b0\b representing the join field. " 60 "As an obsolete feature multiple occurrences of \b-o\b can " 61 "be specified.]" 62 "[t:separator|tabs]:[delim?Use \adelim\a as the field separator for both input" 63 " and output.]" 64 "[1:j1]#[field?Join on field \afield\a of \afile1\a. Fields start at 1.]" 65 "[2:j2]#[field?Join on field \afield\a of \afile2\a. Fields start at 1.]" 66 "[j:join]#[field?Equivalent to \b-1\b \afield\a \b-2\b \afield\a.]" 67 "[a:unpairable]#[fileno?Write a line for each unpairable line in file" 68 " \afileno\a, where \afileno\a is either 1 or 2, in addition to the" 69 " normal output. If \b-a\b options appear for both 1 and 2, then " 70 "all unpairable lines will be output.]" 71 "[v:suppress]#[fileno?Write a line for each unpairable line in file" 72 " \afileno\a, where \afileno\a is either 1 or 2, instead of the normal " 73 "output. If \b-v\b options appear for both 1 and 2, then " 74 "all unpairable lines will be output.] ]" 75 "[i:ignorecase?Ignore case in field comparisons.]" 76 "[B!:mmap?Enable memory mapped reads instead of buffered.]" 77 78 "[+?The following obsolete option forms are also recognized: \b-j\b \afield\a" 79 " is equivalent to \b-1\b \afield\a \b-2\b \afield\a, \b-j1\b \afield\a" 80 " is equivalent to \b-1\b \afield\a, and \b-j2\b \afield\a is" 81 " equivalent to \b-2\b \afield\a.]" 82 83 "\n" 84 "\nfile1 file2\n" 85 "\n" 86 "[+EXIT STATUS?]{" 87 "[+0?Both files processed successfully.]" 88 "[+>0?An error occurred.]" 89 "}" 90 "[+SEE ALSO?\bcut\b(1), \bcomm\b(1), \bpaste\b(1), \bsort\b(1), \buniq\b(1)]" 91 ; 92 93 #include <cmd.h> 94 #include <sfdisc.h> 95 96 #if _hdr_wchar && _hdr_wctype && _lib_iswctype 97 98 #include <wchar.h> 99 #include <wctype.h> 100 101 #else 102 103 #include <ctype.h> 104 105 #ifndef iswspace 106 #define iswspace(x) isspace(x) 107 #endif 108 109 #endif 110 111 #define C_FILE1 001 112 #define C_FILE2 002 113 #define C_COMMON 004 114 #define C_ALL (C_FILE1|C_FILE2|C_COMMON) 115 116 #define NFIELD 10 117 #define JOINFIELD 2 118 119 #define S_DELIM 1 120 #define S_SPACE 2 121 #define S_NL 3 122 #define S_WIDE 4 123 124 typedef struct Field_s 125 { 126 char* beg; 127 char* end; 128 } Field_t; 129 130 typedef struct File_s 131 { 132 Sfio_t* iop; 133 char* name; 134 char* recptr; 135 int reclen; 136 int field; 137 int fieldlen; 138 int nfields; 139 int maxfields; 140 int spaces; 141 int hit; 142 int discard; 143 Field_t* fields; 144 } File_t; 145 146 typedef struct Join_s 147 { 148 unsigned char state[1<<CHAR_BIT]; 149 Sfio_t* outfile; 150 int* outlist; 151 int outmode; 152 int ooutmode; 153 char* nullfield; 154 char* delimstr; 155 int delim; 156 int delimlen; 157 int buffered; 158 int ignorecase; 159 int mb; 160 char* same; 161 int samesize; 162 Shbltin_t* context; 163 File_t file[2]; 164 } Join_t; 165 166 static void 167 done(register Join_t* jp) 168 { 169 if (jp->file[0].iop && jp->file[0].iop != sfstdin) 170 sfclose(jp->file[0].iop); 171 if (jp->file[1].iop && jp->file[1].iop != sfstdin) 172 sfclose(jp->file[1].iop); 173 if (jp->outlist) 174 free(jp->outlist); 175 if (jp->file[0].fields) 176 free(jp->file[0].fields); 177 if (jp->file[1].fields) 178 free(jp->file[1].fields); 179 if (jp->same) 180 free(jp->same); 181 free(jp); 182 } 183 184 static Join_t* 185 init(void) 186 { 187 register Join_t* jp; 188 register int i; 189 190 setlocale(LC_ALL, ""); 191 if (jp = newof(0, Join_t, 1, 0)) 192 { 193 if (jp->mb = mbwide()) 194 for (i = 0x80; i <= 0xff; i++) 195 jp->state[i] = S_WIDE; 196 jp->state[' '] = jp->state['\t'] = S_SPACE; 197 jp->state['\n'] = S_NL; 198 jp->delim = -1; 199 jp->nullfield = 0; 200 if (!(jp->file[0].fields = newof(0, Field_t, NFIELD + 1, 0)) || 201 !(jp->file[1].fields = newof(0, Field_t, NFIELD + 1, 0))) 202 { 203 done(jp); 204 return 0; 205 } 206 jp->file[0].maxfields = NFIELD; 207 jp->file[1].maxfields = NFIELD; 208 jp->outmode = C_COMMON; 209 } 210 return jp; 211 } 212 213 static int 214 getolist(Join_t* jp, const char* first, char** arglist) 215 { 216 register const char* cp = first; 217 char** argv = arglist; 218 register int c; 219 int* outptr; 220 int* outmax; 221 int nfield = NFIELD; 222 char* str; 223 224 outptr = jp->outlist = newof(0, int, NFIELD + 1, 0); 225 outmax = outptr + NFIELD; 226 while (c = *cp++) 227 { 228 if (c==' ' || c=='\t' || c==',') 229 continue; 230 str = (char*)--cp; 231 if (*cp=='0' && ((c=cp[1])==0 || c==' ' || c=='\t' || c==',')) 232 { 233 str++; 234 c = JOINFIELD; 235 goto skip; 236 } 237 if (cp[1]!='.' || (*cp!='1' && *cp!='2') || (c=strtol(cp+2,&str,10)) <=0) 238 { 239 error(2,"%s: invalid field list",first); 240 break; 241 } 242 c--; 243 c <<=2; 244 if (*cp=='2') 245 c |=1; 246 skip: 247 if (outptr >= outmax) 248 { 249 jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0); 250 outptr = jp->outlist + nfield; 251 nfield *= 2; 252 outmax = jp->outlist + nfield; 253 } 254 *outptr++ = c; 255 cp = str; 256 } 257 /* need to accept obsolescent command syntax */ 258 while (cp = *argv) 259 { 260 if (cp[1]!='.' || (*cp!='1' && *cp!='2')) 261 { 262 if (*cp=='0' && cp[1]==0) 263 { 264 c = JOINFIELD; 265 goto skip2; 266 } 267 break; 268 } 269 str = (char*)cp; 270 c = strtol(cp+2, &str,10); 271 if (*str || --c<0) 272 break; 273 argv++; 274 c <<= 2; 275 if (*cp=='2') 276 c |=1; 277 skip2: 278 if (outptr >= outmax) 279 { 280 jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0); 281 outptr = jp->outlist + nfield; 282 nfield *= 2; 283 outmax = jp->outlist + nfield; 284 } 285 *outptr++ = c; 286 } 287 *outptr = -1; 288 return argv-arglist; 289 } 290 291 /* 292 * read in a record from file <index> and split into fields 293 */ 294 static unsigned char* 295 getrec(Join_t* jp, int index, int discard) 296 { 297 register unsigned char* sp = jp->state; 298 register File_t* fp = &jp->file[index]; 299 register Field_t* field = fp->fields; 300 register Field_t* fieldmax = field + fp->maxfields; 301 register char* cp; 302 register int n; 303 char* tp; 304 305 if (sh_checksig(jp->context)) 306 return 0; 307 if (discard && fp->discard) 308 sfraise(fp->iop, SFSK_DISCARD, NiL); 309 fp->spaces = 0; 310 fp->hit = 0; 311 if (!(cp = sfgetr(fp->iop, '\n', 0))) 312 { 313 jp->outmode &= ~(1<<index); 314 return 0; 315 } 316 fp->recptr = cp; 317 fp->reclen = sfvalue(fp->iop); 318 if (jp->delim == '\n') /* handle new-line delimiter specially */ 319 { 320 field->beg = cp; 321 cp += fp->reclen; 322 field->end = cp - 1; 323 field++; 324 } 325 else 326 do /* separate into fields */ 327 { 328 if (field >= fieldmax) 329 { 330 n = 2 * fp->maxfields; 331 fp->fields = newof(fp->fields, Field_t, n + 1, 0); 332 field = fp->fields + fp->maxfields; 333 fp->maxfields = n; 334 fieldmax = fp->fields + n; 335 } 336 field->beg = cp; 337 if (jp->delim == -1) 338 { 339 switch (sp[*(unsigned char*)cp]) 340 { 341 case S_SPACE: 342 cp++; 343 break; 344 case S_WIDE: 345 tp = cp; 346 if (iswspace(mbchar(tp))) 347 { 348 cp = tp; 349 break; 350 } 351 /*FALLTHROUGH*/ 352 default: 353 goto next; 354 } 355 fp->spaces = 1; 356 if (jp->mb) 357 for (;;) 358 { 359 switch (sp[*(unsigned char*)cp++]) 360 { 361 case S_SPACE: 362 continue; 363 case S_WIDE: 364 tp = cp - 1; 365 if (iswspace(mbchar(tp))) 366 { 367 cp = tp; 368 continue; 369 } 370 break; 371 } 372 break; 373 } 374 else 375 while (sp[*(unsigned char*)cp++]==S_SPACE); 376 cp--; 377 } 378 next: 379 if (jp->mb) 380 { 381 for (;;) 382 { 383 tp = cp; 384 switch (n = sp[*(unsigned char*)cp++]) 385 { 386 case 0: 387 continue; 388 case S_WIDE: 389 cp--; 390 n = mbchar(cp); 391 if (n == jp->delim) 392 { 393 n = S_DELIM; 394 break; 395 } 396 if (jp->delim == -1 && iswspace(n)) 397 { 398 n = S_SPACE; 399 break; 400 } 401 continue; 402 } 403 break; 404 } 405 field->end = tp; 406 } 407 else 408 { 409 while (!(n = sp[*(unsigned char*)cp++])); 410 field->end = cp - 1; 411 } 412 field++; 413 } while (n != S_NL); 414 fp->nfields = field - fp->fields; 415 if ((n = fp->field) < fp->nfields) 416 { 417 cp = fp->fields[n].beg; 418 /* eliminate leading spaces */ 419 if (fp->spaces) 420 { 421 if (jp->mb) 422 for (;;) 423 { 424 switch (sp[*(unsigned char*)cp++]) 425 { 426 case S_SPACE: 427 continue; 428 case S_WIDE: 429 tp = cp - 1; 430 if (iswspace(mbchar(tp))) 431 { 432 cp = tp; 433 continue; 434 } 435 break; 436 } 437 break; 438 } 439 else 440 while (sp[*(unsigned char*)cp++]==S_SPACE); 441 cp--; 442 } 443 fp->fieldlen = fp->fields[n].end - cp; 444 return (unsigned char*)cp; 445 } 446 fp->fieldlen = 0; 447 return (unsigned char*)""; 448 } 449 450 #if DEBUG_TRACE 451 static unsigned char* u1; 452 #define getrec(p,n,d) (u1 = getrec(p, n, d), sfprintf(sfstdout, "[G%d#%d@%I*d:%-.8s]", __LINE__, n, sizeof(Sfoff_t), sftell(p->file[n].iop), u1), u1) 453 #endif 454 455 /* 456 * print field <n> from file <index> 457 */ 458 static int 459 outfield(Join_t* jp, int index, register int n, int last) 460 { 461 register File_t* fp = &jp->file[index]; 462 register char* cp; 463 register char* cpmax; 464 register int size; 465 register Sfio_t* iop = jp->outfile; 466 char* tp; 467 468 if (n < fp->nfields) 469 { 470 cp = fp->fields[n].beg; 471 cpmax = fp->fields[n].end + 1; 472 } 473 else 474 cp = 0; 475 if ((n = jp->delim) == -1) 476 { 477 if (cp && fp->spaces) 478 { 479 register unsigned char* sp = jp->state; 480 481 /*eliminate leading spaces */ 482 if (jp->mb) 483 for (;;) 484 { 485 switch (sp[*(unsigned char*)cp++]) 486 { 487 case S_SPACE: 488 continue; 489 case S_WIDE: 490 tp = cp - 1; 491 if (iswspace(mbchar(tp))) 492 { 493 cp = tp; 494 continue; 495 } 496 break; 497 } 498 break; 499 } 500 else 501 while (sp[*(unsigned char*)cp++]==S_SPACE); 502 cp--; 503 } 504 n = ' '; 505 } 506 else if (jp->delimstr) 507 n = -1; 508 if (last) 509 n = '\n'; 510 if (cp) 511 size = cpmax - cp; 512 else 513 size = 0; 514 if (n == -1) 515 { 516 if (size<=1) 517 { 518 if (jp->nullfield && sfputr(iop, jp->nullfield, -1) < 0) 519 return -1; 520 } 521 else if (sfwrite(iop, cp, size) < 0) 522 return -1; 523 if (sfwrite(iop, jp->delimstr, jp->delimlen) < 0) 524 return -1; 525 } 526 else if (size <= 1) 527 { 528 if (!jp->nullfield) 529 sfputc(iop, n); 530 else if (sfputr(iop, jp->nullfield, n) < 0) 531 return -1; 532 } 533 else 534 { 535 last = cp[size-1]; 536 cp[size-1] = n; 537 if (sfwrite(iop, cp, size) < 0) 538 return -1; 539 cp[size-1] = last; 540 } 541 return 0; 542 } 543 544 #if DEBUG_TRACE 545 static int i1,i2,i3; 546 #define outfield(p,i,n,f) (sfprintf(sfstdout, "[F%d#%d:%d,%d]", __LINE__, i1=i, i2=n, i3=f), outfield(p, i1, i2, i3)) 547 #endif 548 549 static int 550 outrec(register Join_t* jp, int mode) 551 { 552 register File_t* fp; 553 register int i; 554 register int j; 555 register int k; 556 register int n; 557 int* out; 558 559 if (mode < 0 && jp->file[0].hit++) 560 return 0; 561 if (mode > 0 && jp->file[1].hit++) 562 return 0; 563 if (out = jp->outlist) 564 { 565 while ((n = *out++) >= 0) 566 { 567 if (n == JOINFIELD) 568 { 569 i = mode >= 0; 570 j = jp->file[i].field; 571 } 572 else 573 { 574 i = n & 1; 575 j = (mode<0 && i || mode>0 && !i) ? 576 jp->file[i].nfields : 577 n >> 2; 578 } 579 if (outfield(jp, i, j, *out < 0) < 0) 580 return -1; 581 } 582 return 0; 583 } 584 k = jp->file[0].nfields; 585 if (mode >= 0) 586 k += jp->file[1].nfields - 1; 587 for (i=0; i<2; i++) 588 { 589 fp = &jp->file[i]; 590 if (mode>0 && i==0) 591 { 592 k -= (fp->nfields - 1); 593 continue; 594 } 595 n = fp->field; 596 if (mode||i==0) 597 { 598 /* output join field first */ 599 if (outfield(jp,i,n,!--k) < 0) 600 return -1; 601 if (!k) 602 return 0; 603 for (j=0; j<n; j++) 604 { 605 if (outfield(jp,i,j,!--k) < 0) 606 return -1; 607 if (!k) 608 return 0; 609 } 610 j = n + 1; 611 } 612 else 613 j = 0; 614 for (;j<fp->nfields; j++) 615 { 616 if (j!=n && outfield(jp,i,j,!--k) < 0) 617 return -1; 618 if (!k) 619 return 0; 620 } 621 } 622 return 0; 623 } 624 625 #if DEBUG_TRACE 626 #define outrec(p,n) (sfprintf(sfstdout, "[R#%d,%d,%lld,%lld:%-.*s{%d}:%-.*s{%d}]", __LINE__, i1=n, lo, hi, jp->file[0].fieldlen, cp1, jp->file[0].hit, jp->file[1].fieldlen, cp2, jp->file[1].hit), outrec(p, i1)) 627 #endif 628 629 static int 630 join(Join_t* jp) 631 { 632 register unsigned char* cp1; 633 register unsigned char* cp2; 634 register int n1; 635 register int n2; 636 register int n; 637 register int cmp; 638 register int same; 639 int o2; 640 Sfoff_t lo = -1; 641 Sfoff_t hi = -1; 642 643 if ((cp1 = getrec(jp, 0, 0)) && (cp2 = getrec(jp, 1, 0)) || (cp2 = 0)) 644 { 645 n1 = jp->file[0].fieldlen; 646 n2 = jp->file[1].fieldlen; 647 same = 0; 648 for (;;) 649 { 650 n = n1 < n2 ? n1 : n2; 651 #if DEBUG_TRACE 652 if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n))) 653 cmp = n1 - n2; 654 sfprintf(sfstdout, "[C#%d:%d(%c-%c),%d,%lld,%lld%s]", __LINE__, cmp, *cp1, *cp2, same, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : ""); 655 if (!cmp) 656 #else 657 if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n)) && !(cmp = n1 - n2)) 658 #endif 659 { 660 if (!(jp->outmode & C_COMMON)) 661 { 662 if (cp1 = getrec(jp, 0, 1)) 663 { 664 n1 = jp->file[0].fieldlen; 665 same = 1; 666 continue; 667 } 668 if ((jp->ooutmode & (C_FILE1|C_FILE2)) != C_FILE2) 669 break; 670 if (sfseek(jp->file[0].iop, (Sfoff_t)-jp->file[0].reclen, SEEK_CUR) < 0 || !(cp1 = getrec(jp, 0, 0))) 671 { 672 error(ERROR_SYSTEM|2, "%s: seek error", jp->file[0].name); 673 return -1; 674 } 675 } 676 else if (outrec(jp, 0) < 0) 677 return -1; 678 else if (lo < 0 && (jp->outmode & C_COMMON)) 679 { 680 if ((lo = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0) 681 { 682 error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name); 683 return -1; 684 } 685 lo -= jp->file[1].reclen; 686 } 687 if (cp2 = getrec(jp, 1, lo < 0)) 688 { 689 n2 = jp->file[1].fieldlen; 690 continue; 691 } 692 #if DEBUG_TRACE 693 sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi); 694 #endif 695 } 696 else if (cmp > 0) 697 { 698 if (same) 699 { 700 same = 0; 701 next: 702 if (n2 > jp->samesize) 703 { 704 jp->samesize = roundof(n2, 16); 705 if (!(jp->same = newof(jp->same, char, jp->samesize, 0))) 706 { 707 error(ERROR_SYSTEM|2, "out of space"); 708 return -1; 709 } 710 } 711 memcpy(jp->same, cp2, o2 = n2); 712 if (!(cp2 = getrec(jp, 1, 0))) 713 break; 714 n2 = jp->file[1].fieldlen; 715 if (n2 == o2 && *cp2 == *jp->same && !memcmp(cp2, jp->same, n2)) 716 goto next; 717 continue; 718 } 719 if (hi >= 0) 720 { 721 if (sfseek(jp->file[1].iop, hi, SEEK_SET) != hi) 722 { 723 error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name); 724 return -1; 725 } 726 hi = -1; 727 } 728 else if ((jp->outmode & C_FILE2) && outrec(jp, 1) < 0) 729 return -1; 730 lo = -1; 731 if (cp2 = getrec(jp, 1, 1)) 732 { 733 n2 = jp->file[1].fieldlen; 734 continue; 735 } 736 #if DEBUG_TRACE 737 sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi); 738 #endif 739 } 740 else if (same) 741 { 742 same = 0; 743 if (!(cp1 = getrec(jp, 0, 0))) 744 break; 745 n1 = jp->file[0].fieldlen; 746 continue; 747 } 748 if (lo >= 0) 749 { 750 if ((hi = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0 || 751 (hi -= jp->file[1].reclen) < 0 || 752 sfseek(jp->file[1].iop, lo, SEEK_SET) != lo || 753 !(cp2 = getrec(jp, 1, 0))) 754 { 755 error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name); 756 return -1; 757 } 758 n2 = jp->file[1].fieldlen; 759 lo = -1; 760 if (jp->file[1].discard) 761 sfseek(jp->file[1].iop, (Sfoff_t)-1, SEEK_SET); 762 } 763 else if (!cp2) 764 break; 765 else if ((jp->outmode & C_FILE1) && outrec(jp, -1) < 0) 766 return -1; 767 if (!(cp1 = getrec(jp, 0, 1))) 768 break; 769 n1 = jp->file[0].fieldlen; 770 } 771 } 772 #if DEBUG_TRACE 773 sfprintf(sfstdout, "[X#%d:?,%p,%p,%d,%d,%d%s]", __LINE__, cp1, cp2, cmp, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : ""); 774 #endif 775 if (cp2) 776 { 777 if (hi >= 0 && 778 sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR) < hi && 779 sfseek(jp->file[1].iop, hi, SEEK_SET) != hi) 780 { 781 error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name); 782 return -1; 783 } 784 #if DEBUG_TRACE 785 sfprintf(sfstdout, "[O#%d:%02o:%02o]", __LINE__, jp->ooutmode, jp->outmode); 786 #endif 787 cp1 = (!cp1 && cmp && hi < 0 && !jp->file[1].hit && ((jp->ooutmode ^ C_ALL) <= 1 || jp->outmode == 2)) ? cp2 : getrec(jp, 1, 0); 788 cmp = 1; 789 n = 1; 790 } 791 else 792 { 793 cmp = -1; 794 n = 0; 795 } 796 #if DEBUG_TRACE 797 sfprintf(sfstdout, "[X#%d:%d,%p,%p,%d,%02o,%02o%s]", __LINE__, n, cp1, cp2, cmp, jp->ooutmode, jp->outmode, (jp->outmode & C_COMMON) ? ",COMMON" : ""); 798 #endif 799 if (!cp1 || !(jp->outmode & (1<<n))) 800 { 801 if (cp1 && jp->file[n].iop == sfstdin) 802 sfseek(sfstdin, (Sfoff_t)0, SEEK_END); 803 return 0; 804 } 805 if (outrec(jp, cmp) < 0) 806 return -1; 807 do 808 { 809 if (!getrec(jp, n, 1)) 810 return 0; 811 } while (outrec(jp, cmp) >= 0); 812 return -1; 813 } 814 815 int 816 b_join(int argc, char** argv, Shbltin_t* context) 817 { 818 register int n; 819 register char* cp; 820 register Join_t* jp; 821 char* e; 822 823 #if !DEBUG_TRACE 824 cmdinit(argc, argv, context, ERROR_CATALOG, ERROR_NOTIFY); 825 #endif 826 if (!(jp = init())) 827 error(ERROR_system(1),"out of space"); 828 jp->context = context; 829 for (;;) 830 { 831 switch (n = optget(argv, usage)) 832 { 833 case 'j': 834 /* 835 * check for obsolete "-j1 field" and "-j2 field" 836 */ 837 838 if (opt_info.offset == 0) 839 { 840 cp = argv[opt_info.index - 1]; 841 for (n = strlen(cp) - 1; n > 0 && cp[n] != 'j'; n--); 842 n = cp[n] == 'j'; 843 } 844 else 845 n = 0; 846 if (n) 847 { 848 if (opt_info.num!=1 && opt_info.num!=2) 849 error(2,"-jfileno field: fileno must be 1 or 2"); 850 n = '0' + opt_info.num; 851 if (!(cp = argv[opt_info.index])) 852 { 853 argc = 0; 854 break; 855 } 856 opt_info.num = strtol(cp, &e, 10); 857 if (*e) 858 { 859 argc = 0; 860 break; 861 } 862 opt_info.index++; 863 } 864 else 865 { 866 jp->file[0].field = (int)(opt_info.num-1); 867 n = '2'; 868 } 869 /*FALLTHROUGH*/ 870 case '1': 871 case '2': 872 if (opt_info.num <=0) 873 error(2,"field number must positive"); 874 jp->file[n-'1'].field = (int)(opt_info.num-1); 875 continue; 876 case 'v': 877 jp->outmode &= ~C_COMMON; 878 /*FALLTHROUGH*/ 879 case 'a': 880 if (opt_info.num!=1 && opt_info.num!=2) 881 error(2,"%s: file number must be 1 or 2", opt_info.name); 882 jp->outmode |= 1<<(opt_info.num-1); 883 continue; 884 case 'e': 885 jp->nullfield = opt_info.arg; 886 continue; 887 case 'o': 888 /* need to accept obsolescent command syntax */ 889 n = getolist(jp, opt_info.arg, argv+opt_info.index); 890 opt_info.index += n; 891 continue; 892 case 't': 893 jp->state[' '] = jp->state['\t'] = 0; 894 if (jp->mb) 895 { 896 cp = opt_info.arg; 897 jp->delim = mbchar(cp); 898 if ((n = cp - opt_info.arg) > 1) 899 { 900 jp->delimlen = n; 901 jp->delimstr = opt_info.arg; 902 continue; 903 } 904 } 905 n = *(unsigned char*)opt_info.arg; 906 jp->state[n] = S_DELIM; 907 jp->delim = n; 908 continue; 909 case 'i': 910 jp->ignorecase = !opt_info.num; 911 continue; 912 case 'B': 913 jp->buffered = !opt_info.num; 914 continue; 915 case ':': 916 error(2, "%s", opt_info.arg); 917 break; 918 case '?': 919 done(jp); 920 error(ERROR_usage(2), "%s", opt_info.arg); 921 break; 922 } 923 break; 924 } 925 argv += opt_info.index; 926 argc -= opt_info.index; 927 if (error_info.errors || argc!=2) 928 { 929 done(jp); 930 error(ERROR_usage(2),"%s", optusage(NiL)); 931 } 932 jp->ooutmode = jp->outmode; 933 jp->file[0].name = cp = *argv++; 934 if (streq(cp,"-")) 935 { 936 if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0) 937 { 938 if (sfdcseekable(sfstdin)) 939 error(ERROR_warn(0),"%s: seek may fail",cp); 940 else 941 jp->file[0].discard = 1; 942 } 943 jp->file[0].iop = sfstdin; 944 } 945 else if (!(jp->file[0].iop = sfopen(NiL, cp, "r"))) 946 { 947 done(jp); 948 error(ERROR_system(1),"%s: cannot open",cp); 949 } 950 jp->file[1].name = cp = *argv; 951 if (streq(cp,"-")) 952 { 953 if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0) 954 { 955 if (sfdcseekable(sfstdin)) 956 error(ERROR_warn(0),"%s: seek may fail",cp); 957 else 958 jp->file[1].discard = 1; 959 } 960 jp->file[1].iop = sfstdin; 961 } 962 else if (!(jp->file[1].iop = sfopen(NiL, cp, "r"))) 963 { 964 done(jp); 965 error(ERROR_system(1),"%s: cannot open",cp); 966 } 967 if (jp->buffered) 968 { 969 sfsetbuf(jp->file[0].iop, jp->file[0].iop, SF_UNBOUND); 970 sfsetbuf(jp->file[1].iop, jp->file[1].iop, SF_UNBOUND); 971 } 972 jp->outfile = sfstdout; 973 if (!jp->outlist) 974 jp->nullfield = 0; 975 if (join(jp) < 0) 976 { 977 done(jp); 978 error(ERROR_system(1),"write error"); 979 } 980 else if (jp->file[0].iop==sfstdin || jp->file[1].iop==sfstdin) 981 sfseek(sfstdin,(Sfoff_t)0,SEEK_END); 982 done(jp); 983 return error_info.errors; 984 } 985