1 /*********************************************************************** 2 * * 3 * This software is part of the ast package * 4 * Copyright (c) 1992-2010 AT&T Intellectual Property * 5 * and is licensed under the * 6 * Common Public License, Version 1.0 * 7 * by AT&T Intellectual Property * 8 * * 9 * A copy of the License is available at * 10 * http://www.opensource.org/licenses/cpl1.0.txt * 11 * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 12 * * 13 * Information and Software Systems Research * 14 * AT&T Research * 15 * Florham Park NJ * 16 * * 17 * Glenn Fowler <gsf@research.att.com> * 18 * David Korn <dgk@research.att.com> * 19 * * 20 ***********************************************************************/ 21 #pragma prototyped 22 /* 23 * David Korn 24 * Glenn Fowler 25 * AT&T Research 26 * 27 * join 28 */ 29 30 static const char usage[] = 31 "[-?\n@(#)$Id: join (AT&T Research) 2009-12-10 $\n]" 32 USAGE_LICENSE 33 "[+NAME?join - relational database operator]" 34 "[+DESCRIPTION?\bjoin\b performs an \aequality join\a on the files \afile1\a " 35 "and \afile2\a and writes the resulting joined files to standard " 36 "output. By default, a field is delimited by one or more spaces " 37 "and tabs with leading spaces and/or tabs ignored. The \b-t\b option " 38 "can be used to change the field delimiter.]" 39 "[+?The \ajoin field\a is a field in each file on which files are compared. " 40 "By default \bjoin\b writes one line in the output for each pair " 41 "of lines in \afiles1\a and \afiles2\a that have identical join " 42 "fields. The default output line consists of the join field, " 43 "then the remaining fields from \afile1\a, then the remaining " 44 "fields from \afile2\a, but this can be changed with the \b-o\b " 45 "option. The \b-a\b option can be used to add unmatched lines " 46 "to the output. The \b-v\b option can be used to output only " 47 "unmatched lines.]" 48 "[+?The files \afile1\a and \afile2\a must be ordered in the collating " 49 "sequence of \bsort -b\b on the fields on which they are to be " 50 "joined otherwise the results are unspecified.]" 51 "[+?If either \afile1\a or \afile2\a is \b-\b, \bjoin\b " 52 "uses standard input starting at the current location.]" 53 54 "[e:empty]:[string?Replace empty output fields in the list selected with" 55 " \b-o\b with \astring\a.]" 56 "[o:output]:[list?Construct the output line to comprise the fields specified " 57 "in a blank or comma separated list \alist\a. Each element in " 58 "\alist\a consists of a file number (either 1 or 2), a period, " 59 "and a field number or \b0\b representing the join field. " 60 "As an obsolete feature multiple occurrences of \b-o\b can " 61 "be specified.]" 62 "[t:separator|tabs]:[delim?Use \adelim\a as the field separator for both input" 63 " and output.]" 64 "[1:j1]#[field?Join on field \afield\a of \afile1\a. Fields start at 1.]" 65 "[2:j2]#[field?Join on field \afield\a of \afile2\a. Fields start at 1.]" 66 "[j:join]#[field?Equivalent to \b-1\b \afield\a \b-2\b \afield\a.]" 67 "[a:unpairable]#[fileno?Write a line for each unpairable line in file" 68 " \afileno\a, where \afileno\a is either 1 or 2, in addition to the" 69 " normal output. If \b-a\b options appear for both 1 and 2, then " 70 "all unpairable lines will be output.]" 71 "[v:suppress]#[fileno?Write a line for each unpairable line in file" 72 " \afileno\a, where \afileno\a is either 1 or 2, instead of the normal " 73 "output. If \b-v\b options appear for both 1 and 2, then " 74 "all unpairable lines will be output.] ]" 75 "[i:ignorecase?Ignore case in field comparisons.]" 76 "[B!:mmap?Enable memory mapped reads instead of buffered.]" 77 78 "[+?The following obsolete option forms are also recognized: \b-j\b \afield\a" 79 " is equivalent to \b-1\b \afield\a \b-2\b \afield\a, \b-j1\b \afield\a" 80 " is equivalent to \b-1\b \afield\a, and \b-j2\b \afield\a is" 81 " equivalent to \b-2\b \afield\a.]" 82 83 "\n" 84 "\nfile1 file2\n" 85 "\n" 86 "[+EXIT STATUS?]{" 87 "[+0?Both files processed successfully.]" 88 "[+>0?An error occurred.]" 89 "}" 90 "[+SEE ALSO?\bcut\b(1), \bcomm\b(1), \bpaste\b(1), \bsort\b(1), \buniq\b(1)]" 91 ; 92 93 #include <cmd.h> 94 #include <sfdisc.h> 95 96 #if _hdr_wchar && _hdr_wctype && _lib_iswctype 97 98 #include <wchar.h> 99 #include <wctype.h> 100 101 #else 102 103 #include <ctype.h> 104 105 #ifndef iswspace 106 #define iswspace(x) isspace(x) 107 #endif 108 109 #endif 110 111 #define C_FILE1 001 112 #define C_FILE2 002 113 #define C_COMMON 004 114 #define C_ALL (C_FILE1|C_FILE2|C_COMMON) 115 116 #define NFIELD 10 117 #define JOINFIELD 2 118 119 #define S_DELIM 1 120 #define S_SPACE 2 121 #define S_NL 3 122 #define S_WIDE 4 123 124 typedef struct Field_s 125 { 126 char* beg; 127 char* end; 128 } Field_t; 129 130 typedef struct File_s 131 { 132 Sfio_t* iop; 133 char* name; 134 char* recptr; 135 int reclen; 136 int field; 137 int fieldlen; 138 int nfields; 139 int maxfields; 140 int spaces; 141 int hit; 142 int discard; 143 Field_t* fields; 144 } File_t; 145 146 typedef struct Join_s 147 { 148 unsigned char state[1<<CHAR_BIT]; 149 Sfio_t* outfile; 150 int* outlist; 151 int outmode; 152 int ooutmode; 153 char* nullfield; 154 char* delimstr; 155 int delim; 156 int delimlen; 157 int buffered; 158 int ignorecase; 159 int mb; 160 char* same; 161 int samesize; 162 void* context; 163 File_t file[2]; 164 } Join_t; 165 166 static void 167 done(register Join_t* jp) 168 { 169 if (jp->file[0].iop && jp->file[0].iop != sfstdin) 170 sfclose(jp->file[0].iop); 171 if (jp->file[1].iop && jp->file[1].iop != sfstdin) 172 sfclose(jp->file[1].iop); 173 if (jp->outlist) 174 free(jp->outlist); 175 if (jp->file[0].fields) 176 free(jp->file[0].fields); 177 if (jp->file[1].fields) 178 free(jp->file[1].fields); 179 if (jp->same) 180 free(jp->same); 181 free(jp); 182 } 183 184 static Join_t* 185 init(void) 186 { 187 register Join_t* jp; 188 register int i; 189 190 setlocale(LC_ALL, ""); 191 if (jp = newof(0, Join_t, 1, 0)) 192 { 193 if (jp->mb = mbwide()) 194 for (i = 0x80; i <= 0xff; i++) 195 jp->state[i] = S_WIDE; 196 jp->state[' '] = jp->state['\t'] = S_SPACE; 197 jp->state['\n'] = S_NL; 198 jp->delim = -1; 199 jp->nullfield = 0; 200 if (!(jp->file[0].fields = newof(0, Field_t, NFIELD + 1, 0)) || 201 !(jp->file[1].fields = newof(0, Field_t, NFIELD + 1, 0))) 202 { 203 done(jp); 204 return 0; 205 } 206 jp->file[0].maxfields = NFIELD; 207 jp->file[1].maxfields = NFIELD; 208 jp->outmode = C_COMMON; 209 } 210 return jp; 211 } 212 213 static int 214 getolist(Join_t* jp, const char* first, char** arglist) 215 { 216 register const char* cp = first; 217 char** argv = arglist; 218 register int c; 219 int* outptr; 220 int* outmax; 221 int nfield = NFIELD; 222 char* str; 223 224 outptr = jp->outlist = newof(0, int, NFIELD + 1, 0); 225 outmax = outptr + NFIELD; 226 while (c = *cp++) 227 { 228 if (c==' ' || c=='\t' || c==',') 229 continue; 230 str = (char*)--cp; 231 if (*cp=='0' && ((c=cp[1])==0 || c==' ' || c=='\t' || c==',')) 232 { 233 str++; 234 c = JOINFIELD; 235 goto skip; 236 } 237 if (cp[1]!='.' || (*cp!='1' && *cp!='2') || (c=strtol(cp+2,&str,10)) <=0) 238 { 239 error(2,"%s: invalid field list",first); 240 break; 241 } 242 c--; 243 c <<=2; 244 if (*cp=='2') 245 c |=1; 246 skip: 247 if (outptr >= outmax) 248 { 249 jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0); 250 outptr = jp->outlist + nfield; 251 nfield *= 2; 252 outmax = jp->outlist + nfield; 253 } 254 *outptr++ = c; 255 cp = str; 256 } 257 /* need to accept obsolescent command syntax */ 258 while (1) 259 { 260 if (!(cp= *argv) || cp[1]!='.' || (*cp!='1' && *cp!='2')) 261 { 262 if (*cp=='0' && cp[1]==0) 263 { 264 c = JOINFIELD; 265 goto skip2; 266 } 267 break; 268 } 269 str = (char*)cp; 270 c = strtol(cp+2, &str,10); 271 if (*str || --c<0) 272 break; 273 argv++; 274 c <<= 2; 275 if (*cp=='2') 276 c |=1; 277 skip2: 278 if (outptr >= outmax) 279 { 280 jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0); 281 outptr = jp->outlist + nfield; 282 nfield *= 2; 283 outmax = jp->outlist + nfield; 284 } 285 *outptr++ = c; 286 } 287 *outptr = -1; 288 return argv-arglist; 289 } 290 291 /* 292 * read in a record from file <index> and split into fields 293 */ 294 static unsigned char* 295 getrec(Join_t* jp, int index, int discard) 296 { 297 register unsigned char* sp = jp->state; 298 register File_t* fp = &jp->file[index]; 299 register Field_t* field = fp->fields; 300 register Field_t* fieldmax = field + fp->maxfields; 301 register char* cp; 302 register int n; 303 char* tp; 304 305 if (sh_checksig(jp->context)) 306 return 0; 307 if (discard && fp->discard) 308 sfraise(fp->iop, SFSK_DISCARD, NiL); 309 fp->spaces = 0; 310 fp->hit = 0; 311 if (!(cp = sfgetr(fp->iop, '\n', 0))) 312 { 313 jp->outmode &= ~(1<<index); 314 return 0; 315 } 316 fp->recptr = cp; 317 fp->reclen = sfvalue(fp->iop); 318 if (jp->delim == '\n') /* handle new-line delimiter specially */ 319 { 320 field->beg = cp; 321 cp += fp->reclen; 322 field->end = cp - 1; 323 field++; 324 } 325 else 326 do /* separate into fields */ 327 { 328 if (field >= fieldmax) 329 { 330 n = 2 * fp->maxfields; 331 fp->fields = newof(fp->fields, Field_t, n + 1, 0); 332 field = fp->fields + fp->maxfields; 333 fp->maxfields = n; 334 fieldmax = fp->fields + n; 335 } 336 field->beg = cp; 337 if (jp->delim == -1) 338 { 339 switch (sp[*(unsigned char*)cp]) 340 { 341 case S_SPACE: 342 cp++; 343 break; 344 case S_WIDE: 345 tp = cp; 346 if (iswspace(mbchar(tp))) 347 { 348 cp = tp; 349 break; 350 } 351 /*FALLTHROUGH*/ 352 default: 353 goto next; 354 } 355 fp->spaces = 1; 356 if (jp->mb) 357 for (;;) 358 { 359 switch (sp[*(unsigned char*)cp++]) 360 { 361 case S_SPACE: 362 continue; 363 case S_WIDE: 364 tp = cp - 1; 365 if (iswspace(mbchar(tp))) 366 { 367 cp = tp; 368 continue; 369 } 370 break; 371 } 372 break; 373 } 374 else 375 while (sp[*(unsigned char*)cp++]==S_SPACE); 376 cp--; 377 } 378 next: 379 if (jp->mb) 380 { 381 for (;;) 382 { 383 tp = cp; 384 switch (n = sp[*(unsigned char*)cp++]) 385 { 386 case 0: 387 continue; 388 case S_WIDE: 389 cp--; 390 n = mbchar(cp); 391 if (n == jp->delim) 392 { 393 n = S_DELIM; 394 break; 395 } 396 if (jp->delim == -1 && iswspace(n)) 397 { 398 n = S_SPACE; 399 break; 400 } 401 continue; 402 } 403 break; 404 } 405 field->end = tp; 406 } 407 else 408 { 409 while (!(n = sp[*(unsigned char*)cp++])); 410 field->end = cp - 1; 411 } 412 field++; 413 } while (n != S_NL); 414 fp->nfields = field - fp->fields; 415 if ((n = fp->field) < fp->nfields) 416 { 417 cp = fp->fields[n].beg; 418 /* eliminate leading spaces */ 419 if (fp->spaces) 420 { 421 if (jp->mb) 422 for (;;) 423 { 424 switch (sp[*(unsigned char*)cp++]) 425 { 426 case S_SPACE: 427 continue; 428 case S_WIDE: 429 tp = cp - 1; 430 if (iswspace(mbchar(tp))) 431 { 432 cp = tp; 433 continue; 434 } 435 break; 436 } 437 break; 438 } 439 else 440 while (sp[*(unsigned char*)cp++]==S_SPACE); 441 cp--; 442 } 443 fp->fieldlen = fp->fields[n].end - cp; 444 return (unsigned char*)cp; 445 } 446 fp->fieldlen = 0; 447 return (unsigned char*)""; 448 } 449 450 static unsigned char* 451 _trace_getrec(Join_t* jp, int index, int discard) 452 { 453 unsigned char* r; 454 455 r = getrec(jp, index, discard); 456 return r; 457 } 458 #define getrec _trace_getrec 459 460 #if DEBUG_TRACE 461 static unsigned char* u1,u2,u3; 462 #define getrec(p,n,d) (u1 = getrec(p, n, d), sfprintf(sfstdout, "[G%d#%d@%I*d:%-.8s]", __LINE__, n, sizeof(Sfoff_t), sftell(p->file[n].iop), u1), u1) 463 #endif 464 465 /* 466 * print field <n> from file <index> 467 */ 468 static int 469 outfield(Join_t* jp, int index, register int n, int last) 470 { 471 register File_t* fp = &jp->file[index]; 472 register char* cp; 473 register char* cpmax; 474 register int size; 475 register Sfio_t* iop = jp->outfile; 476 char* tp; 477 478 if (n < fp->nfields) 479 { 480 cp = fp->fields[n].beg; 481 cpmax = fp->fields[n].end + 1; 482 } 483 else 484 cp = 0; 485 if ((n = jp->delim) == -1) 486 { 487 if (cp && fp->spaces) 488 { 489 register unsigned char* sp = jp->state; 490 491 /*eliminate leading spaces */ 492 if (jp->mb) 493 for (;;) 494 { 495 switch (sp[*(unsigned char*)cp++]) 496 { 497 case S_SPACE: 498 continue; 499 case S_WIDE: 500 tp = cp - 1; 501 if (iswspace(mbchar(tp))) 502 { 503 cp = tp; 504 continue; 505 } 506 break; 507 } 508 break; 509 } 510 else 511 while (sp[*(unsigned char*)cp++]==S_SPACE); 512 cp--; 513 } 514 n = ' '; 515 } 516 else if (jp->delimstr) 517 n = -1; 518 if (last) 519 n = '\n'; 520 if (cp) 521 size = cpmax - cp; 522 else 523 size = 0; 524 if (n == -1) 525 { 526 if (size<=1) 527 { 528 if (jp->nullfield && sfputr(iop, jp->nullfield, -1) < 0) 529 return -1; 530 } 531 else if (sfwrite(iop, cp, size) < 0) 532 return -1; 533 if (sfwrite(iop, jp->delimstr, jp->delimlen) < 0) 534 return -1; 535 } 536 else if (size <= 1) 537 { 538 if (!jp->nullfield) 539 sfputc(iop, n); 540 else if (sfputr(iop, jp->nullfield, n) < 0) 541 return -1; 542 } 543 else 544 { 545 last = cp[size-1]; 546 cp[size-1] = n; 547 if (sfwrite(iop, cp, size) < 0) 548 return -1; 549 cp[size-1] = last; 550 } 551 return 0; 552 } 553 554 #if DEBUG_TRACE 555 static int i1,i2,i3; 556 #define outfield(p,i,n,f) (sfprintf(sfstdout, "[F%d#%d:%d,%d]", __LINE__, i1=i, i2=n, i3=f), outfield(p, i1, i2, i3)) 557 #endif 558 559 static int 560 outrec(register Join_t* jp, int mode) 561 { 562 register File_t* fp; 563 register int i; 564 register int j; 565 register int k; 566 register int n; 567 int* out; 568 569 if (mode < 0 && jp->file[0].hit++) 570 return 0; 571 if (mode > 0 && jp->file[1].hit++) 572 return 0; 573 if (out = jp->outlist) 574 { 575 while ((n = *out++) >= 0) 576 { 577 if (n == JOINFIELD) 578 { 579 i = mode >= 0; 580 j = jp->file[i].field; 581 } 582 else 583 { 584 i = n & 1; 585 j = (mode<0 && i || mode>0 && !i) ? 586 jp->file[i].nfields : 587 n >> 2; 588 } 589 if (outfield(jp, i, j, *out < 0) < 0) 590 return -1; 591 } 592 return 0; 593 } 594 k = jp->file[0].nfields; 595 if (mode >= 0) 596 k += jp->file[1].nfields - 1; 597 for (i=0; i<2; i++) 598 { 599 fp = &jp->file[i]; 600 if (mode>0 && i==0) 601 { 602 k -= (fp->nfields - 1); 603 continue; 604 } 605 n = fp->field; 606 if (mode||i==0) 607 { 608 /* output join field first */ 609 if (outfield(jp,i,n,!--k) < 0) 610 return -1; 611 if (!k) 612 return 0; 613 for (j=0; j<n; j++) 614 { 615 if (outfield(jp,i,j,!--k) < 0) 616 return -1; 617 if (!k) 618 return 0; 619 } 620 j = n + 1; 621 } 622 else 623 j = 0; 624 for (;j<fp->nfields; j++) 625 { 626 if (j!=n && outfield(jp,i,j,!--k) < 0) 627 return -1; 628 if (!k) 629 return 0; 630 } 631 } 632 return 0; 633 } 634 635 #if DEBUG_TRACE 636 #define outrec(p,n) (sfprintf(sfstdout, "[R#%d,%d,%lld,%lld:%-.*s{%d}:%-.*s{%d}]", __LINE__, i1=n, lo, hi, jp->file[0].fieldlen, cp1, jp->file[0].hit, jp->file[1].fieldlen, cp2, jp->file[1].hit), outrec(p, i1)) 637 #endif 638 639 static int 640 join(Join_t* jp) 641 { 642 register unsigned char* cp1; 643 register unsigned char* cp2; 644 register int n1; 645 register int n2; 646 register int n; 647 register int cmp; 648 register int same; 649 int o2; 650 Sfoff_t lo = -1; 651 Sfoff_t hi = -1; 652 653 if ((cp1 = getrec(jp, 0, 0)) && (cp2 = getrec(jp, 1, 0)) || (cp2 = 0)) 654 { 655 n1 = jp->file[0].fieldlen; 656 n2 = jp->file[1].fieldlen; 657 same = 0; 658 for (;;) 659 { 660 n = n1 < n2 ? n1 : n2; 661 #if DEBUG_TRACE 662 if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n))) 663 cmp = n1 - n2; 664 sfprintf(sfstdout, "[C#%d:%d(%c-%c),%d,%lld,%lld%s]", __LINE__, cmp, *cp1, *cp2, same, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : ""); 665 if (!cmp) 666 #else 667 if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n)) && !(cmp = n1 - n2)) 668 #endif 669 { 670 if (!(jp->outmode & C_COMMON)) 671 { 672 if (cp1 = getrec(jp, 0, 1)) 673 { 674 n1 = jp->file[0].fieldlen; 675 same = 1; 676 continue; 677 } 678 if ((jp->ooutmode & (C_FILE1|C_FILE2)) != C_FILE2) 679 break; 680 if (sfseek(jp->file[0].iop, (Sfoff_t)-jp->file[0].reclen, SEEK_CUR) < 0 || !(cp1 = getrec(jp, 0, 0))) 681 { 682 error(ERROR_SYSTEM|2, "%s: seek error", jp->file[0].name); 683 return -1; 684 } 685 } 686 else if (outrec(jp, 0) < 0) 687 return -1; 688 else if (lo < 0 && (jp->outmode & C_COMMON)) 689 { 690 if ((lo = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0) 691 { 692 error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name); 693 return -1; 694 } 695 lo -= jp->file[1].reclen; 696 } 697 if (cp2 = getrec(jp, 1, lo < 0)) 698 { 699 n2 = jp->file[1].fieldlen; 700 continue; 701 } 702 #if DEBUG_TRACE 703 sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi); 704 #endif 705 } 706 else if (cmp > 0) 707 { 708 if (same) 709 { 710 same = 0; 711 next: 712 if (n2 > jp->samesize) 713 { 714 jp->samesize = roundof(n2, 16); 715 if (!(jp->same = newof(jp->same, char, jp->samesize, 0))) 716 { 717 error(ERROR_SYSTEM|2, "out of space"); 718 return -1; 719 } 720 } 721 memcpy(jp->same, cp2, o2 = n2); 722 if (!(cp2 = getrec(jp, 1, 0))) 723 break; 724 n2 = jp->file[1].fieldlen; 725 if (n2 == o2 && *cp2 == *jp->same && !memcmp(cp2, jp->same, n2)) 726 goto next; 727 continue; 728 } 729 if (hi >= 0) 730 { 731 if (sfseek(jp->file[1].iop, hi, SEEK_SET) != hi) 732 { 733 error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name); 734 return -1; 735 } 736 hi = -1; 737 } 738 else if ((jp->outmode & C_FILE2) && outrec(jp, 1) < 0) 739 return -1; 740 lo = -1; 741 if (cp2 = getrec(jp, 1, 1)) 742 { 743 n2 = jp->file[1].fieldlen; 744 continue; 745 } 746 #if DEBUG_TRACE 747 sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi); 748 #endif 749 } 750 else if (same) 751 { 752 same = 0; 753 if (!(cp1 = getrec(jp, 0, 0))) 754 break; 755 n1 = jp->file[0].fieldlen; 756 continue; 757 } 758 if (lo >= 0) 759 { 760 if ((hi = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0 || 761 (hi -= jp->file[1].reclen) < 0 || 762 sfseek(jp->file[1].iop, lo, SEEK_SET) != lo || 763 !(cp2 = getrec(jp, 1, 0))) 764 { 765 error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name); 766 return -1; 767 } 768 n2 = jp->file[1].fieldlen; 769 lo = -1; 770 if (jp->file[1].discard) 771 sfseek(jp->file[1].iop, (Sfoff_t)-1, SEEK_SET); 772 } 773 else if (!cp2) 774 break; 775 else if ((jp->outmode & C_FILE1) && outrec(jp, -1) < 0) 776 return -1; 777 if (!(cp1 = getrec(jp, 0, 1))) 778 break; 779 n1 = jp->file[0].fieldlen; 780 } 781 } 782 #if DEBUG_TRACE 783 sfprintf(sfstdout, "[X#%d:?,%p,%p,%d%,%d,%d%s]", __LINE__, cp1, cp2, cmp, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : ""); 784 #endif 785 if (cp2) 786 { 787 if (hi >= 0 && 788 sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR) < hi && 789 sfseek(jp->file[1].iop, hi, SEEK_SET) != hi) 790 { 791 error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name); 792 return -1; 793 } 794 #if DEBUG_TRACE 795 sfprintf(sfstdout, "[O#%d:%02o:%02o]", __LINE__, jp->ooutmode, jp->outmode); 796 #endif 797 cp1 = (!cp1 && cmp && hi < 0 && !jp->file[1].hit && ((jp->ooutmode ^ C_ALL) <= 1 || jp->outmode == 2)) ? cp2 : getrec(jp, 1, 0); 798 cmp = 1; 799 n = 1; 800 } 801 else 802 { 803 cmp = -1; 804 n = 0; 805 } 806 #if DEBUG_TRACE 807 sfprintf(sfstdout, "[X#%d:%d,%p,%p,%d,%02o,%02o%s]", __LINE__, n, cp1, cp2, cmp, jp->ooutmode, jp->outmode, (jp->outmode & C_COMMON) ? ",COMMON" : ""); 808 #endif 809 if (!cp1 || !(jp->outmode & (1<<n))) 810 { 811 if (cp1 && jp->file[n].iop == sfstdin) 812 sfseek(sfstdin, (Sfoff_t)0, SEEK_END); 813 return 0; 814 } 815 if (outrec(jp, cmp) < 0) 816 return -1; 817 do 818 { 819 if (!getrec(jp, n, 1)) 820 return 0; 821 } while (outrec(jp, cmp) >= 0); 822 return -1; 823 } 824 825 int 826 b_join(int argc, char** argv, void* context) 827 { 828 register int n; 829 register char* cp; 830 register Join_t* jp; 831 char* e; 832 833 #if !DEBUG_TRACE 834 cmdinit(argc, argv, context, ERROR_CATALOG, ERROR_NOTIFY); 835 #endif 836 if (!(jp = init())) 837 error(ERROR_system(1),"out of space"); 838 jp->context = context; 839 for (;;) 840 { 841 switch (n = optget(argv, usage)) 842 { 843 case 0: 844 break; 845 case 'j': 846 /* 847 * check for obsolete "-j1 field" and "-j2 field" 848 */ 849 850 if (opt_info.offset == 0) 851 { 852 cp = argv[opt_info.index - 1]; 853 for (n = strlen(cp) - 1; n > 0 && cp[n] != 'j'; n--); 854 n = cp[n] == 'j'; 855 } 856 else 857 n = 0; 858 if (n) 859 { 860 if (opt_info.num!=1 && opt_info.num!=2) 861 error(2,"-jfileno field: fileno must be 1 or 2"); 862 n = '0' + opt_info.num; 863 if (!(cp = argv[opt_info.index])) 864 { 865 argc = 0; 866 break; 867 } 868 opt_info.num = strtol(cp, &e, 10); 869 if (*e) 870 { 871 argc = 0; 872 break; 873 } 874 opt_info.index++; 875 } 876 else 877 { 878 jp->file[0].field = (int)(opt_info.num-1); 879 n = '2'; 880 } 881 /*FALLTHROUGH*/ 882 case '1': 883 case '2': 884 if (opt_info.num <=0) 885 error(2,"field number must positive"); 886 jp->file[n-'1'].field = (int)(opt_info.num-1); 887 continue; 888 case 'v': 889 jp->outmode &= ~C_COMMON; 890 /*FALLTHROUGH*/ 891 case 'a': 892 if (opt_info.num!=1 && opt_info.num!=2) 893 error(2,"%s: file number must be 1 or 2", opt_info.name); 894 jp->outmode |= 1<<(opt_info.num-1); 895 continue; 896 case 'e': 897 jp->nullfield = opt_info.arg; 898 continue; 899 case 'o': 900 /* need to accept obsolescent command syntax */ 901 n = getolist(jp, opt_info.arg, argv+opt_info.index); 902 opt_info.index += n; 903 continue; 904 case 't': 905 jp->state[' '] = jp->state['\t'] = 0; 906 if (jp->mb) 907 { 908 cp = opt_info.arg; 909 jp->delim = mbchar(cp); 910 if ((n = cp - opt_info.arg) > 1) 911 { 912 jp->delimlen = n; 913 jp->delimstr = opt_info.arg; 914 continue; 915 } 916 } 917 n = *(unsigned char*)opt_info.arg; 918 jp->state[n] = S_DELIM; 919 jp->delim = n; 920 continue; 921 case 'i': 922 jp->ignorecase = !opt_info.num; 923 continue; 924 case 'B': 925 jp->buffered = !opt_info.num; 926 continue; 927 case ':': 928 error(2, "%s", opt_info.arg); 929 break; 930 case '?': 931 done(jp); 932 error(ERROR_usage(2), "%s", opt_info.arg); 933 break; 934 } 935 break; 936 } 937 argv += opt_info.index; 938 argc -= opt_info.index; 939 if (error_info.errors || argc!=2) 940 { 941 done(jp); 942 error(ERROR_usage(2),"%s", optusage(NiL)); 943 } 944 jp->ooutmode = jp->outmode; 945 jp->file[0].name = cp = *argv++; 946 if (streq(cp,"-")) 947 { 948 if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0) 949 { 950 if (sfdcseekable(sfstdin)) 951 error(ERROR_warn(0),"%s: seek may fail",cp); 952 else 953 jp->file[0].discard = 1; 954 } 955 jp->file[0].iop = sfstdin; 956 } 957 else if (!(jp->file[0].iop = sfopen(NiL, cp, "r"))) 958 { 959 done(jp); 960 error(ERROR_system(1),"%s: cannot open",cp); 961 } 962 jp->file[1].name = cp = *argv; 963 if (streq(cp,"-")) 964 { 965 if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0) 966 { 967 if (sfdcseekable(sfstdin)) 968 error(ERROR_warn(0),"%s: seek may fail",cp); 969 else 970 jp->file[1].discard = 1; 971 } 972 jp->file[1].iop = sfstdin; 973 } 974 else if (!(jp->file[1].iop = sfopen(NiL, cp, "r"))) 975 { 976 done(jp); 977 error(ERROR_system(1),"%s: cannot open",cp); 978 } 979 if (jp->buffered) 980 { 981 sfsetbuf(jp->file[0].iop, jp->file[0].iop, SF_UNBOUND); 982 sfsetbuf(jp->file[1].iop, jp->file[1].iop, SF_UNBOUND); 983 } 984 jp->outfile = sfstdout; 985 if (!jp->outlist) 986 jp->nullfield = 0; 987 if (join(jp) < 0) 988 { 989 done(jp); 990 error(ERROR_system(1),"write error"); 991 } 992 else if (jp->file[0].iop==sfstdin || jp->file[1].iop==sfstdin) 993 sfseek(sfstdin,(Sfoff_t)0,SEEK_END); 994 done(jp); 995 return error_info.errors; 996 } 997