1*906afcb8SAndy Fiddaman /*********************************************************************** 2*906afcb8SAndy Fiddaman * * 3*906afcb8SAndy Fiddaman * This software is part of the ast package * 4*906afcb8SAndy Fiddaman * Copyright (c) 1992-2012 AT&T Intellectual Property * 5*906afcb8SAndy Fiddaman * and is licensed under the * 6*906afcb8SAndy Fiddaman * Eclipse Public License, Version 1.0 * 7*906afcb8SAndy Fiddaman * by AT&T Intellectual Property * 8*906afcb8SAndy Fiddaman * * 9*906afcb8SAndy Fiddaman * A copy of the License is available at * 10*906afcb8SAndy Fiddaman * http://www.eclipse.org/org/documents/epl-v10.html * 11*906afcb8SAndy Fiddaman * (with md5 checksum b35adb5213ca9657e911e9befb180842) * 12*906afcb8SAndy Fiddaman * * 13*906afcb8SAndy Fiddaman * Information and Software Systems Research * 14*906afcb8SAndy Fiddaman * AT&T Research * 15*906afcb8SAndy Fiddaman * Florham Park NJ * 16*906afcb8SAndy Fiddaman * * 17*906afcb8SAndy Fiddaman * Glenn Fowler <gsf@research.att.com> * 18*906afcb8SAndy Fiddaman * David Korn <dgk@research.att.com> * 19*906afcb8SAndy Fiddaman * * 20*906afcb8SAndy Fiddaman ***********************************************************************/ 21*906afcb8SAndy Fiddaman #pragma prototyped 22*906afcb8SAndy Fiddaman /* 23*906afcb8SAndy Fiddaman * David Korn 24*906afcb8SAndy Fiddaman * Glenn Fowler 25*906afcb8SAndy Fiddaman * AT&T Research 26*906afcb8SAndy Fiddaman * 27*906afcb8SAndy Fiddaman * join 28*906afcb8SAndy Fiddaman */ 29*906afcb8SAndy Fiddaman 30*906afcb8SAndy Fiddaman static const char usage[] = 31*906afcb8SAndy Fiddaman "[-?\n@(#)$Id: join (AT&T Research) 2009-12-10 $\n]" 32*906afcb8SAndy Fiddaman USAGE_LICENSE 33*906afcb8SAndy Fiddaman "[+NAME?join - relational database operator]" 34*906afcb8SAndy Fiddaman "[+DESCRIPTION?\bjoin\b performs an \aequality join\a on the files \afile1\a " 35*906afcb8SAndy Fiddaman "and \afile2\a and writes the resulting joined files to standard " 36*906afcb8SAndy Fiddaman "output. By default, a field is delimited by one or more spaces " 37*906afcb8SAndy Fiddaman "and tabs with leading spaces and/or tabs ignored. The \b-t\b option " 38*906afcb8SAndy Fiddaman "can be used to change the field delimiter.]" 39*906afcb8SAndy Fiddaman "[+?The \ajoin field\a is a field in each file on which files are compared. " 40*906afcb8SAndy Fiddaman "By default \bjoin\b writes one line in the output for each pair " 41*906afcb8SAndy Fiddaman "of lines in \afiles1\a and \afiles2\a that have identical join " 42*906afcb8SAndy Fiddaman "fields. The default output line consists of the join field, " 43*906afcb8SAndy Fiddaman "then the remaining fields from \afile1\a, then the remaining " 44*906afcb8SAndy Fiddaman "fields from \afile2\a, but this can be changed with the \b-o\b " 45*906afcb8SAndy Fiddaman "option. The \b-a\b option can be used to add unmatched lines " 46*906afcb8SAndy Fiddaman "to the output. The \b-v\b option can be used to output only " 47*906afcb8SAndy Fiddaman "unmatched lines.]" 48*906afcb8SAndy Fiddaman "[+?The files \afile1\a and \afile2\a must be ordered in the collating " 49*906afcb8SAndy Fiddaman "sequence of \bsort -b\b on the fields on which they are to be " 50*906afcb8SAndy Fiddaman "joined otherwise the results are unspecified.]" 51*906afcb8SAndy Fiddaman "[+?If either \afile1\a or \afile2\a is \b-\b, \bjoin\b " 52*906afcb8SAndy Fiddaman "uses standard input starting at the current location.]" 53*906afcb8SAndy Fiddaman 54*906afcb8SAndy Fiddaman "[e:empty]:[string?Replace empty output fields in the list selected with" 55*906afcb8SAndy Fiddaman " \b-o\b with \astring\a.]" 56*906afcb8SAndy Fiddaman "[o:output]:[list?Construct the output line to comprise the fields specified " 57*906afcb8SAndy Fiddaman "in a blank or comma separated list \alist\a. Each element in " 58*906afcb8SAndy Fiddaman "\alist\a consists of a file number (either 1 or 2), a period, " 59*906afcb8SAndy Fiddaman "and a field number or \b0\b representing the join field. " 60*906afcb8SAndy Fiddaman "As an obsolete feature multiple occurrences of \b-o\b can " 61*906afcb8SAndy Fiddaman "be specified.]" 62*906afcb8SAndy Fiddaman "[t:separator|tabs]:[delim?Use \adelim\a as the field separator for both input" 63*906afcb8SAndy Fiddaman " and output.]" 64*906afcb8SAndy Fiddaman "[1:j1]#[field?Join on field \afield\a of \afile1\a. Fields start at 1.]" 65*906afcb8SAndy Fiddaman "[2:j2]#[field?Join on field \afield\a of \afile2\a. Fields start at 1.]" 66*906afcb8SAndy Fiddaman "[j:join]#[field?Equivalent to \b-1\b \afield\a \b-2\b \afield\a.]" 67*906afcb8SAndy Fiddaman "[a:unpairable]#[fileno?Write a line for each unpairable line in file" 68*906afcb8SAndy Fiddaman " \afileno\a, where \afileno\a is either 1 or 2, in addition to the" 69*906afcb8SAndy Fiddaman " normal output. If \b-a\b options appear for both 1 and 2, then " 70*906afcb8SAndy Fiddaman "all unpairable lines will be output.]" 71*906afcb8SAndy Fiddaman "[v:suppress]#[fileno?Write a line for each unpairable line in file" 72*906afcb8SAndy Fiddaman " \afileno\a, where \afileno\a is either 1 or 2, instead of the normal " 73*906afcb8SAndy Fiddaman "output. If \b-v\b options appear for both 1 and 2, then " 74*906afcb8SAndy Fiddaman "all unpairable lines will be output.] ]" 75*906afcb8SAndy Fiddaman "[i:ignorecase?Ignore case in field comparisons.]" 76*906afcb8SAndy Fiddaman "[B!:mmap?Enable memory mapped reads instead of buffered.]" 77*906afcb8SAndy Fiddaman 78*906afcb8SAndy Fiddaman "[+?The following obsolete option forms are also recognized: \b-j\b \afield\a" 79*906afcb8SAndy Fiddaman " is equivalent to \b-1\b \afield\a \b-2\b \afield\a, \b-j1\b \afield\a" 80*906afcb8SAndy Fiddaman " is equivalent to \b-1\b \afield\a, and \b-j2\b \afield\a is" 81*906afcb8SAndy Fiddaman " equivalent to \b-2\b \afield\a.]" 82*906afcb8SAndy Fiddaman 83*906afcb8SAndy Fiddaman "\n" 84*906afcb8SAndy Fiddaman "\nfile1 file2\n" 85*906afcb8SAndy Fiddaman "\n" 86*906afcb8SAndy Fiddaman "[+EXIT STATUS?]{" 87*906afcb8SAndy Fiddaman "[+0?Both files processed successfully.]" 88*906afcb8SAndy Fiddaman "[+>0?An error occurred.]" 89*906afcb8SAndy Fiddaman "}" 90*906afcb8SAndy Fiddaman "[+SEE ALSO?\bcut\b(1), \bcomm\b(1), \bpaste\b(1), \bsort\b(1), \buniq\b(1)]" 91*906afcb8SAndy Fiddaman ; 92*906afcb8SAndy Fiddaman 93*906afcb8SAndy Fiddaman #include <cmd.h> 94*906afcb8SAndy Fiddaman #include <sfdisc.h> 95*906afcb8SAndy Fiddaman 96*906afcb8SAndy Fiddaman #if _hdr_wchar && _hdr_wctype && _lib_iswctype 97*906afcb8SAndy Fiddaman 98*906afcb8SAndy Fiddaman #include <wchar.h> 99*906afcb8SAndy Fiddaman #include <wctype.h> 100*906afcb8SAndy Fiddaman 101*906afcb8SAndy Fiddaman #else 102*906afcb8SAndy Fiddaman 103*906afcb8SAndy Fiddaman #include <ctype.h> 104*906afcb8SAndy Fiddaman 105*906afcb8SAndy Fiddaman #ifndef iswspace 106*906afcb8SAndy Fiddaman #define iswspace(x) isspace(x) 107*906afcb8SAndy Fiddaman #endif 108*906afcb8SAndy Fiddaman 109*906afcb8SAndy Fiddaman #endif 110*906afcb8SAndy Fiddaman 111*906afcb8SAndy Fiddaman #define C_FILE1 001 112*906afcb8SAndy Fiddaman #define C_FILE2 002 113*906afcb8SAndy Fiddaman #define C_COMMON 004 114*906afcb8SAndy Fiddaman #define C_ALL (C_FILE1|C_FILE2|C_COMMON) 115*906afcb8SAndy Fiddaman 116*906afcb8SAndy Fiddaman #define NFIELD 10 117*906afcb8SAndy Fiddaman #define JOINFIELD 2 118*906afcb8SAndy Fiddaman 119*906afcb8SAndy Fiddaman #define S_DELIM 1 120*906afcb8SAndy Fiddaman #define S_SPACE 2 121*906afcb8SAndy Fiddaman #define S_NL 3 122*906afcb8SAndy Fiddaman #define S_WIDE 4 123*906afcb8SAndy Fiddaman 124*906afcb8SAndy Fiddaman typedef struct Field_s 125*906afcb8SAndy Fiddaman { 126*906afcb8SAndy Fiddaman char* beg; 127*906afcb8SAndy Fiddaman char* end; 128*906afcb8SAndy Fiddaman } Field_t; 129*906afcb8SAndy Fiddaman 130*906afcb8SAndy Fiddaman typedef struct File_s 131*906afcb8SAndy Fiddaman { 132*906afcb8SAndy Fiddaman Sfio_t* iop; 133*906afcb8SAndy Fiddaman char* name; 134*906afcb8SAndy Fiddaman char* recptr; 135*906afcb8SAndy Fiddaman int reclen; 136*906afcb8SAndy Fiddaman int field; 137*906afcb8SAndy Fiddaman int fieldlen; 138*906afcb8SAndy Fiddaman int nfields; 139*906afcb8SAndy Fiddaman int maxfields; 140*906afcb8SAndy Fiddaman int spaces; 141*906afcb8SAndy Fiddaman int hit; 142*906afcb8SAndy Fiddaman int discard; 143*906afcb8SAndy Fiddaman Field_t* fields; 144*906afcb8SAndy Fiddaman } File_t; 145*906afcb8SAndy Fiddaman 146*906afcb8SAndy Fiddaman typedef struct Join_s 147*906afcb8SAndy Fiddaman { 148*906afcb8SAndy Fiddaman unsigned char state[1<<CHAR_BIT]; 149*906afcb8SAndy Fiddaman Sfio_t* outfile; 150*906afcb8SAndy Fiddaman int* outlist; 151*906afcb8SAndy Fiddaman int outmode; 152*906afcb8SAndy Fiddaman int ooutmode; 153*906afcb8SAndy Fiddaman char* nullfield; 154*906afcb8SAndy Fiddaman char* delimstr; 155*906afcb8SAndy Fiddaman int delim; 156*906afcb8SAndy Fiddaman int delimlen; 157*906afcb8SAndy Fiddaman int buffered; 158*906afcb8SAndy Fiddaman int ignorecase; 159*906afcb8SAndy Fiddaman int mb; 160*906afcb8SAndy Fiddaman char* same; 161*906afcb8SAndy Fiddaman int samesize; 162*906afcb8SAndy Fiddaman Shbltin_t* context; 163*906afcb8SAndy Fiddaman File_t file[2]; 164*906afcb8SAndy Fiddaman } Join_t; 165*906afcb8SAndy Fiddaman 166*906afcb8SAndy Fiddaman static void 167*906afcb8SAndy Fiddaman done(register Join_t* jp) 168*906afcb8SAndy Fiddaman { 169*906afcb8SAndy Fiddaman if (jp->file[0].iop && jp->file[0].iop != sfstdin) 170*906afcb8SAndy Fiddaman sfclose(jp->file[0].iop); 171*906afcb8SAndy Fiddaman if (jp->file[1].iop && jp->file[1].iop != sfstdin) 172*906afcb8SAndy Fiddaman sfclose(jp->file[1].iop); 173*906afcb8SAndy Fiddaman if (jp->outlist) 174*906afcb8SAndy Fiddaman free(jp->outlist); 175*906afcb8SAndy Fiddaman if (jp->file[0].fields) 176*906afcb8SAndy Fiddaman free(jp->file[0].fields); 177*906afcb8SAndy Fiddaman if (jp->file[1].fields) 178*906afcb8SAndy Fiddaman free(jp->file[1].fields); 179*906afcb8SAndy Fiddaman if (jp->same) 180*906afcb8SAndy Fiddaman free(jp->same); 181*906afcb8SAndy Fiddaman free(jp); 182*906afcb8SAndy Fiddaman } 183*906afcb8SAndy Fiddaman 184*906afcb8SAndy Fiddaman static Join_t* 185*906afcb8SAndy Fiddaman init(void) 186*906afcb8SAndy Fiddaman { 187*906afcb8SAndy Fiddaman register Join_t* jp; 188*906afcb8SAndy Fiddaman register int i; 189*906afcb8SAndy Fiddaman 190*906afcb8SAndy Fiddaman setlocale(LC_ALL, ""); 191*906afcb8SAndy Fiddaman if (jp = newof(0, Join_t, 1, 0)) 192*906afcb8SAndy Fiddaman { 193*906afcb8SAndy Fiddaman if (jp->mb = mbwide()) 194*906afcb8SAndy Fiddaman for (i = 0x80; i <= 0xff; i++) 195*906afcb8SAndy Fiddaman jp->state[i] = S_WIDE; 196*906afcb8SAndy Fiddaman jp->state[' '] = jp->state['\t'] = S_SPACE; 197*906afcb8SAndy Fiddaman jp->state['\n'] = S_NL; 198*906afcb8SAndy Fiddaman jp->delim = -1; 199*906afcb8SAndy Fiddaman jp->nullfield = 0; 200*906afcb8SAndy Fiddaman if (!(jp->file[0].fields = newof(0, Field_t, NFIELD + 1, 0)) || 201*906afcb8SAndy Fiddaman !(jp->file[1].fields = newof(0, Field_t, NFIELD + 1, 0))) 202*906afcb8SAndy Fiddaman { 203*906afcb8SAndy Fiddaman done(jp); 204*906afcb8SAndy Fiddaman return 0; 205*906afcb8SAndy Fiddaman } 206*906afcb8SAndy Fiddaman jp->file[0].maxfields = NFIELD; 207*906afcb8SAndy Fiddaman jp->file[1].maxfields = NFIELD; 208*906afcb8SAndy Fiddaman jp->outmode = C_COMMON; 209*906afcb8SAndy Fiddaman } 210*906afcb8SAndy Fiddaman return jp; 211*906afcb8SAndy Fiddaman } 212*906afcb8SAndy Fiddaman 213*906afcb8SAndy Fiddaman static int 214*906afcb8SAndy Fiddaman getolist(Join_t* jp, const char* first, char** arglist) 215*906afcb8SAndy Fiddaman { 216*906afcb8SAndy Fiddaman register const char* cp = first; 217*906afcb8SAndy Fiddaman char** argv = arglist; 218*906afcb8SAndy Fiddaman register int c; 219*906afcb8SAndy Fiddaman int* outptr; 220*906afcb8SAndy Fiddaman int* outmax; 221*906afcb8SAndy Fiddaman int nfield = NFIELD; 222*906afcb8SAndy Fiddaman char* str; 223*906afcb8SAndy Fiddaman 224*906afcb8SAndy Fiddaman outptr = jp->outlist = newof(0, int, NFIELD + 1, 0); 225*906afcb8SAndy Fiddaman outmax = outptr + NFIELD; 226*906afcb8SAndy Fiddaman while (c = *cp++) 227*906afcb8SAndy Fiddaman { 228*906afcb8SAndy Fiddaman if (c==' ' || c=='\t' || c==',') 229*906afcb8SAndy Fiddaman continue; 230*906afcb8SAndy Fiddaman str = (char*)--cp; 231*906afcb8SAndy Fiddaman if (*cp=='0' && ((c=cp[1])==0 || c==' ' || c=='\t' || c==',')) 232*906afcb8SAndy Fiddaman { 233*906afcb8SAndy Fiddaman str++; 234*906afcb8SAndy Fiddaman c = JOINFIELD; 235*906afcb8SAndy Fiddaman goto skip; 236*906afcb8SAndy Fiddaman } 237*906afcb8SAndy Fiddaman if (cp[1]!='.' || (*cp!='1' && *cp!='2') || (c=strtol(cp+2,&str,10)) <=0) 238*906afcb8SAndy Fiddaman { 239*906afcb8SAndy Fiddaman error(2,"%s: invalid field list",first); 240*906afcb8SAndy Fiddaman break; 241*906afcb8SAndy Fiddaman } 242*906afcb8SAndy Fiddaman c--; 243*906afcb8SAndy Fiddaman c <<=2; 244*906afcb8SAndy Fiddaman if (*cp=='2') 245*906afcb8SAndy Fiddaman c |=1; 246*906afcb8SAndy Fiddaman skip: 247*906afcb8SAndy Fiddaman if (outptr >= outmax) 248*906afcb8SAndy Fiddaman { 249*906afcb8SAndy Fiddaman jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0); 250*906afcb8SAndy Fiddaman outptr = jp->outlist + nfield; 251*906afcb8SAndy Fiddaman nfield *= 2; 252*906afcb8SAndy Fiddaman outmax = jp->outlist + nfield; 253*906afcb8SAndy Fiddaman } 254*906afcb8SAndy Fiddaman *outptr++ = c; 255*906afcb8SAndy Fiddaman cp = str; 256*906afcb8SAndy Fiddaman } 257*906afcb8SAndy Fiddaman /* need to accept obsolescent command syntax */ 258*906afcb8SAndy Fiddaman while (cp = *argv) 259*906afcb8SAndy Fiddaman { 260*906afcb8SAndy Fiddaman if (cp[1]!='.' || (*cp!='1' && *cp!='2')) 261*906afcb8SAndy Fiddaman { 262*906afcb8SAndy Fiddaman if (*cp=='0' && cp[1]==0) 263*906afcb8SAndy Fiddaman { 264*906afcb8SAndy Fiddaman c = JOINFIELD; 265*906afcb8SAndy Fiddaman goto skip2; 266*906afcb8SAndy Fiddaman } 267*906afcb8SAndy Fiddaman break; 268*906afcb8SAndy Fiddaman } 269*906afcb8SAndy Fiddaman str = (char*)cp; 270*906afcb8SAndy Fiddaman c = strtol(cp+2, &str,10); 271*906afcb8SAndy Fiddaman if (*str || --c<0) 272*906afcb8SAndy Fiddaman break; 273*906afcb8SAndy Fiddaman argv++; 274*906afcb8SAndy Fiddaman c <<= 2; 275*906afcb8SAndy Fiddaman if (*cp=='2') 276*906afcb8SAndy Fiddaman c |=1; 277*906afcb8SAndy Fiddaman skip2: 278*906afcb8SAndy Fiddaman if (outptr >= outmax) 279*906afcb8SAndy Fiddaman { 280*906afcb8SAndy Fiddaman jp->outlist = newof(jp->outlist, int, 2 * nfield + 1, 0); 281*906afcb8SAndy Fiddaman outptr = jp->outlist + nfield; 282*906afcb8SAndy Fiddaman nfield *= 2; 283*906afcb8SAndy Fiddaman outmax = jp->outlist + nfield; 284*906afcb8SAndy Fiddaman } 285*906afcb8SAndy Fiddaman *outptr++ = c; 286*906afcb8SAndy Fiddaman } 287*906afcb8SAndy Fiddaman *outptr = -1; 288*906afcb8SAndy Fiddaman return argv-arglist; 289*906afcb8SAndy Fiddaman } 290*906afcb8SAndy Fiddaman 291*906afcb8SAndy Fiddaman /* 292*906afcb8SAndy Fiddaman * read in a record from file <index> and split into fields 293*906afcb8SAndy Fiddaman */ 294*906afcb8SAndy Fiddaman static unsigned char* 295*906afcb8SAndy Fiddaman getrec(Join_t* jp, int index, int discard) 296*906afcb8SAndy Fiddaman { 297*906afcb8SAndy Fiddaman register unsigned char* sp = jp->state; 298*906afcb8SAndy Fiddaman register File_t* fp = &jp->file[index]; 299*906afcb8SAndy Fiddaman register Field_t* field = fp->fields; 300*906afcb8SAndy Fiddaman register Field_t* fieldmax = field + fp->maxfields; 301*906afcb8SAndy Fiddaman register char* cp; 302*906afcb8SAndy Fiddaman register int n; 303*906afcb8SAndy Fiddaman char* tp; 304*906afcb8SAndy Fiddaman 305*906afcb8SAndy Fiddaman if (sh_checksig(jp->context)) 306*906afcb8SAndy Fiddaman return 0; 307*906afcb8SAndy Fiddaman if (discard && fp->discard) 308*906afcb8SAndy Fiddaman sfraise(fp->iop, SFSK_DISCARD, NiL); 309*906afcb8SAndy Fiddaman fp->spaces = 0; 310*906afcb8SAndy Fiddaman fp->hit = 0; 311*906afcb8SAndy Fiddaman if (!(cp = sfgetr(fp->iop, '\n', 0))) 312*906afcb8SAndy Fiddaman { 313*906afcb8SAndy Fiddaman jp->outmode &= ~(1<<index); 314*906afcb8SAndy Fiddaman return 0; 315*906afcb8SAndy Fiddaman } 316*906afcb8SAndy Fiddaman fp->recptr = cp; 317*906afcb8SAndy Fiddaman fp->reclen = sfvalue(fp->iop); 318*906afcb8SAndy Fiddaman if (jp->delim == '\n') /* handle new-line delimiter specially */ 319*906afcb8SAndy Fiddaman { 320*906afcb8SAndy Fiddaman field->beg = cp; 321*906afcb8SAndy Fiddaman cp += fp->reclen; 322*906afcb8SAndy Fiddaman field->end = cp - 1; 323*906afcb8SAndy Fiddaman field++; 324*906afcb8SAndy Fiddaman } 325*906afcb8SAndy Fiddaman else 326*906afcb8SAndy Fiddaman do /* separate into fields */ 327*906afcb8SAndy Fiddaman { 328*906afcb8SAndy Fiddaman if (field >= fieldmax) 329*906afcb8SAndy Fiddaman { 330*906afcb8SAndy Fiddaman n = 2 * fp->maxfields; 331*906afcb8SAndy Fiddaman fp->fields = newof(fp->fields, Field_t, n + 1, 0); 332*906afcb8SAndy Fiddaman field = fp->fields + fp->maxfields; 333*906afcb8SAndy Fiddaman fp->maxfields = n; 334*906afcb8SAndy Fiddaman fieldmax = fp->fields + n; 335*906afcb8SAndy Fiddaman } 336*906afcb8SAndy Fiddaman field->beg = cp; 337*906afcb8SAndy Fiddaman if (jp->delim == -1) 338*906afcb8SAndy Fiddaman { 339*906afcb8SAndy Fiddaman switch (sp[*(unsigned char*)cp]) 340*906afcb8SAndy Fiddaman { 341*906afcb8SAndy Fiddaman case S_SPACE: 342*906afcb8SAndy Fiddaman cp++; 343*906afcb8SAndy Fiddaman break; 344*906afcb8SAndy Fiddaman case S_WIDE: 345*906afcb8SAndy Fiddaman tp = cp; 346*906afcb8SAndy Fiddaman if (iswspace(mbchar(tp))) 347*906afcb8SAndy Fiddaman { 348*906afcb8SAndy Fiddaman cp = tp; 349*906afcb8SAndy Fiddaman break; 350*906afcb8SAndy Fiddaman } 351*906afcb8SAndy Fiddaman /*FALLTHROUGH*/ 352*906afcb8SAndy Fiddaman default: 353*906afcb8SAndy Fiddaman goto next; 354*906afcb8SAndy Fiddaman } 355*906afcb8SAndy Fiddaman fp->spaces = 1; 356*906afcb8SAndy Fiddaman if (jp->mb) 357*906afcb8SAndy Fiddaman for (;;) 358*906afcb8SAndy Fiddaman { 359*906afcb8SAndy Fiddaman switch (sp[*(unsigned char*)cp++]) 360*906afcb8SAndy Fiddaman { 361*906afcb8SAndy Fiddaman case S_SPACE: 362*906afcb8SAndy Fiddaman continue; 363*906afcb8SAndy Fiddaman case S_WIDE: 364*906afcb8SAndy Fiddaman tp = cp - 1; 365*906afcb8SAndy Fiddaman if (iswspace(mbchar(tp))) 366*906afcb8SAndy Fiddaman { 367*906afcb8SAndy Fiddaman cp = tp; 368*906afcb8SAndy Fiddaman continue; 369*906afcb8SAndy Fiddaman } 370*906afcb8SAndy Fiddaman break; 371*906afcb8SAndy Fiddaman } 372*906afcb8SAndy Fiddaman break; 373*906afcb8SAndy Fiddaman } 374*906afcb8SAndy Fiddaman else 375*906afcb8SAndy Fiddaman while (sp[*(unsigned char*)cp++]==S_SPACE); 376*906afcb8SAndy Fiddaman cp--; 377*906afcb8SAndy Fiddaman } 378*906afcb8SAndy Fiddaman next: 379*906afcb8SAndy Fiddaman if (jp->mb) 380*906afcb8SAndy Fiddaman { 381*906afcb8SAndy Fiddaman for (;;) 382*906afcb8SAndy Fiddaman { 383*906afcb8SAndy Fiddaman tp = cp; 384*906afcb8SAndy Fiddaman switch (n = sp[*(unsigned char*)cp++]) 385*906afcb8SAndy Fiddaman { 386*906afcb8SAndy Fiddaman case 0: 387*906afcb8SAndy Fiddaman continue; 388*906afcb8SAndy Fiddaman case S_WIDE: 389*906afcb8SAndy Fiddaman cp--; 390*906afcb8SAndy Fiddaman n = mbchar(cp); 391*906afcb8SAndy Fiddaman if (n == jp->delim) 392*906afcb8SAndy Fiddaman { 393*906afcb8SAndy Fiddaman n = S_DELIM; 394*906afcb8SAndy Fiddaman break; 395*906afcb8SAndy Fiddaman } 396*906afcb8SAndy Fiddaman if (jp->delim == -1 && iswspace(n)) 397*906afcb8SAndy Fiddaman { 398*906afcb8SAndy Fiddaman n = S_SPACE; 399*906afcb8SAndy Fiddaman break; 400*906afcb8SAndy Fiddaman } 401*906afcb8SAndy Fiddaman continue; 402*906afcb8SAndy Fiddaman } 403*906afcb8SAndy Fiddaman break; 404*906afcb8SAndy Fiddaman } 405*906afcb8SAndy Fiddaman field->end = tp; 406*906afcb8SAndy Fiddaman } 407*906afcb8SAndy Fiddaman else 408*906afcb8SAndy Fiddaman { 409*906afcb8SAndy Fiddaman while (!(n = sp[*(unsigned char*)cp++])); 410*906afcb8SAndy Fiddaman field->end = cp - 1; 411*906afcb8SAndy Fiddaman } 412*906afcb8SAndy Fiddaman field++; 413*906afcb8SAndy Fiddaman } while (n != S_NL); 414*906afcb8SAndy Fiddaman fp->nfields = field - fp->fields; 415*906afcb8SAndy Fiddaman if ((n = fp->field) < fp->nfields) 416*906afcb8SAndy Fiddaman { 417*906afcb8SAndy Fiddaman cp = fp->fields[n].beg; 418*906afcb8SAndy Fiddaman /* eliminate leading spaces */ 419*906afcb8SAndy Fiddaman if (fp->spaces) 420*906afcb8SAndy Fiddaman { 421*906afcb8SAndy Fiddaman if (jp->mb) 422*906afcb8SAndy Fiddaman for (;;) 423*906afcb8SAndy Fiddaman { 424*906afcb8SAndy Fiddaman switch (sp[*(unsigned char*)cp++]) 425*906afcb8SAndy Fiddaman { 426*906afcb8SAndy Fiddaman case S_SPACE: 427*906afcb8SAndy Fiddaman continue; 428*906afcb8SAndy Fiddaman case S_WIDE: 429*906afcb8SAndy Fiddaman tp = cp - 1; 430*906afcb8SAndy Fiddaman if (iswspace(mbchar(tp))) 431*906afcb8SAndy Fiddaman { 432*906afcb8SAndy Fiddaman cp = tp; 433*906afcb8SAndy Fiddaman continue; 434*906afcb8SAndy Fiddaman } 435*906afcb8SAndy Fiddaman break; 436*906afcb8SAndy Fiddaman } 437*906afcb8SAndy Fiddaman break; 438*906afcb8SAndy Fiddaman } 439*906afcb8SAndy Fiddaman else 440*906afcb8SAndy Fiddaman while (sp[*(unsigned char*)cp++]==S_SPACE); 441*906afcb8SAndy Fiddaman cp--; 442*906afcb8SAndy Fiddaman } 443*906afcb8SAndy Fiddaman fp->fieldlen = fp->fields[n].end - cp; 444*906afcb8SAndy Fiddaman return (unsigned char*)cp; 445*906afcb8SAndy Fiddaman } 446*906afcb8SAndy Fiddaman fp->fieldlen = 0; 447*906afcb8SAndy Fiddaman return (unsigned char*)""; 448*906afcb8SAndy Fiddaman } 449*906afcb8SAndy Fiddaman 450*906afcb8SAndy Fiddaman #if DEBUG_TRACE 451*906afcb8SAndy Fiddaman static unsigned char* u1; 452*906afcb8SAndy Fiddaman #define getrec(p,n,d) (u1 = getrec(p, n, d), sfprintf(sfstdout, "[G%d#%d@%I*d:%-.8s]", __LINE__, n, sizeof(Sfoff_t), sftell(p->file[n].iop), u1), u1) 453*906afcb8SAndy Fiddaman #endif 454*906afcb8SAndy Fiddaman 455*906afcb8SAndy Fiddaman /* 456*906afcb8SAndy Fiddaman * print field <n> from file <index> 457*906afcb8SAndy Fiddaman */ 458*906afcb8SAndy Fiddaman static int 459*906afcb8SAndy Fiddaman outfield(Join_t* jp, int index, register int n, int last) 460*906afcb8SAndy Fiddaman { 461*906afcb8SAndy Fiddaman register File_t* fp = &jp->file[index]; 462*906afcb8SAndy Fiddaman register char* cp; 463*906afcb8SAndy Fiddaman register char* cpmax; 464*906afcb8SAndy Fiddaman register int size; 465*906afcb8SAndy Fiddaman register Sfio_t* iop = jp->outfile; 466*906afcb8SAndy Fiddaman char* tp; 467*906afcb8SAndy Fiddaman 468*906afcb8SAndy Fiddaman if (n < fp->nfields) 469*906afcb8SAndy Fiddaman { 470*906afcb8SAndy Fiddaman cp = fp->fields[n].beg; 471*906afcb8SAndy Fiddaman cpmax = fp->fields[n].end + 1; 472*906afcb8SAndy Fiddaman } 473*906afcb8SAndy Fiddaman else 474*906afcb8SAndy Fiddaman cp = 0; 475*906afcb8SAndy Fiddaman if ((n = jp->delim) == -1) 476*906afcb8SAndy Fiddaman { 477*906afcb8SAndy Fiddaman if (cp && fp->spaces) 478*906afcb8SAndy Fiddaman { 479*906afcb8SAndy Fiddaman register unsigned char* sp = jp->state; 480*906afcb8SAndy Fiddaman 481*906afcb8SAndy Fiddaman /*eliminate leading spaces */ 482*906afcb8SAndy Fiddaman if (jp->mb) 483*906afcb8SAndy Fiddaman for (;;) 484*906afcb8SAndy Fiddaman { 485*906afcb8SAndy Fiddaman switch (sp[*(unsigned char*)cp++]) 486*906afcb8SAndy Fiddaman { 487*906afcb8SAndy Fiddaman case S_SPACE: 488*906afcb8SAndy Fiddaman continue; 489*906afcb8SAndy Fiddaman case S_WIDE: 490*906afcb8SAndy Fiddaman tp = cp - 1; 491*906afcb8SAndy Fiddaman if (iswspace(mbchar(tp))) 492*906afcb8SAndy Fiddaman { 493*906afcb8SAndy Fiddaman cp = tp; 494*906afcb8SAndy Fiddaman continue; 495*906afcb8SAndy Fiddaman } 496*906afcb8SAndy Fiddaman break; 497*906afcb8SAndy Fiddaman } 498*906afcb8SAndy Fiddaman break; 499*906afcb8SAndy Fiddaman } 500*906afcb8SAndy Fiddaman else 501*906afcb8SAndy Fiddaman while (sp[*(unsigned char*)cp++]==S_SPACE); 502*906afcb8SAndy Fiddaman cp--; 503*906afcb8SAndy Fiddaman } 504*906afcb8SAndy Fiddaman n = ' '; 505*906afcb8SAndy Fiddaman } 506*906afcb8SAndy Fiddaman else if (jp->delimstr) 507*906afcb8SAndy Fiddaman n = -1; 508*906afcb8SAndy Fiddaman if (last) 509*906afcb8SAndy Fiddaman n = '\n'; 510*906afcb8SAndy Fiddaman if (cp) 511*906afcb8SAndy Fiddaman size = cpmax - cp; 512*906afcb8SAndy Fiddaman else 513*906afcb8SAndy Fiddaman size = 0; 514*906afcb8SAndy Fiddaman if (n == -1) 515*906afcb8SAndy Fiddaman { 516*906afcb8SAndy Fiddaman if (size<=1) 517*906afcb8SAndy Fiddaman { 518*906afcb8SAndy Fiddaman if (jp->nullfield && sfputr(iop, jp->nullfield, -1) < 0) 519*906afcb8SAndy Fiddaman return -1; 520*906afcb8SAndy Fiddaman } 521*906afcb8SAndy Fiddaman else if (sfwrite(iop, cp, size) < 0) 522*906afcb8SAndy Fiddaman return -1; 523*906afcb8SAndy Fiddaman if (sfwrite(iop, jp->delimstr, jp->delimlen) < 0) 524*906afcb8SAndy Fiddaman return -1; 525*906afcb8SAndy Fiddaman } 526*906afcb8SAndy Fiddaman else if (size <= 1) 527*906afcb8SAndy Fiddaman { 528*906afcb8SAndy Fiddaman if (!jp->nullfield) 529*906afcb8SAndy Fiddaman sfputc(iop, n); 530*906afcb8SAndy Fiddaman else if (sfputr(iop, jp->nullfield, n) < 0) 531*906afcb8SAndy Fiddaman return -1; 532*906afcb8SAndy Fiddaman } 533*906afcb8SAndy Fiddaman else 534*906afcb8SAndy Fiddaman { 535*906afcb8SAndy Fiddaman last = cp[size-1]; 536*906afcb8SAndy Fiddaman cp[size-1] = n; 537*906afcb8SAndy Fiddaman if (sfwrite(iop, cp, size) < 0) 538*906afcb8SAndy Fiddaman return -1; 539*906afcb8SAndy Fiddaman cp[size-1] = last; 540*906afcb8SAndy Fiddaman } 541*906afcb8SAndy Fiddaman return 0; 542*906afcb8SAndy Fiddaman } 543*906afcb8SAndy Fiddaman 544*906afcb8SAndy Fiddaman #if DEBUG_TRACE 545*906afcb8SAndy Fiddaman static int i1,i2,i3; 546*906afcb8SAndy Fiddaman #define outfield(p,i,n,f) (sfprintf(sfstdout, "[F%d#%d:%d,%d]", __LINE__, i1=i, i2=n, i3=f), outfield(p, i1, i2, i3)) 547*906afcb8SAndy Fiddaman #endif 548*906afcb8SAndy Fiddaman 549*906afcb8SAndy Fiddaman static int 550*906afcb8SAndy Fiddaman outrec(register Join_t* jp, int mode) 551*906afcb8SAndy Fiddaman { 552*906afcb8SAndy Fiddaman register File_t* fp; 553*906afcb8SAndy Fiddaman register int i; 554*906afcb8SAndy Fiddaman register int j; 555*906afcb8SAndy Fiddaman register int k; 556*906afcb8SAndy Fiddaman register int n; 557*906afcb8SAndy Fiddaman int* out; 558*906afcb8SAndy Fiddaman 559*906afcb8SAndy Fiddaman if (mode < 0 && jp->file[0].hit++) 560*906afcb8SAndy Fiddaman return 0; 561*906afcb8SAndy Fiddaman if (mode > 0 && jp->file[1].hit++) 562*906afcb8SAndy Fiddaman return 0; 563*906afcb8SAndy Fiddaman if (out = jp->outlist) 564*906afcb8SAndy Fiddaman { 565*906afcb8SAndy Fiddaman while ((n = *out++) >= 0) 566*906afcb8SAndy Fiddaman { 567*906afcb8SAndy Fiddaman if (n == JOINFIELD) 568*906afcb8SAndy Fiddaman { 569*906afcb8SAndy Fiddaman i = mode >= 0; 570*906afcb8SAndy Fiddaman j = jp->file[i].field; 571*906afcb8SAndy Fiddaman } 572*906afcb8SAndy Fiddaman else 573*906afcb8SAndy Fiddaman { 574*906afcb8SAndy Fiddaman i = n & 1; 575*906afcb8SAndy Fiddaman j = (mode<0 && i || mode>0 && !i) ? 576*906afcb8SAndy Fiddaman jp->file[i].nfields : 577*906afcb8SAndy Fiddaman n >> 2; 578*906afcb8SAndy Fiddaman } 579*906afcb8SAndy Fiddaman if (outfield(jp, i, j, *out < 0) < 0) 580*906afcb8SAndy Fiddaman return -1; 581*906afcb8SAndy Fiddaman } 582*906afcb8SAndy Fiddaman return 0; 583*906afcb8SAndy Fiddaman } 584*906afcb8SAndy Fiddaman k = jp->file[0].nfields; 585*906afcb8SAndy Fiddaman if (mode >= 0) 586*906afcb8SAndy Fiddaman k += jp->file[1].nfields - 1; 587*906afcb8SAndy Fiddaman for (i=0; i<2; i++) 588*906afcb8SAndy Fiddaman { 589*906afcb8SAndy Fiddaman fp = &jp->file[i]; 590*906afcb8SAndy Fiddaman if (mode>0 && i==0) 591*906afcb8SAndy Fiddaman { 592*906afcb8SAndy Fiddaman k -= (fp->nfields - 1); 593*906afcb8SAndy Fiddaman continue; 594*906afcb8SAndy Fiddaman } 595*906afcb8SAndy Fiddaman n = fp->field; 596*906afcb8SAndy Fiddaman if (mode||i==0) 597*906afcb8SAndy Fiddaman { 598*906afcb8SAndy Fiddaman /* output join field first */ 599*906afcb8SAndy Fiddaman if (outfield(jp,i,n,!--k) < 0) 600*906afcb8SAndy Fiddaman return -1; 601*906afcb8SAndy Fiddaman if (!k) 602*906afcb8SAndy Fiddaman return 0; 603*906afcb8SAndy Fiddaman for (j=0; j<n; j++) 604*906afcb8SAndy Fiddaman { 605*906afcb8SAndy Fiddaman if (outfield(jp,i,j,!--k) < 0) 606*906afcb8SAndy Fiddaman return -1; 607*906afcb8SAndy Fiddaman if (!k) 608*906afcb8SAndy Fiddaman return 0; 609*906afcb8SAndy Fiddaman } 610*906afcb8SAndy Fiddaman j = n + 1; 611*906afcb8SAndy Fiddaman } 612*906afcb8SAndy Fiddaman else 613*906afcb8SAndy Fiddaman j = 0; 614*906afcb8SAndy Fiddaman for (;j<fp->nfields; j++) 615*906afcb8SAndy Fiddaman { 616*906afcb8SAndy Fiddaman if (j!=n && outfield(jp,i,j,!--k) < 0) 617*906afcb8SAndy Fiddaman return -1; 618*906afcb8SAndy Fiddaman if (!k) 619*906afcb8SAndy Fiddaman return 0; 620*906afcb8SAndy Fiddaman } 621*906afcb8SAndy Fiddaman } 622*906afcb8SAndy Fiddaman return 0; 623*906afcb8SAndy Fiddaman } 624*906afcb8SAndy Fiddaman 625*906afcb8SAndy Fiddaman #if DEBUG_TRACE 626*906afcb8SAndy Fiddaman #define outrec(p,n) (sfprintf(sfstdout, "[R#%d,%d,%lld,%lld:%-.*s{%d}:%-.*s{%d}]", __LINE__, i1=n, lo, hi, jp->file[0].fieldlen, cp1, jp->file[0].hit, jp->file[1].fieldlen, cp2, jp->file[1].hit), outrec(p, i1)) 627*906afcb8SAndy Fiddaman #endif 628*906afcb8SAndy Fiddaman 629*906afcb8SAndy Fiddaman static int 630*906afcb8SAndy Fiddaman join(Join_t* jp) 631*906afcb8SAndy Fiddaman { 632*906afcb8SAndy Fiddaman register unsigned char* cp1; 633*906afcb8SAndy Fiddaman register unsigned char* cp2; 634*906afcb8SAndy Fiddaman register int n1; 635*906afcb8SAndy Fiddaman register int n2; 636*906afcb8SAndy Fiddaman register int n; 637*906afcb8SAndy Fiddaman register int cmp; 638*906afcb8SAndy Fiddaman register int same; 639*906afcb8SAndy Fiddaman int o2; 640*906afcb8SAndy Fiddaman Sfoff_t lo = -1; 641*906afcb8SAndy Fiddaman Sfoff_t hi = -1; 642*906afcb8SAndy Fiddaman 643*906afcb8SAndy Fiddaman if ((cp1 = getrec(jp, 0, 0)) && (cp2 = getrec(jp, 1, 0)) || (cp2 = 0)) 644*906afcb8SAndy Fiddaman { 645*906afcb8SAndy Fiddaman n1 = jp->file[0].fieldlen; 646*906afcb8SAndy Fiddaman n2 = jp->file[1].fieldlen; 647*906afcb8SAndy Fiddaman same = 0; 648*906afcb8SAndy Fiddaman for (;;) 649*906afcb8SAndy Fiddaman { 650*906afcb8SAndy Fiddaman n = n1 < n2 ? n1 : n2; 651*906afcb8SAndy Fiddaman #if DEBUG_TRACE 652*906afcb8SAndy Fiddaman if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n))) 653*906afcb8SAndy Fiddaman cmp = n1 - n2; 654*906afcb8SAndy Fiddaman sfprintf(sfstdout, "[C#%d:%d(%c-%c),%d,%lld,%lld%s]", __LINE__, cmp, *cp1, *cp2, same, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : ""); 655*906afcb8SAndy Fiddaman if (!cmp) 656*906afcb8SAndy Fiddaman #else 657*906afcb8SAndy Fiddaman if (!n && !(cmp = n1 < n2 ? -1 : (n1 > n2)) || n && !(cmp = (int)*cp1 - (int)*cp2) && !(cmp = jp->ignorecase ? strncasecmp((char*)cp1, (char*)cp2, n) : memcmp(cp1, cp2, n)) && !(cmp = n1 - n2)) 658*906afcb8SAndy Fiddaman #endif 659*906afcb8SAndy Fiddaman { 660*906afcb8SAndy Fiddaman if (!(jp->outmode & C_COMMON)) 661*906afcb8SAndy Fiddaman { 662*906afcb8SAndy Fiddaman if (cp1 = getrec(jp, 0, 1)) 663*906afcb8SAndy Fiddaman { 664*906afcb8SAndy Fiddaman n1 = jp->file[0].fieldlen; 665*906afcb8SAndy Fiddaman same = 1; 666*906afcb8SAndy Fiddaman continue; 667*906afcb8SAndy Fiddaman } 668*906afcb8SAndy Fiddaman if ((jp->ooutmode & (C_FILE1|C_FILE2)) != C_FILE2) 669*906afcb8SAndy Fiddaman break; 670*906afcb8SAndy Fiddaman if (sfseek(jp->file[0].iop, (Sfoff_t)-jp->file[0].reclen, SEEK_CUR) < 0 || !(cp1 = getrec(jp, 0, 0))) 671*906afcb8SAndy Fiddaman { 672*906afcb8SAndy Fiddaman error(ERROR_SYSTEM|2, "%s: seek error", jp->file[0].name); 673*906afcb8SAndy Fiddaman return -1; 674*906afcb8SAndy Fiddaman } 675*906afcb8SAndy Fiddaman } 676*906afcb8SAndy Fiddaman else if (outrec(jp, 0) < 0) 677*906afcb8SAndy Fiddaman return -1; 678*906afcb8SAndy Fiddaman else if (lo < 0 && (jp->outmode & C_COMMON)) 679*906afcb8SAndy Fiddaman { 680*906afcb8SAndy Fiddaman if ((lo = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0) 681*906afcb8SAndy Fiddaman { 682*906afcb8SAndy Fiddaman error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name); 683*906afcb8SAndy Fiddaman return -1; 684*906afcb8SAndy Fiddaman } 685*906afcb8SAndy Fiddaman lo -= jp->file[1].reclen; 686*906afcb8SAndy Fiddaman } 687*906afcb8SAndy Fiddaman if (cp2 = getrec(jp, 1, lo < 0)) 688*906afcb8SAndy Fiddaman { 689*906afcb8SAndy Fiddaman n2 = jp->file[1].fieldlen; 690*906afcb8SAndy Fiddaman continue; 691*906afcb8SAndy Fiddaman } 692*906afcb8SAndy Fiddaman #if DEBUG_TRACE 693*906afcb8SAndy Fiddaman sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi); 694*906afcb8SAndy Fiddaman #endif 695*906afcb8SAndy Fiddaman } 696*906afcb8SAndy Fiddaman else if (cmp > 0) 697*906afcb8SAndy Fiddaman { 698*906afcb8SAndy Fiddaman if (same) 699*906afcb8SAndy Fiddaman { 700*906afcb8SAndy Fiddaman same = 0; 701*906afcb8SAndy Fiddaman next: 702*906afcb8SAndy Fiddaman if (n2 > jp->samesize) 703*906afcb8SAndy Fiddaman { 704*906afcb8SAndy Fiddaman jp->samesize = roundof(n2, 16); 705*906afcb8SAndy Fiddaman if (!(jp->same = newof(jp->same, char, jp->samesize, 0))) 706*906afcb8SAndy Fiddaman { 707*906afcb8SAndy Fiddaman error(ERROR_SYSTEM|2, "out of space"); 708*906afcb8SAndy Fiddaman return -1; 709*906afcb8SAndy Fiddaman } 710*906afcb8SAndy Fiddaman } 711*906afcb8SAndy Fiddaman memcpy(jp->same, cp2, o2 = n2); 712*906afcb8SAndy Fiddaman if (!(cp2 = getrec(jp, 1, 0))) 713*906afcb8SAndy Fiddaman break; 714*906afcb8SAndy Fiddaman n2 = jp->file[1].fieldlen; 715*906afcb8SAndy Fiddaman if (n2 == o2 && *cp2 == *jp->same && !memcmp(cp2, jp->same, n2)) 716*906afcb8SAndy Fiddaman goto next; 717*906afcb8SAndy Fiddaman continue; 718*906afcb8SAndy Fiddaman } 719*906afcb8SAndy Fiddaman if (hi >= 0) 720*906afcb8SAndy Fiddaman { 721*906afcb8SAndy Fiddaman if (sfseek(jp->file[1].iop, hi, SEEK_SET) != hi) 722*906afcb8SAndy Fiddaman { 723*906afcb8SAndy Fiddaman error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name); 724*906afcb8SAndy Fiddaman return -1; 725*906afcb8SAndy Fiddaman } 726*906afcb8SAndy Fiddaman hi = -1; 727*906afcb8SAndy Fiddaman } 728*906afcb8SAndy Fiddaman else if ((jp->outmode & C_FILE2) && outrec(jp, 1) < 0) 729*906afcb8SAndy Fiddaman return -1; 730*906afcb8SAndy Fiddaman lo = -1; 731*906afcb8SAndy Fiddaman if (cp2 = getrec(jp, 1, 1)) 732*906afcb8SAndy Fiddaman { 733*906afcb8SAndy Fiddaman n2 = jp->file[1].fieldlen; 734*906afcb8SAndy Fiddaman continue; 735*906afcb8SAndy Fiddaman } 736*906afcb8SAndy Fiddaman #if DEBUG_TRACE 737*906afcb8SAndy Fiddaman sfprintf(sfstdout, "[2#%d:0,%lld,%lld]", __LINE__, lo, hi); 738*906afcb8SAndy Fiddaman #endif 739*906afcb8SAndy Fiddaman } 740*906afcb8SAndy Fiddaman else if (same) 741*906afcb8SAndy Fiddaman { 742*906afcb8SAndy Fiddaman same = 0; 743*906afcb8SAndy Fiddaman if (!(cp1 = getrec(jp, 0, 0))) 744*906afcb8SAndy Fiddaman break; 745*906afcb8SAndy Fiddaman n1 = jp->file[0].fieldlen; 746*906afcb8SAndy Fiddaman continue; 747*906afcb8SAndy Fiddaman } 748*906afcb8SAndy Fiddaman if (lo >= 0) 749*906afcb8SAndy Fiddaman { 750*906afcb8SAndy Fiddaman if ((hi = sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR)) < 0 || 751*906afcb8SAndy Fiddaman (hi -= jp->file[1].reclen) < 0 || 752*906afcb8SAndy Fiddaman sfseek(jp->file[1].iop, lo, SEEK_SET) != lo || 753*906afcb8SAndy Fiddaman !(cp2 = getrec(jp, 1, 0))) 754*906afcb8SAndy Fiddaman { 755*906afcb8SAndy Fiddaman error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name); 756*906afcb8SAndy Fiddaman return -1; 757*906afcb8SAndy Fiddaman } 758*906afcb8SAndy Fiddaman n2 = jp->file[1].fieldlen; 759*906afcb8SAndy Fiddaman lo = -1; 760*906afcb8SAndy Fiddaman if (jp->file[1].discard) 761*906afcb8SAndy Fiddaman sfseek(jp->file[1].iop, (Sfoff_t)-1, SEEK_SET); 762*906afcb8SAndy Fiddaman } 763*906afcb8SAndy Fiddaman else if (!cp2) 764*906afcb8SAndy Fiddaman break; 765*906afcb8SAndy Fiddaman else if ((jp->outmode & C_FILE1) && outrec(jp, -1) < 0) 766*906afcb8SAndy Fiddaman return -1; 767*906afcb8SAndy Fiddaman if (!(cp1 = getrec(jp, 0, 1))) 768*906afcb8SAndy Fiddaman break; 769*906afcb8SAndy Fiddaman n1 = jp->file[0].fieldlen; 770*906afcb8SAndy Fiddaman } 771*906afcb8SAndy Fiddaman } 772*906afcb8SAndy Fiddaman #if DEBUG_TRACE 773*906afcb8SAndy Fiddaman sfprintf(sfstdout, "[X#%d:?,%p,%p,%d,%d,%d%s]", __LINE__, cp1, cp2, cmp, lo, hi, (jp->outmode & C_COMMON) ? ",COMMON" : ""); 774*906afcb8SAndy Fiddaman #endif 775*906afcb8SAndy Fiddaman if (cp2) 776*906afcb8SAndy Fiddaman { 777*906afcb8SAndy Fiddaman if (hi >= 0 && 778*906afcb8SAndy Fiddaman sfseek(jp->file[1].iop, (Sfoff_t)0, SEEK_CUR) < hi && 779*906afcb8SAndy Fiddaman sfseek(jp->file[1].iop, hi, SEEK_SET) != hi) 780*906afcb8SAndy Fiddaman { 781*906afcb8SAndy Fiddaman error(ERROR_SYSTEM|2, "%s: seek error", jp->file[1].name); 782*906afcb8SAndy Fiddaman return -1; 783*906afcb8SAndy Fiddaman } 784*906afcb8SAndy Fiddaman #if DEBUG_TRACE 785*906afcb8SAndy Fiddaman sfprintf(sfstdout, "[O#%d:%02o:%02o]", __LINE__, jp->ooutmode, jp->outmode); 786*906afcb8SAndy Fiddaman #endif 787*906afcb8SAndy Fiddaman cp1 = (!cp1 && cmp && hi < 0 && !jp->file[1].hit && ((jp->ooutmode ^ C_ALL) <= 1 || jp->outmode == 2)) ? cp2 : getrec(jp, 1, 0); 788*906afcb8SAndy Fiddaman cmp = 1; 789*906afcb8SAndy Fiddaman n = 1; 790*906afcb8SAndy Fiddaman } 791*906afcb8SAndy Fiddaman else 792*906afcb8SAndy Fiddaman { 793*906afcb8SAndy Fiddaman cmp = -1; 794*906afcb8SAndy Fiddaman n = 0; 795*906afcb8SAndy Fiddaman } 796*906afcb8SAndy Fiddaman #if DEBUG_TRACE 797*906afcb8SAndy Fiddaman sfprintf(sfstdout, "[X#%d:%d,%p,%p,%d,%02o,%02o%s]", __LINE__, n, cp1, cp2, cmp, jp->ooutmode, jp->outmode, (jp->outmode & C_COMMON) ? ",COMMON" : ""); 798*906afcb8SAndy Fiddaman #endif 799*906afcb8SAndy Fiddaman if (!cp1 || !(jp->outmode & (1<<n))) 800*906afcb8SAndy Fiddaman { 801*906afcb8SAndy Fiddaman if (cp1 && jp->file[n].iop == sfstdin) 802*906afcb8SAndy Fiddaman sfseek(sfstdin, (Sfoff_t)0, SEEK_END); 803*906afcb8SAndy Fiddaman return 0; 804*906afcb8SAndy Fiddaman } 805*906afcb8SAndy Fiddaman if (outrec(jp, cmp) < 0) 806*906afcb8SAndy Fiddaman return -1; 807*906afcb8SAndy Fiddaman do 808*906afcb8SAndy Fiddaman { 809*906afcb8SAndy Fiddaman if (!getrec(jp, n, 1)) 810*906afcb8SAndy Fiddaman return 0; 811*906afcb8SAndy Fiddaman } while (outrec(jp, cmp) >= 0); 812*906afcb8SAndy Fiddaman return -1; 813*906afcb8SAndy Fiddaman } 814*906afcb8SAndy Fiddaman 815*906afcb8SAndy Fiddaman int 816*906afcb8SAndy Fiddaman b_join(int argc, char** argv, Shbltin_t* context) 817*906afcb8SAndy Fiddaman { 818*906afcb8SAndy Fiddaman register int n; 819*906afcb8SAndy Fiddaman register char* cp; 820*906afcb8SAndy Fiddaman register Join_t* jp; 821*906afcb8SAndy Fiddaman char* e; 822*906afcb8SAndy Fiddaman 823*906afcb8SAndy Fiddaman #if !DEBUG_TRACE 824*906afcb8SAndy Fiddaman cmdinit(argc, argv, context, ERROR_CATALOG, ERROR_NOTIFY); 825*906afcb8SAndy Fiddaman #endif 826*906afcb8SAndy Fiddaman if (!(jp = init())) 827*906afcb8SAndy Fiddaman error(ERROR_system(1),"out of space"); 828*906afcb8SAndy Fiddaman jp->context = context; 829*906afcb8SAndy Fiddaman for (;;) 830*906afcb8SAndy Fiddaman { 831*906afcb8SAndy Fiddaman switch (n = optget(argv, usage)) 832*906afcb8SAndy Fiddaman { 833*906afcb8SAndy Fiddaman case 'j': 834*906afcb8SAndy Fiddaman /* 835*906afcb8SAndy Fiddaman * check for obsolete "-j1 field" and "-j2 field" 836*906afcb8SAndy Fiddaman */ 837*906afcb8SAndy Fiddaman 838*906afcb8SAndy Fiddaman if (opt_info.offset == 0) 839*906afcb8SAndy Fiddaman { 840*906afcb8SAndy Fiddaman cp = argv[opt_info.index - 1]; 841*906afcb8SAndy Fiddaman for (n = strlen(cp) - 1; n > 0 && cp[n] != 'j'; n--); 842*906afcb8SAndy Fiddaman n = cp[n] == 'j'; 843*906afcb8SAndy Fiddaman } 844*906afcb8SAndy Fiddaman else 845*906afcb8SAndy Fiddaman n = 0; 846*906afcb8SAndy Fiddaman if (n) 847*906afcb8SAndy Fiddaman { 848*906afcb8SAndy Fiddaman if (opt_info.num!=1 && opt_info.num!=2) 849*906afcb8SAndy Fiddaman error(2,"-jfileno field: fileno must be 1 or 2"); 850*906afcb8SAndy Fiddaman n = '0' + opt_info.num; 851*906afcb8SAndy Fiddaman if (!(cp = argv[opt_info.index])) 852*906afcb8SAndy Fiddaman { 853*906afcb8SAndy Fiddaman argc = 0; 854*906afcb8SAndy Fiddaman break; 855*906afcb8SAndy Fiddaman } 856*906afcb8SAndy Fiddaman opt_info.num = strtol(cp, &e, 10); 857*906afcb8SAndy Fiddaman if (*e) 858*906afcb8SAndy Fiddaman { 859*906afcb8SAndy Fiddaman argc = 0; 860*906afcb8SAndy Fiddaman break; 861*906afcb8SAndy Fiddaman } 862*906afcb8SAndy Fiddaman opt_info.index++; 863*906afcb8SAndy Fiddaman } 864*906afcb8SAndy Fiddaman else 865*906afcb8SAndy Fiddaman { 866*906afcb8SAndy Fiddaman jp->file[0].field = (int)(opt_info.num-1); 867*906afcb8SAndy Fiddaman n = '2'; 868*906afcb8SAndy Fiddaman } 869*906afcb8SAndy Fiddaman /*FALLTHROUGH*/ 870*906afcb8SAndy Fiddaman case '1': 871*906afcb8SAndy Fiddaman case '2': 872*906afcb8SAndy Fiddaman if (opt_info.num <=0) 873*906afcb8SAndy Fiddaman error(2,"field number must positive"); 874*906afcb8SAndy Fiddaman jp->file[n-'1'].field = (int)(opt_info.num-1); 875*906afcb8SAndy Fiddaman continue; 876*906afcb8SAndy Fiddaman case 'v': 877*906afcb8SAndy Fiddaman jp->outmode &= ~C_COMMON; 878*906afcb8SAndy Fiddaman /*FALLTHROUGH*/ 879*906afcb8SAndy Fiddaman case 'a': 880*906afcb8SAndy Fiddaman if (opt_info.num!=1 && opt_info.num!=2) 881*906afcb8SAndy Fiddaman error(2,"%s: file number must be 1 or 2", opt_info.name); 882*906afcb8SAndy Fiddaman jp->outmode |= 1<<(opt_info.num-1); 883*906afcb8SAndy Fiddaman continue; 884*906afcb8SAndy Fiddaman case 'e': 885*906afcb8SAndy Fiddaman jp->nullfield = opt_info.arg; 886*906afcb8SAndy Fiddaman continue; 887*906afcb8SAndy Fiddaman case 'o': 888*906afcb8SAndy Fiddaman /* need to accept obsolescent command syntax */ 889*906afcb8SAndy Fiddaman n = getolist(jp, opt_info.arg, argv+opt_info.index); 890*906afcb8SAndy Fiddaman opt_info.index += n; 891*906afcb8SAndy Fiddaman continue; 892*906afcb8SAndy Fiddaman case 't': 893*906afcb8SAndy Fiddaman jp->state[' '] = jp->state['\t'] = 0; 894*906afcb8SAndy Fiddaman if (jp->mb) 895*906afcb8SAndy Fiddaman { 896*906afcb8SAndy Fiddaman cp = opt_info.arg; 897*906afcb8SAndy Fiddaman jp->delim = mbchar(cp); 898*906afcb8SAndy Fiddaman if ((n = cp - opt_info.arg) > 1) 899*906afcb8SAndy Fiddaman { 900*906afcb8SAndy Fiddaman jp->delimlen = n; 901*906afcb8SAndy Fiddaman jp->delimstr = opt_info.arg; 902*906afcb8SAndy Fiddaman continue; 903*906afcb8SAndy Fiddaman } 904*906afcb8SAndy Fiddaman } 905*906afcb8SAndy Fiddaman n = *(unsigned char*)opt_info.arg; 906*906afcb8SAndy Fiddaman jp->state[n] = S_DELIM; 907*906afcb8SAndy Fiddaman jp->delim = n; 908*906afcb8SAndy Fiddaman continue; 909*906afcb8SAndy Fiddaman case 'i': 910*906afcb8SAndy Fiddaman jp->ignorecase = !opt_info.num; 911*906afcb8SAndy Fiddaman continue; 912*906afcb8SAndy Fiddaman case 'B': 913*906afcb8SAndy Fiddaman jp->buffered = !opt_info.num; 914*906afcb8SAndy Fiddaman continue; 915*906afcb8SAndy Fiddaman case ':': 916*906afcb8SAndy Fiddaman error(2, "%s", opt_info.arg); 917*906afcb8SAndy Fiddaman break; 918*906afcb8SAndy Fiddaman case '?': 919*906afcb8SAndy Fiddaman done(jp); 920*906afcb8SAndy Fiddaman error(ERROR_usage(2), "%s", opt_info.arg); 921*906afcb8SAndy Fiddaman break; 922*906afcb8SAndy Fiddaman } 923*906afcb8SAndy Fiddaman break; 924*906afcb8SAndy Fiddaman } 925*906afcb8SAndy Fiddaman argv += opt_info.index; 926*906afcb8SAndy Fiddaman argc -= opt_info.index; 927*906afcb8SAndy Fiddaman if (error_info.errors || argc!=2) 928*906afcb8SAndy Fiddaman { 929*906afcb8SAndy Fiddaman done(jp); 930*906afcb8SAndy Fiddaman error(ERROR_usage(2),"%s", optusage(NiL)); 931*906afcb8SAndy Fiddaman } 932*906afcb8SAndy Fiddaman jp->ooutmode = jp->outmode; 933*906afcb8SAndy Fiddaman jp->file[0].name = cp = *argv++; 934*906afcb8SAndy Fiddaman if (streq(cp,"-")) 935*906afcb8SAndy Fiddaman { 936*906afcb8SAndy Fiddaman if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0) 937*906afcb8SAndy Fiddaman { 938*906afcb8SAndy Fiddaman if (sfdcseekable(sfstdin)) 939*906afcb8SAndy Fiddaman error(ERROR_warn(0),"%s: seek may fail",cp); 940*906afcb8SAndy Fiddaman else 941*906afcb8SAndy Fiddaman jp->file[0].discard = 1; 942*906afcb8SAndy Fiddaman } 943*906afcb8SAndy Fiddaman jp->file[0].iop = sfstdin; 944*906afcb8SAndy Fiddaman } 945*906afcb8SAndy Fiddaman else if (!(jp->file[0].iop = sfopen(NiL, cp, "r"))) 946*906afcb8SAndy Fiddaman { 947*906afcb8SAndy Fiddaman done(jp); 948*906afcb8SAndy Fiddaman error(ERROR_system(1),"%s: cannot open",cp); 949*906afcb8SAndy Fiddaman } 950*906afcb8SAndy Fiddaman jp->file[1].name = cp = *argv; 951*906afcb8SAndy Fiddaman if (streq(cp,"-")) 952*906afcb8SAndy Fiddaman { 953*906afcb8SAndy Fiddaman if (sfseek(sfstdin,(Sfoff_t)0,SEEK_CUR) < 0) 954*906afcb8SAndy Fiddaman { 955*906afcb8SAndy Fiddaman if (sfdcseekable(sfstdin)) 956*906afcb8SAndy Fiddaman error(ERROR_warn(0),"%s: seek may fail",cp); 957*906afcb8SAndy Fiddaman else 958*906afcb8SAndy Fiddaman jp->file[1].discard = 1; 959*906afcb8SAndy Fiddaman } 960*906afcb8SAndy Fiddaman jp->file[1].iop = sfstdin; 961*906afcb8SAndy Fiddaman } 962*906afcb8SAndy Fiddaman else if (!(jp->file[1].iop = sfopen(NiL, cp, "r"))) 963*906afcb8SAndy Fiddaman { 964*906afcb8SAndy Fiddaman done(jp); 965*906afcb8SAndy Fiddaman error(ERROR_system(1),"%s: cannot open",cp); 966*906afcb8SAndy Fiddaman } 967*906afcb8SAndy Fiddaman if (jp->buffered) 968*906afcb8SAndy Fiddaman { 969*906afcb8SAndy Fiddaman sfsetbuf(jp->file[0].iop, jp->file[0].iop, SF_UNBOUND); 970*906afcb8SAndy Fiddaman sfsetbuf(jp->file[1].iop, jp->file[1].iop, SF_UNBOUND); 971*906afcb8SAndy Fiddaman } 972*906afcb8SAndy Fiddaman jp->outfile = sfstdout; 973*906afcb8SAndy Fiddaman if (!jp->outlist) 974*906afcb8SAndy Fiddaman jp->nullfield = 0; 975*906afcb8SAndy Fiddaman if (join(jp) < 0) 976*906afcb8SAndy Fiddaman { 977*906afcb8SAndy Fiddaman done(jp); 978*906afcb8SAndy Fiddaman error(ERROR_system(1),"write error"); 979*906afcb8SAndy Fiddaman } 980*906afcb8SAndy Fiddaman else if (jp->file[0].iop==sfstdin || jp->file[1].iop==sfstdin) 981*906afcb8SAndy Fiddaman sfseek(sfstdin,(Sfoff_t)0,SEEK_END); 982*906afcb8SAndy Fiddaman done(jp); 983*906afcb8SAndy Fiddaman return error_info.errors; 984*906afcb8SAndy Fiddaman } 985