1da2e3ebdSchin /*********************************************************************** 2da2e3ebdSchin * * 3da2e3ebdSchin * This software is part of the ast package * 4*7c2fbfb3SApril Chin * Copyright (c) 1992-2008 AT&T Intellectual Property * 5da2e3ebdSchin * and is licensed under the * 6da2e3ebdSchin * Common Public License, Version 1.0 * 7*7c2fbfb3SApril Chin * by AT&T Intellectual Property * 8da2e3ebdSchin * * 9da2e3ebdSchin * A copy of the License is available at * 10da2e3ebdSchin * http://www.opensource.org/licenses/cpl1.0.txt * 11da2e3ebdSchin * (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9) * 12da2e3ebdSchin * * 13da2e3ebdSchin * Information and Software Systems Research * 14da2e3ebdSchin * AT&T Research * 15da2e3ebdSchin * Florham Park NJ * 16da2e3ebdSchin * * 17da2e3ebdSchin * Glenn Fowler <gsf@research.att.com> * 18da2e3ebdSchin * David Korn <dgk@research.att.com> * 19da2e3ebdSchin * * 20da2e3ebdSchin ***********************************************************************/ 21da2e3ebdSchin #pragma prototyped 22da2e3ebdSchin /* 23da2e3ebdSchin * uniq 24da2e3ebdSchin * 25da2e3ebdSchin * Written by David Korn 26da2e3ebdSchin */ 27da2e3ebdSchin 28da2e3ebdSchin static const char usage[] = 29*7c2fbfb3SApril Chin "[-n?\n@(#)$Id: uniq (AT&T Research) 2008-04-24 $\n]" 30da2e3ebdSchin USAGE_LICENSE 31da2e3ebdSchin "[+NAME?uniq - Report or filter out repeated lines in a file]" 32da2e3ebdSchin "[+DESCRIPTION?\buniq\b reads an input, comparing adjacent lines, and " 33da2e3ebdSchin "writing one copy of each input line on the output. The second " 34da2e3ebdSchin "and succeeding copies of the repeated adjacent lines are not " 35da2e3ebdSchin "written.]" 36da2e3ebdSchin "[+?If the output file, \aoutfile\a, is not specified, \buniq\b writes " 37da2e3ebdSchin "to standard output. If no \ainfile\a is given, or if the \ainfile\a " 38da2e3ebdSchin "is \b-\b, \buniq\b reads from standard input with the start of " 39da2e3ebdSchin "the file is defined as the current offset.]" 40da2e3ebdSchin "[c:count?Output the number of times each line occurred along with " 41da2e3ebdSchin "the line.]" 42da2e3ebdSchin "[d:repeated|duplicates?Output the first of each duplicate line.]" 43da2e3ebdSchin "[D:all-repeated?Output all duplicate lines as a group with an empty " 44da2e3ebdSchin "line delimiter specified by \adelimit\a:]:?[delimit:=none]" 45da2e3ebdSchin "{" 46da2e3ebdSchin "[n:none?Do not delimit duplicate groups.]" 47da2e3ebdSchin "[p:prepend?Prepend an empty line before each group.]" 48da2e3ebdSchin "[s:separate?Separate each group with an empty line.]" 49da2e3ebdSchin "}" 50da2e3ebdSchin "[f:skip-fields]#[fields?\afields\a is the number of fields to skip over " 51da2e3ebdSchin "before checking for uniqueness. A field is the minimal string matching " 52da2e3ebdSchin "the BRE \b[[:blank:]]]]*[^[:blank:]]]]*\b.]" 53da2e3ebdSchin "[i:ignore-case?Ignore case in comparisons.]" 54da2e3ebdSchin "[s:skip-chars]#[chars?\achars\a is the number of characters to skip over " 55da2e3ebdSchin "before checking for uniqueness. If specified along with \b-f\b, " 56da2e3ebdSchin "the first \achars\a after the first \afields\a are ignored. If " 57da2e3ebdSchin "the \achars\a specifies more characters than are on the line, " 58da2e3ebdSchin "an empty string will be used for comparison.]" 59da2e3ebdSchin "[u:unique?Output unique lines.]" 60da2e3ebdSchin "[w:check-chars]#[chars?\achars\a is the number of characters to compare " 61da2e3ebdSchin "after skipping any specified fields and characters.]" 62da2e3ebdSchin "\n" 63da2e3ebdSchin "\n[infile [outfile]]\n" 64da2e3ebdSchin "\n" 65da2e3ebdSchin "[+EXIT STATUS?]{" 66da2e3ebdSchin "[+0?The input file was successfully processed.]" 67da2e3ebdSchin "[+>0?An error occurred.]" 68da2e3ebdSchin "}" 69da2e3ebdSchin "[+SEE ALSO?\bsort\b(1), \bgrep\b(1)]" 70da2e3ebdSchin ; 71da2e3ebdSchin 72da2e3ebdSchin #include <cmd.h> 73da2e3ebdSchin 74da2e3ebdSchin #define C_FLAG 1 75da2e3ebdSchin #define D_FLAG 2 76da2e3ebdSchin #define U_FLAG 4 77da2e3ebdSchin 78da2e3ebdSchin #define CWIDTH 4 79da2e3ebdSchin #define MAXCNT 9999 80da2e3ebdSchin 81da2e3ebdSchin typedef int (*Compare_f)(const char*, const char*, size_t); 82da2e3ebdSchin 83da2e3ebdSchin static int uniq(Sfio_t *fdin, Sfio_t *fdout, int fields, int chars, int width, int mode, int* all, Compare_f compare) 84da2e3ebdSchin { 85da2e3ebdSchin register int n, f, outsize=0; 86da2e3ebdSchin register char *cp, *ep, *bufp, *outp; 87da2e3ebdSchin char *orecp, *sbufp=0, *outbuff; 88da2e3ebdSchin int reclen,oreclen= -1,count=0,cwidth=0,sep,next; 89da2e3ebdSchin if(mode&C_FLAG) 90da2e3ebdSchin cwidth = CWIDTH+1; 91da2e3ebdSchin while(1) 92da2e3ebdSchin { 93da2e3ebdSchin if(bufp = sfgetr(fdin,'\n',0)) 94da2e3ebdSchin n = sfvalue(fdin); 95da2e3ebdSchin else if(bufp = sfgetr(fdin,'\n',SF_LASTR)) 96da2e3ebdSchin { 97da2e3ebdSchin n = sfvalue(fdin); 98da2e3ebdSchin bufp = memcpy(fmtbuf(n + 1), bufp, n); 99da2e3ebdSchin bufp[n++] = '\n'; 100da2e3ebdSchin } 101da2e3ebdSchin else 102da2e3ebdSchin n = 0; 103da2e3ebdSchin if(n) 104da2e3ebdSchin { 105da2e3ebdSchin cp = bufp; 106da2e3ebdSchin ep = cp + n; 107da2e3ebdSchin if(f=fields) 108da2e3ebdSchin while(f-->0 && cp<ep) /* skip over fields */ 109da2e3ebdSchin { 110da2e3ebdSchin while(cp<ep && *cp==' ' || *cp=='\t') 111da2e3ebdSchin cp++; 112da2e3ebdSchin while(cp<ep && *cp!=' ' && *cp!='\t') 113da2e3ebdSchin cp++; 114da2e3ebdSchin } 115da2e3ebdSchin if(chars) 116da2e3ebdSchin cp += chars; 117da2e3ebdSchin if((reclen = n - (cp-bufp)) <=0) 118da2e3ebdSchin { 119da2e3ebdSchin reclen = 1; 120da2e3ebdSchin cp = bufp + sfvalue(fdin)-1; 121da2e3ebdSchin } 122da2e3ebdSchin else if(width >= 0 && width < reclen) 123da2e3ebdSchin reclen = width; 124da2e3ebdSchin } 125da2e3ebdSchin else 126da2e3ebdSchin reclen=-2; 127da2e3ebdSchin if(reclen==oreclen && (!reclen || !(*compare)(cp,orecp,reclen))) 128da2e3ebdSchin { 129da2e3ebdSchin count++; 130da2e3ebdSchin if (!all) 131da2e3ebdSchin continue; 132da2e3ebdSchin next = count; 133da2e3ebdSchin } 134da2e3ebdSchin else 135da2e3ebdSchin { 136da2e3ebdSchin next = 0; 137da2e3ebdSchin if(outsize>0) 138da2e3ebdSchin { 139da2e3ebdSchin if(((mode&D_FLAG)&&count==0) || ((mode&U_FLAG)&&count)) 140da2e3ebdSchin { 141da2e3ebdSchin if(outp!=sbufp) 142da2e3ebdSchin sfwrite(fdout,outp,0); 143da2e3ebdSchin } 144da2e3ebdSchin else 145da2e3ebdSchin { 146da2e3ebdSchin if(cwidth) 147da2e3ebdSchin { 148da2e3ebdSchin outp[CWIDTH] = ' '; 149da2e3ebdSchin if(count<MAXCNT) 150da2e3ebdSchin { 151da2e3ebdSchin sfsprintf(outp,cwidth,"%*d",CWIDTH,count+1); 152da2e3ebdSchin outp[CWIDTH] = ' '; 153da2e3ebdSchin } 154da2e3ebdSchin else 155da2e3ebdSchin { 156da2e3ebdSchin outsize -= (CWIDTH+1); 157da2e3ebdSchin if(outp!=sbufp) 158da2e3ebdSchin { 159da2e3ebdSchin if(!(sbufp=fmtbuf(outsize))) 160da2e3ebdSchin return(1); 161da2e3ebdSchin memcpy(sbufp,outp+CWIDTH+1,outsize); 162da2e3ebdSchin sfwrite(fdout,outp,0); 163da2e3ebdSchin outp = sbufp; 164da2e3ebdSchin } 165da2e3ebdSchin else 166da2e3ebdSchin outp += CWIDTH+1; 167da2e3ebdSchin sfprintf(fdout,"%4d ",count+1); 168da2e3ebdSchin } 169da2e3ebdSchin } 170da2e3ebdSchin if(sfwrite(fdout,outp,outsize) != outsize) 171da2e3ebdSchin return(1); 172da2e3ebdSchin } 173da2e3ebdSchin } 174da2e3ebdSchin } 175da2e3ebdSchin if(n==0) 176da2e3ebdSchin break; 177da2e3ebdSchin if(count = next) 178da2e3ebdSchin { 179da2e3ebdSchin if(sfwrite(fdout,outp,outsize) != outsize) 180da2e3ebdSchin return(1); 181da2e3ebdSchin if(*all >= 0) 182da2e3ebdSchin *all = 1; 183da2e3ebdSchin sep = 0; 184da2e3ebdSchin } 185da2e3ebdSchin else 186da2e3ebdSchin sep = all && *all > 0; 187da2e3ebdSchin /* save current record */ 188da2e3ebdSchin if (!(outbuff = sfreserve(fdout, 0, 0)) || (outsize = sfvalue(fdout)) < 0) 189da2e3ebdSchin return(1); 190da2e3ebdSchin outp = outbuff; 191da2e3ebdSchin if(outsize < n+cwidth+sep) 192da2e3ebdSchin { 193da2e3ebdSchin /* no room in outp, clear lock and use side buffer */ 194da2e3ebdSchin sfwrite(fdout,outp,0); 195da2e3ebdSchin if(!(sbufp = outp=fmtbuf(outsize=n+cwidth+sep))) 196da2e3ebdSchin return(1); 197da2e3ebdSchin } 198da2e3ebdSchin else 199da2e3ebdSchin outsize = n+cwidth+sep; 200da2e3ebdSchin memcpy(outp+cwidth+sep,bufp,n); 201da2e3ebdSchin if(sep) 202da2e3ebdSchin outp[cwidth] = '\n'; 203da2e3ebdSchin oreclen = reclen; 204da2e3ebdSchin orecp = outp+cwidth+sep + (cp-bufp); 205da2e3ebdSchin } 206da2e3ebdSchin return(0); 207da2e3ebdSchin } 208da2e3ebdSchin 209da2e3ebdSchin int 210da2e3ebdSchin b_uniq(int argc, char** argv, void* context) 211da2e3ebdSchin { 212da2e3ebdSchin register int n, mode=0; 213da2e3ebdSchin register char *cp; 214da2e3ebdSchin int fields=0, chars=0, width=-1; 215da2e3ebdSchin Sfio_t *fpin, *fpout; 216da2e3ebdSchin int* all = 0; 217da2e3ebdSchin int sep; 218da2e3ebdSchin Compare_f compare = (Compare_f)memcmp; 219da2e3ebdSchin 220da2e3ebdSchin cmdinit(argc, argv, context, ERROR_CATALOG, 0); 221da2e3ebdSchin while (n = optget(argv, usage)) switch (n) 222da2e3ebdSchin { 223da2e3ebdSchin case 'c': 224da2e3ebdSchin mode |= C_FLAG; 225da2e3ebdSchin break; 226da2e3ebdSchin case 'd': 227da2e3ebdSchin mode |= D_FLAG; 228da2e3ebdSchin break; 229da2e3ebdSchin case 'D': 230da2e3ebdSchin mode |= D_FLAG; 231da2e3ebdSchin switch ((int)opt_info.num) 232da2e3ebdSchin { 233da2e3ebdSchin case 'p': 234da2e3ebdSchin sep = 1; 235da2e3ebdSchin break; 236da2e3ebdSchin case 's': 237da2e3ebdSchin sep = 0; 238da2e3ebdSchin break; 239da2e3ebdSchin default: 240da2e3ebdSchin sep = -1; 241da2e3ebdSchin break; 242da2e3ebdSchin } 243da2e3ebdSchin all = &sep; 244da2e3ebdSchin break; 245da2e3ebdSchin case 'i': 246da2e3ebdSchin compare = (Compare_f)strncasecmp; 247da2e3ebdSchin break; 248da2e3ebdSchin case 'u': 249da2e3ebdSchin mode |= U_FLAG; 250da2e3ebdSchin break; 251da2e3ebdSchin case 'f': 252da2e3ebdSchin if(*opt_info.option=='-') 253da2e3ebdSchin fields = opt_info.num; 254da2e3ebdSchin else 255da2e3ebdSchin chars = opt_info.num; 256da2e3ebdSchin break; 257da2e3ebdSchin case 's': 258da2e3ebdSchin chars = opt_info.num; 259da2e3ebdSchin break; 260da2e3ebdSchin case 'w': 261da2e3ebdSchin width = opt_info.num; 262da2e3ebdSchin break; 263da2e3ebdSchin case ':': 264da2e3ebdSchin error(2, "%s", opt_info.arg); 265da2e3ebdSchin break; 266da2e3ebdSchin case '?': 267da2e3ebdSchin error(ERROR_usage(2), "%s", opt_info.arg); 268da2e3ebdSchin break; 269da2e3ebdSchin } 270da2e3ebdSchin argv += opt_info.index; 271da2e3ebdSchin if(all && (mode&C_FLAG)) 272da2e3ebdSchin error(2, "-c and -D are mutually exclusive"); 273da2e3ebdSchin if(error_info.errors) 274da2e3ebdSchin error(ERROR_usage(2), "%s", optusage(NiL)); 275da2e3ebdSchin if((cp = *argv) && (argv++,!streq(cp,"-"))) 276da2e3ebdSchin { 277da2e3ebdSchin if(!(fpin = sfopen(NiL,cp,"r"))) 278da2e3ebdSchin error(ERROR_system(1),"%s: cannot open",cp); 279da2e3ebdSchin } 280da2e3ebdSchin else 281da2e3ebdSchin fpin = sfstdin; 282da2e3ebdSchin if(cp = *argv) 283da2e3ebdSchin { 284da2e3ebdSchin argv++; 285da2e3ebdSchin if(!(fpout = sfopen(NiL,cp,"w"))) 286da2e3ebdSchin error(ERROR_system(1),"%s: cannot create",cp); 287da2e3ebdSchin } 288da2e3ebdSchin else 289da2e3ebdSchin fpout = sfstdout; 290da2e3ebdSchin if(*argv) 291da2e3ebdSchin { 292da2e3ebdSchin error(2, "too many arguments"); 293da2e3ebdSchin error(ERROR_usage(2), "%s", optusage(NiL)); 294da2e3ebdSchin } 295da2e3ebdSchin error_info.errors = uniq(fpin,fpout,fields,chars,width,mode,all,compare); 296da2e3ebdSchin if(fpin!=sfstdin) 297da2e3ebdSchin sfclose(fpin); 298da2e3ebdSchin if(fpout!=sfstdout) 299da2e3ebdSchin sfclose(fpout); 300da2e3ebdSchin return(error_info.errors); 301da2e3ebdSchin } 302da2e3ebdSchin 303