1 /*********************************************************************** 2 * * 3 * This software is part of the ast package * 4 * Copyright (c) 1992-2012 AT&T Intellectual Property * 5 * and is licensed under the * 6 * Eclipse Public License, Version 1.0 * 7 * by AT&T Intellectual Property * 8 * * 9 * A copy of the License is available at * 10 * http://www.eclipse.org/org/documents/epl-v10.html * 11 * (with md5 checksum b35adb5213ca9657e911e9befb180842) * 12 * * 13 * Information and Software Systems Research * 14 * AT&T Research * 15 * Florham Park NJ * 16 * * 17 * Glenn Fowler <gsf@research.att.com> * 18 * David Korn <dgk@research.att.com> * 19 * * 20 ***********************************************************************/ 21 #pragma prototyped 22 /* 23 * uniq 24 * 25 * Written by David Korn 26 */ 27 28 static const char usage[] = 29 "[-n?\n@(#)$Id: uniq (AT&T Research) 2009-11-28 $\n]" 30 USAGE_LICENSE 31 "[+NAME?uniq - Report or filter out repeated lines in a file]" 32 "[+DESCRIPTION?\buniq\b reads the input, compares adjacent lines, and " 33 "writes one copy of each input line on the output. The second " 34 "and succeeding copies of the repeated adjacent lines are not " 35 "written.]" 36 "[+?If the output file, \aoutfile\a, is not specified, \buniq\b writes " 37 "to standard output. If no \ainfile\a is given, or if the \ainfile\a " 38 "is \b-\b, \buniq\b reads from standard input with the start of " 39 "the file defined as the current offset.]" 40 "[c:count?Output the number of times each line occurred along with " 41 "the line.]" 42 "[d:repeated|duplicates?Output the first of each duplicate line.]" 43 "[D:all-repeated?Output all duplicate lines as a group with an empty " 44 "line delimiter specified by \adelimit\a:]:?[delimit:=none]" 45 "{" 46 "[n:none?Do not delimit duplicate groups.]" 47 "[p:prepend?Prepend an empty line before each group.]" 48 "[s:separate?Separate each group with an empty line.]" 49 "}" 50 "[f:skip-fields]#[fields?\afields\a is the number of fields to skip over " 51 "before checking for uniqueness. A field is the minimal string matching " 52 "the BRE \b[[:blank:]]]]*[^[:blank:]]]]*\b. -\anumber\a is equivalent to " 53 "\b--skip-fields\b=\anumber\a.]" 54 "[i:ignore-case?Ignore case in comparisons.]" 55 "[s:skip-chars]#[chars?\achars\a is the number of characters to skip over " 56 "before checking for uniqueness. If specified along with \b-f\b, " 57 "the first \achars\a after the first \afields\a are ignored. If " 58 "the \achars\a specifies more characters than are on the line, " 59 "an empty string will be used for comparison. +\anumber\a is " 60 "equivalent to \b--skip-chars\b=\anumber\a.]" 61 "[u:unique?Output unique lines.]" 62 "[w:check-chars]#[chars?\achars\a is the number of characters to compare " 63 "after skipping any specified fields and characters.]" 64 "\n" 65 "\n[infile [outfile]]\n" 66 "\n" 67 "[+EXIT STATUS?]{" 68 "[+0?The input file was successfully processed.]" 69 "[+>0?An error occurred.]" 70 "}" 71 "[+SEE ALSO?\bsort\b(1), \bgrep\b(1)]" 72 ; 73 74 #include <cmd.h> 75 76 #define C_FLAG 1 77 #define D_FLAG 2 78 #define U_FLAG 4 79 80 #define CWIDTH 4 81 #define MAXCNT 9999 82 83 typedef int (*Compare_f)(const char*, const char*, size_t); 84 85 static int uniq(Sfio_t *fdin, Sfio_t *fdout, int fields, int chars, int width, int mode, int* all, Compare_f compare) 86 { 87 register int n, f, outsize=0, mb = mbwide(); 88 register char *cp, *ep, *mp, *bufp, *outp; 89 char *orecp, *sbufp=0, *outbuff; 90 int reclen,oreclen= -1,count=0,cwidth=0,sep,next; 91 if(mode&C_FLAG) 92 cwidth = CWIDTH+1; 93 while(1) 94 { 95 if(bufp = sfgetr(fdin,'\n',0)) 96 n = sfvalue(fdin); 97 else if(bufp = sfgetr(fdin,'\n',SF_LASTR)) 98 { 99 n = sfvalue(fdin); 100 bufp = memcpy(fmtbuf(n + 1), bufp, n); 101 bufp[n++] = '\n'; 102 } 103 else 104 n = 0; 105 if (n) 106 { 107 cp = bufp; 108 ep = cp + n; 109 if (f = fields) 110 while (f-->0 && cp<ep) /* skip over fields */ 111 { 112 while (cp<ep && *cp==' ' || *cp=='\t') 113 cp++; 114 while (cp<ep && *cp!=' ' && *cp!='\t') 115 cp++; 116 } 117 if (chars) 118 { 119 if (mb) 120 for (f = chars; f; f--) 121 mbchar(cp); 122 else 123 cp += chars; 124 } 125 if ((reclen = n - (cp - bufp)) <= 0) 126 { 127 reclen = 1; 128 cp = bufp + n - 1; 129 } 130 else if (width >= 0 && width < reclen) 131 { 132 if (mb) 133 { 134 reclen = 0; 135 mp = cp; 136 while (reclen < width && mp < ep) 137 { 138 reclen++; 139 mbchar(mp); 140 } 141 reclen = mp - cp; 142 } 143 else 144 reclen = width; 145 } 146 } 147 else 148 reclen = -2; 149 if(reclen==oreclen && (!reclen || !(*compare)(cp,orecp,reclen))) 150 { 151 count++; 152 if (!all) 153 continue; 154 next = count; 155 } 156 else 157 { 158 next = 0; 159 if(outsize>0) 160 { 161 if(((mode&D_FLAG)&&count==0) || ((mode&U_FLAG)&&count)) 162 { 163 if(outp!=sbufp) 164 sfwrite(fdout,outp,0); 165 } 166 else 167 { 168 if(cwidth) 169 { 170 if(count<9) 171 { 172 f = 0; 173 while(f < CWIDTH-1) 174 outp[f++] = ' '; 175 outp[f++] = '0' + count + 1; 176 outp[f] = ' '; 177 } 178 else if(count<MAXCNT) 179 { 180 count++; 181 f = CWIDTH; 182 outp[f--] = ' '; 183 do 184 { 185 outp[f--] = '0' + (count % 10); 186 } while (count /= 10); 187 while (f >= 0) 188 outp[f--] = ' '; 189 } 190 else 191 { 192 outsize -= (CWIDTH+1); 193 if(outp!=sbufp) 194 { 195 if(!(sbufp=fmtbuf(outsize))) 196 return(1); 197 memcpy(sbufp,outp+CWIDTH+1,outsize); 198 sfwrite(fdout,outp,0); 199 outp = sbufp; 200 } 201 else 202 outp += CWIDTH+1; 203 sfprintf(fdout,"%4d ",count+1); 204 } 205 } 206 if(sfwrite(fdout,outp,outsize) != outsize) 207 return(1); 208 } 209 } 210 } 211 if(n==0) 212 break; 213 if(count = next) 214 { 215 if(sfwrite(fdout,outp,outsize) != outsize) 216 return(1); 217 if(*all >= 0) 218 *all = 1; 219 sep = 0; 220 } 221 else 222 sep = all && *all > 0; 223 /* save current record */ 224 if (!(outbuff = sfreserve(fdout, 0, 0)) || (outsize = sfvalue(fdout)) < 0) 225 return(1); 226 outp = outbuff; 227 if(outsize < n+cwidth+sep) 228 { 229 /* no room in outp, clear lock and use side buffer */ 230 sfwrite(fdout,outp,0); 231 if(!(sbufp = outp=fmtbuf(outsize=n+cwidth+sep))) 232 return(1); 233 } 234 else 235 outsize = n+cwidth+sep; 236 memcpy(outp+cwidth+sep,bufp,n); 237 if(sep) 238 outp[cwidth] = '\n'; 239 oreclen = reclen; 240 orecp = outp+cwidth+sep + (cp-bufp); 241 } 242 return(0); 243 } 244 245 int 246 b_uniq(int argc, char** argv, Shbltin_t* context) 247 { 248 register int mode=0; 249 register char *cp; 250 int fields=0, chars=0, width=-1; 251 Sfio_t *fpin, *fpout; 252 int* all = 0; 253 int sep; 254 Compare_f compare = (Compare_f)memcmp; 255 256 cmdinit(argc, argv, context, ERROR_CATALOG, 0); 257 for (;;) 258 { 259 switch (optget(argv, usage)) 260 { 261 case 'c': 262 mode |= C_FLAG; 263 continue; 264 case 'd': 265 mode |= D_FLAG; 266 continue; 267 case 'D': 268 mode |= D_FLAG; 269 switch ((int)opt_info.num) 270 { 271 case 'p': 272 sep = 1; 273 break; 274 case 's': 275 sep = 0; 276 break; 277 default: 278 sep = -1; 279 break; 280 } 281 all = &sep; 282 continue; 283 case 'i': 284 compare = (Compare_f)strncasecmp; 285 continue; 286 case 'u': 287 mode |= U_FLAG; 288 continue; 289 case 'f': 290 if(*opt_info.option=='-') 291 fields = opt_info.num; 292 else 293 chars = opt_info.num; 294 continue; 295 case 's': 296 chars = opt_info.num; 297 continue; 298 case 'w': 299 width = opt_info.num; 300 continue; 301 case ':': 302 error(2, "%s", opt_info.arg); 303 break; 304 case '?': 305 error(ERROR_usage(2), "%s", opt_info.arg); 306 break; 307 } 308 break; 309 } 310 argv += opt_info.index; 311 if(all && (mode&C_FLAG)) 312 error(2, "-c and -D are mutually exclusive"); 313 if(error_info.errors) 314 error(ERROR_usage(2), "%s", optusage(NiL)); 315 if((cp = *argv) && (argv++,!streq(cp,"-"))) 316 { 317 if(!(fpin = sfopen(NiL,cp,"r"))) 318 error(ERROR_system(1),"%s: cannot open",cp); 319 } 320 else 321 fpin = sfstdin; 322 if(cp = *argv) 323 { 324 argv++; 325 if(!(fpout = sfopen(NiL,cp,"w"))) 326 error(ERROR_system(1),"%s: cannot create",cp); 327 } 328 else 329 fpout = sfstdout; 330 if(*argv) 331 { 332 error(2, "too many arguments"); 333 error(ERROR_usage(2), "%s", optusage(NiL)); 334 } 335 error_info.errors = uniq(fpin,fpout,fields,chars,width,mode,all,compare); 336 if(fpin!=sfstdin) 337 sfclose(fpin); 338 if(fpout!=sfstdout) 339 sfclose(fpout); 340 return(error_info.errors); 341 } 342 343