xref: /titanic_51/usr/src/lib/libcmd/common/uniq.c (revision 7c2fbfb345896881c631598ee3852ce9ce33fb07)
1da2e3ebdSchin /***********************************************************************
2da2e3ebdSchin *                                                                      *
3da2e3ebdSchin *               This software is part of the ast package               *
4*7c2fbfb3SApril Chin *          Copyright (c) 1992-2008 AT&T Intellectual Property          *
5da2e3ebdSchin *                      and is licensed under the                       *
6da2e3ebdSchin *                  Common Public License, Version 1.0                  *
7*7c2fbfb3SApril Chin *                    by AT&T Intellectual Property                     *
8da2e3ebdSchin *                                                                      *
9da2e3ebdSchin *                A copy of the License is available at                 *
10da2e3ebdSchin *            http://www.opensource.org/licenses/cpl1.0.txt             *
11da2e3ebdSchin *         (with md5 checksum 059e8cd6165cb4c31e351f2b69388fd9)         *
12da2e3ebdSchin *                                                                      *
13da2e3ebdSchin *              Information and Software Systems Research               *
14da2e3ebdSchin *                            AT&T Research                             *
15da2e3ebdSchin *                           Florham Park NJ                            *
16da2e3ebdSchin *                                                                      *
17da2e3ebdSchin *                 Glenn Fowler <gsf@research.att.com>                  *
18da2e3ebdSchin *                  David Korn <dgk@research.att.com>                   *
19da2e3ebdSchin *                                                                      *
20da2e3ebdSchin ***********************************************************************/
21da2e3ebdSchin #pragma prototyped
22da2e3ebdSchin /*
23da2e3ebdSchin  * uniq
24da2e3ebdSchin  *
25da2e3ebdSchin  * Written by David Korn
26da2e3ebdSchin  */
27da2e3ebdSchin 
/*
 * Option and operand description for uniq, written in the bracketed
 * usage-string form that b_uniq() hands to optget() and optusage().
 * It declares the options c, d, D, f, i, s, u and w and the optional
 * operands infile and outfile, together with their help text.
 */
28da2e3ebdSchin static const char usage[] =
29*7c2fbfb3SApril Chin "[-n?\n@(#)$Id: uniq (AT&T Research) 2008-04-24 $\n]"
30da2e3ebdSchin USAGE_LICENSE
31da2e3ebdSchin "[+NAME?uniq - Report or filter out repeated lines in a file]"
32da2e3ebdSchin "[+DESCRIPTION?\buniq\b reads an input, comparing adjacent lines, and "
33da2e3ebdSchin 	"writing one copy of each input line on the output.  The second "
34da2e3ebdSchin 	"and succeeding copies of the repeated adjacent lines are not "
35da2e3ebdSchin 	"written.]"
36da2e3ebdSchin "[+?If the output file, \aoutfile\a, is not specified, \buniq\b writes "
37da2e3ebdSchin 	"to standard output.  If no \ainfile\a is given, or if the \ainfile\a "
38da2e3ebdSchin 	"is \b-\b, \buniq\b reads from standard input with  the start of "
39da2e3ebdSchin 	"the file is defined as the current offset.]"
40da2e3ebdSchin "[c:count?Output the number of times each line occurred  along with "
41da2e3ebdSchin 	"the line.]"
42da2e3ebdSchin "[d:repeated|duplicates?Output the first of each duplicate line.]"
43da2e3ebdSchin "[D:all-repeated?Output all duplicate lines as a group with an empty "
44da2e3ebdSchin     "line delimiter specified by \adelimit\a:]:?[delimit:=none]"
45da2e3ebdSchin     "{"
46da2e3ebdSchin         "[n:none?Do not delimit duplicate groups.]"
47da2e3ebdSchin         "[p:prepend?Prepend an empty line before each group.]"
48da2e3ebdSchin         "[s:separate?Separate each group with an empty line.]"
49da2e3ebdSchin     "}"
50da2e3ebdSchin "[f:skip-fields]#[fields?\afields\a is the number of fields to skip over "
51da2e3ebdSchin     "before checking for uniqueness. A field is the minimal string matching "
52da2e3ebdSchin     "the BRE \b[[:blank:]]]]*[^[:blank:]]]]*\b.]"
53da2e3ebdSchin "[i:ignore-case?Ignore case in comparisons.]"
54da2e3ebdSchin "[s:skip-chars]#[chars?\achars\a is the number of characters to skip over "
55da2e3ebdSchin 	"before checking for uniqueness.  If specified along with \b-f\b, "
56da2e3ebdSchin 	"the first \achars\a after the first \afields\a are ignored.  If "
57da2e3ebdSchin 	"the \achars\a specifies more characters than are on the line, "
58da2e3ebdSchin 	"an empty string will be used for comparison.]"
59da2e3ebdSchin "[u:unique?Output unique lines.]"
60da2e3ebdSchin "[w:check-chars]#[chars?\achars\a is the number of characters to compare "
61da2e3ebdSchin 	"after skipping any specified fields and characters.]"
62da2e3ebdSchin "\n"
63da2e3ebdSchin "\n[infile [outfile]]\n"
64da2e3ebdSchin "\n"
65da2e3ebdSchin "[+EXIT STATUS?]{"
66da2e3ebdSchin 	"[+0?The input file was successfully processed.]"
67da2e3ebdSchin 	"[+>0?An error occurred.]"
68da2e3ebdSchin "}"
69da2e3ebdSchin "[+SEE ALSO?\bsort\b(1), \bgrep\b(1)]"
70da2e3ebdSchin ;
71da2e3ebdSchin 
72da2e3ebdSchin #include <cmd.h>
73da2e3ebdSchin 
/*
 * Bits of the mode argument of uniq():
 *   C_FLAG  set by -c; a count field is written in front of each line
 *   D_FLAG  set by -d and -D; lines that were not repeated are dropped
 *   U_FLAG  set by -u; lines that were repeated are dropped
 */
74da2e3ebdSchin #define C_FLAG	1
75da2e3ebdSchin #define D_FLAG	2
76da2e3ebdSchin #define U_FLAG	4
77da2e3ebdSchin 
/*
 * CWIDTH is the number of digits in the -c count field; uniq() reserves
 * CWIDTH+1 bytes (digits plus one blank) in front of each held line.
 * When the repeat counter reaches MAXCNT the count is no longer
 * formatted into that field and is printed separately instead.
 */
78da2e3ebdSchin #define CWIDTH	4
79da2e3ebdSchin #define MAXCNT	9999
80da2e3ebdSchin 
/*
 * Key comparison function with the memcmp() calling shape; b_uniq()
 * installs either memcmp or strncasecmp.  uniq() treats a zero return
 * as "keys are equal".
 */
81da2e3ebdSchin typedef int (*Compare_f)(const char*, const char*, size_t);
82da2e3ebdSchin 
83da2e3ebdSchin static int uniq(Sfio_t *fdin, Sfio_t *fdout, int fields, int chars, int width, int mode, int* all, Compare_f compare)
84da2e3ebdSchin {
85da2e3ebdSchin 	register int n, f, outsize=0;
86da2e3ebdSchin 	register char *cp, *ep, *bufp, *outp;
87da2e3ebdSchin 	char *orecp, *sbufp=0, *outbuff;
88da2e3ebdSchin 	int reclen,oreclen= -1,count=0,cwidth=0,sep,next;
89da2e3ebdSchin 	if(mode&C_FLAG)
90da2e3ebdSchin 		cwidth = CWIDTH+1;
91da2e3ebdSchin 	while(1)
92da2e3ebdSchin 	{
93da2e3ebdSchin 		if(bufp = sfgetr(fdin,'\n',0))
94da2e3ebdSchin 			n = sfvalue(fdin);
95da2e3ebdSchin 		else if(bufp = sfgetr(fdin,'\n',SF_LASTR))
96da2e3ebdSchin 		{
97da2e3ebdSchin 			n = sfvalue(fdin);
98da2e3ebdSchin 			bufp = memcpy(fmtbuf(n + 1), bufp, n);
99da2e3ebdSchin 			bufp[n++] = '\n';
100da2e3ebdSchin 		}
101da2e3ebdSchin 		else
102da2e3ebdSchin 			n = 0;
103da2e3ebdSchin 		if(n)
104da2e3ebdSchin 		{
105da2e3ebdSchin 			cp = bufp;
106da2e3ebdSchin 			ep = cp + n;
107da2e3ebdSchin 			if(f=fields)
108da2e3ebdSchin 				while(f-->0 && cp<ep) /* skip over fields */
109da2e3ebdSchin 				{
110da2e3ebdSchin 					while(cp<ep && *cp==' ' || *cp=='\t')
111da2e3ebdSchin 						cp++;
112da2e3ebdSchin 					while(cp<ep && *cp!=' ' && *cp!='\t')
113da2e3ebdSchin 						cp++;
114da2e3ebdSchin 				}
115da2e3ebdSchin 			if(chars)
116da2e3ebdSchin 				cp += chars;
117da2e3ebdSchin 			if((reclen = n - (cp-bufp)) <=0)
118da2e3ebdSchin 			{
119da2e3ebdSchin 				reclen = 1;
120da2e3ebdSchin 				cp = bufp + sfvalue(fdin)-1;
121da2e3ebdSchin 			}
122da2e3ebdSchin 			else if(width >= 0 && width < reclen)
123da2e3ebdSchin 				reclen = width;
124da2e3ebdSchin 		}
125da2e3ebdSchin 		else
126da2e3ebdSchin 			reclen=-2;
127da2e3ebdSchin 		if(reclen==oreclen && (!reclen || !(*compare)(cp,orecp,reclen)))
128da2e3ebdSchin 		{
129da2e3ebdSchin 			count++;
130da2e3ebdSchin 			if (!all)
131da2e3ebdSchin 				continue;
132da2e3ebdSchin 			next = count;
133da2e3ebdSchin 		}
134da2e3ebdSchin 		else
135da2e3ebdSchin 		{
136da2e3ebdSchin 			next = 0;
137da2e3ebdSchin 			if(outsize>0)
138da2e3ebdSchin 			{
139da2e3ebdSchin 				if(((mode&D_FLAG)&&count==0) || ((mode&U_FLAG)&&count))
140da2e3ebdSchin 				{
141da2e3ebdSchin 					if(outp!=sbufp)
142da2e3ebdSchin 						sfwrite(fdout,outp,0);
143da2e3ebdSchin 				}
144da2e3ebdSchin 				else
145da2e3ebdSchin 				{
146da2e3ebdSchin 					if(cwidth)
147da2e3ebdSchin 					{
148da2e3ebdSchin 						outp[CWIDTH] = ' ';
149da2e3ebdSchin 						if(count<MAXCNT)
150da2e3ebdSchin 						{
151da2e3ebdSchin 							sfsprintf(outp,cwidth,"%*d",CWIDTH,count+1);
152da2e3ebdSchin 							outp[CWIDTH] = ' ';
153da2e3ebdSchin 						}
154da2e3ebdSchin 						else
155da2e3ebdSchin 						{
156da2e3ebdSchin 							outsize -= (CWIDTH+1);
157da2e3ebdSchin 							if(outp!=sbufp)
158da2e3ebdSchin 							{
159da2e3ebdSchin 								if(!(sbufp=fmtbuf(outsize)))
160da2e3ebdSchin 									return(1);
161da2e3ebdSchin 								memcpy(sbufp,outp+CWIDTH+1,outsize);
162da2e3ebdSchin 								sfwrite(fdout,outp,0);
163da2e3ebdSchin 								outp = sbufp;
164da2e3ebdSchin 							}
165da2e3ebdSchin 							else
166da2e3ebdSchin 								outp += CWIDTH+1;
167da2e3ebdSchin 							sfprintf(fdout,"%4d ",count+1);
168da2e3ebdSchin 						}
169da2e3ebdSchin 					}
170da2e3ebdSchin 					if(sfwrite(fdout,outp,outsize) != outsize)
171da2e3ebdSchin 						return(1);
172da2e3ebdSchin 				}
173da2e3ebdSchin 			}
174da2e3ebdSchin 		}
175da2e3ebdSchin 		if(n==0)
176da2e3ebdSchin 			break;
177da2e3ebdSchin 		if(count = next)
178da2e3ebdSchin 		{
179da2e3ebdSchin 			if(sfwrite(fdout,outp,outsize) != outsize)
180da2e3ebdSchin 				return(1);
181da2e3ebdSchin 			if(*all >= 0)
182da2e3ebdSchin 				*all = 1;
183da2e3ebdSchin 			sep = 0;
184da2e3ebdSchin 		}
185da2e3ebdSchin 		else
186da2e3ebdSchin 			sep = all && *all > 0;
187da2e3ebdSchin 		/* save current record */
188da2e3ebdSchin 		if (!(outbuff = sfreserve(fdout, 0, 0)) || (outsize = sfvalue(fdout)) < 0)
189da2e3ebdSchin 			return(1);
190da2e3ebdSchin 		outp = outbuff;
191da2e3ebdSchin 		if(outsize < n+cwidth+sep)
192da2e3ebdSchin 		{
193da2e3ebdSchin 			/* no room in outp, clear lock and use side buffer */
194da2e3ebdSchin 			sfwrite(fdout,outp,0);
195da2e3ebdSchin 			if(!(sbufp = outp=fmtbuf(outsize=n+cwidth+sep)))
196da2e3ebdSchin 				return(1);
197da2e3ebdSchin 		}
198da2e3ebdSchin 		else
199da2e3ebdSchin 			outsize = n+cwidth+sep;
200da2e3ebdSchin 		memcpy(outp+cwidth+sep,bufp,n);
201da2e3ebdSchin 		if(sep)
202da2e3ebdSchin 			outp[cwidth] = '\n';
203da2e3ebdSchin 		oreclen = reclen;
204da2e3ebdSchin 		orecp = outp+cwidth+sep + (cp-bufp);
205da2e3ebdSchin 	}
206da2e3ebdSchin 	return(0);
207da2e3ebdSchin }
208da2e3ebdSchin 
209da2e3ebdSchin int
210da2e3ebdSchin b_uniq(int argc, char** argv, void* context)
211da2e3ebdSchin {
212da2e3ebdSchin 	register int n, mode=0;
213da2e3ebdSchin 	register char *cp;
214da2e3ebdSchin 	int fields=0, chars=0, width=-1;
215da2e3ebdSchin 	Sfio_t *fpin, *fpout;
216da2e3ebdSchin 	int* all = 0;
217da2e3ebdSchin 	int sep;
218da2e3ebdSchin 	Compare_f compare = (Compare_f)memcmp;
219da2e3ebdSchin 
220da2e3ebdSchin 	cmdinit(argc, argv, context, ERROR_CATALOG, 0);
221da2e3ebdSchin 	while (n = optget(argv, usage)) switch (n)
222da2e3ebdSchin 	{
223da2e3ebdSchin 	    case 'c':
224da2e3ebdSchin 		mode |= C_FLAG;
225da2e3ebdSchin 		break;
226da2e3ebdSchin 	    case 'd':
227da2e3ebdSchin 		mode |= D_FLAG;
228da2e3ebdSchin 		break;
229da2e3ebdSchin 	    case 'D':
230da2e3ebdSchin 		mode |= D_FLAG;
231da2e3ebdSchin 		switch ((int)opt_info.num)
232da2e3ebdSchin 		{
233da2e3ebdSchin 		case 'p':
234da2e3ebdSchin 			sep = 1;
235da2e3ebdSchin 			break;
236da2e3ebdSchin 		case 's':
237da2e3ebdSchin 			sep = 0;
238da2e3ebdSchin 			break;
239da2e3ebdSchin 		default:
240da2e3ebdSchin 			sep = -1;
241da2e3ebdSchin 			break;
242da2e3ebdSchin 		}
243da2e3ebdSchin 		all = &sep;
244da2e3ebdSchin 		break;
245da2e3ebdSchin 	    case 'i':
246da2e3ebdSchin 		compare = (Compare_f)strncasecmp;
247da2e3ebdSchin 		break;
248da2e3ebdSchin 	    case 'u':
249da2e3ebdSchin 		mode |= U_FLAG;
250da2e3ebdSchin 		break;
251da2e3ebdSchin 	    case 'f':
252da2e3ebdSchin 		if(*opt_info.option=='-')
253da2e3ebdSchin 			fields = opt_info.num;
254da2e3ebdSchin 		else
255da2e3ebdSchin 			chars = opt_info.num;
256da2e3ebdSchin 		break;
257da2e3ebdSchin 	    case 's':
258da2e3ebdSchin 		chars = opt_info.num;
259da2e3ebdSchin 		break;
260da2e3ebdSchin 	    case 'w':
261da2e3ebdSchin 		width = opt_info.num;
262da2e3ebdSchin 		break;
263da2e3ebdSchin 	    case ':':
264da2e3ebdSchin 		error(2, "%s", opt_info.arg);
265da2e3ebdSchin 		break;
266da2e3ebdSchin 	    case '?':
267da2e3ebdSchin 		error(ERROR_usage(2), "%s", opt_info.arg);
268da2e3ebdSchin 		break;
269da2e3ebdSchin 	}
270da2e3ebdSchin 	argv += opt_info.index;
271da2e3ebdSchin 	if(all && (mode&C_FLAG))
272da2e3ebdSchin 		error(2, "-c and -D are mutually exclusive");
273da2e3ebdSchin 	if(error_info.errors)
274da2e3ebdSchin 		error(ERROR_usage(2), "%s", optusage(NiL));
275da2e3ebdSchin 	if((cp = *argv) && (argv++,!streq(cp,"-")))
276da2e3ebdSchin 	{
277da2e3ebdSchin 		if(!(fpin = sfopen(NiL,cp,"r")))
278da2e3ebdSchin 			error(ERROR_system(1),"%s: cannot open",cp);
279da2e3ebdSchin 	}
280da2e3ebdSchin 	else
281da2e3ebdSchin 		fpin = sfstdin;
282da2e3ebdSchin 	if(cp = *argv)
283da2e3ebdSchin 	{
284da2e3ebdSchin 		argv++;
285da2e3ebdSchin 		if(!(fpout = sfopen(NiL,cp,"w")))
286da2e3ebdSchin 			error(ERROR_system(1),"%s: cannot create",cp);
287da2e3ebdSchin 	}
288da2e3ebdSchin 	else
289da2e3ebdSchin 		fpout = sfstdout;
290da2e3ebdSchin 	if(*argv)
291da2e3ebdSchin 	{
292da2e3ebdSchin 		error(2, "too many arguments");
293da2e3ebdSchin 		error(ERROR_usage(2), "%s", optusage(NiL));
294da2e3ebdSchin 	}
295da2e3ebdSchin 	error_info.errors = uniq(fpin,fpout,fields,chars,width,mode,all,compare);
296da2e3ebdSchin 	if(fpin!=sfstdin)
297da2e3ebdSchin 		sfclose(fpin);
298da2e3ebdSchin 	if(fpout!=sfstdout)
299da2e3ebdSchin 		sfclose(fpout);
300da2e3ebdSchin 	return(error_info.errors);
301da2e3ebdSchin }
302da2e3ebdSchin 
303