xref: /freebsd/usr.sbin/services_mkdb/uniq.c (revision 39ee7a7a6bdd1557b1c3532abf60d139798ac88b)
1 /*	$NetBSD: uniq.c,v 1.4 2008/04/28 20:24:17 martin Exp $	*/
2 
3 /*-
4  * Copyright (c) 2007 The NetBSD Foundation, Inc.
5  * All rights reserved.
6  *
7  * This code is derived from software contributed to The NetBSD Foundation
8  * by Christos Zoulas.
9  *
10  * Redistribution and use in source and binary forms, with or without
11  * modification, are permitted provided that the following conditions
12  * are met:
13  * 1. Redistributions of source code must retain the above copyright
14  *    notice, this list of conditions and the following disclaimer.
15  * 2. Redistributions in binary form must reproduce the above copyright
16  *    notice, this list of conditions and the following disclaimer in the
17  *    documentation and/or other materials provided with the distribution.
18  *
19  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
20  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
21  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
22  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
23  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
24  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
25  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
26  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
27  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
28  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
29  * POSSIBILITY OF SUCH DAMAGE.
30  */
31 #include <sys/cdefs.h>
32 __FBSDID("$FreeBSD$");
33 
34 #include <stdio.h>
35 #include <string.h>
36 #include <stdlib.h>
37 #include <db.h>
38 #include <err.h>
39 #include <libutil.h>
40 #include <ctype.h>
41 #include <fcntl.h>
42 
43 #include "extern.h"
44 
45 static int comp(const char *, char **, size_t *);
46 
47 /*
48  * Preserve only unique content lines in a file. Input lines that have
49  * content [alphanumeric characters before a comment] are white-space
50  * normalized and have their comments removed. Then they are placed
51  * in a hash table, and only the first instance of them is printed.
52  * Comment lines without any alphanumeric content are always printed
53  * since they are there to make the file "pretty". Comment lines with
54  * alphanumeric content are also placed into the hash table and only
55  * printed once.
56  */
57 void
58 uniq(const char *fname)
59 {
60 	DB *db;
61 	DBT key;
62 	static const DBT data = { NULL, 0 };
63 	FILE *fp;
64 	char *line;
65 	size_t len;
66 
67 	if ((db = dbopen(NULL, O_RDWR, 0, DB_HASH, &hinfo)) == NULL)
68 		err(1, "Cannot create in memory database");
69 
70 	if ((fp = fopen(fname, "r")) == NULL)
71 		err(1, "Cannot open `%s'", fname);
72 	while ((line = fgetln(fp, &len)) != NULL) {
73 		size_t complen = len;
74 		char *compline;
75 		if (!comp(line, &compline, &complen)) {
76 			(void)fprintf(stdout, "%*.*s", (int)len, (int)len,
77 			    line);
78 			continue;
79 		}
80 		key.data = compline;
81 		key.size = complen;
82 		switch ((db->put)(db, &key, &data, R_NOOVERWRITE)) {
83 		case 0:
84 			(void)fprintf(stdout, "%*.*s", (int)len, (int)len,
85 			    line);
86 			break;
87 		case 1:
88 			break;
89 		case -1:
90 			err(1, "put");
91 		default:
92 			abort();
93 			break;
94 		}
95 	}
96 	(void)fflush(stdout);
97 	exit(0);
98 }
99 
/*
 * normalize whitespace in the original line and place a new string
 * with whitespace converted to a single space in compline. If the line
 * contains just comments, we preserve them. If it contains data and
 * comments, we kill the comments. Return 1 if the line had actual
 * contents, or 0 if it was just a comment without alphanumeric characters.
 *
 * origline need not be NUL-terminated; at most *len bytes of it are read.
 *
 * On return *compline is either NULL (the line was empty or contained
 * only whitespace, in which case *len is set to 0) or a NUL-terminated
 * buffer obtained from malloc() that the caller must free(); *len is then
 * the length of that string. Allocation failure is fatal (err()).
 */
static int
comp(const char *origline, char **compline, size_t *len)
{
	const unsigned char *p;
	unsigned char *q;
	char *cline;
	size_t l = *len, complen;
	int hasalnum, iscomment;

	/* Eat leading space */
	for (p = (const unsigned char *)origline; l && *p && isspace(*p);
	    p++, l--)
		continue;

	/*
	 * Nothing left after the leading whitespace: report "no content"
	 * before allocating, so that no buffer is leaked on this path.
	 */
	if (l == 0 || *p == '\0') {
		*compline = NULL;
		*len = 0;
		return 0;
	}

	/* The normalized text is never longer than the remaining input. */
	if ((cline = malloc(l + 1)) == NULL)
		err(1, "Cannot allocate %zu bytes", l + 1);

	complen = 0;
	hasalnum = 0;
	iscomment = 0;

	for (q = (unsigned char *)cline; l && *p; p++, l--) {
		if (isspace(*p)) {
			/* Collapse a run of whitespace into one space. */
			if (complen && isspace(q[-1]))
				continue;
			*q++ = ' ';
			complen++;
		} else {
			if (!iscomment && *p == '#') {
				/* Comment after real data: drop the rest. */
				if (hasalnum)
					break;
				/* Comment-only line: keep its text. */
				iscomment = 1;
			} else
				hasalnum |= isalnum(*p);
			*q++ = *p;
			complen++;
		}
	}

	/* Eat trailing space */
	while (complen && isspace(q[-1])) {
		--q;
		--complen;
	}
	*q = '\0';
	*compline = cline;
	*len = complen;
	/* isalnum() returns an arbitrary nonzero value; normalize to 0/1. */
	return hasalnum != 0;
}
159