xref: /freebsd/usr.sbin/services_mkdb/uniq.c (revision bc5304a006238115291e7568583632889dffbab9)
1 /*	$NetBSD: uniq.c,v 1.4 2008/04/28 20:24:17 martin Exp $	*/
2 
3 /*-
4  * SPDX-License-Identifier: BSD-2-Clause-NetBSD
5  *
6  * Copyright (c) 2007 The NetBSD Foundation, Inc.
7  * All rights reserved.
8  *
9  * This code is derived from software contributed to The NetBSD Foundation
10  * by Christos Zoulas.
11  *
12  * Redistribution and use in source and binary forms, with or without
13  * modification, are permitted provided that the following conditions
14  * are met:
15  * 1. Redistributions of source code must retain the above copyright
16  *    notice, this list of conditions and the following disclaimer.
17  * 2. Redistributions in binary form must reproduce the above copyright
18  *    notice, this list of conditions and the following disclaimer in the
19  *    documentation and/or other materials provided with the distribution.
20  *
21  * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
22  * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
23  * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
24  * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
25  * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
26  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
27  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
28  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
29  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
30  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
31  * POSSIBILITY OF SUCH DAMAGE.
32  */
33 #include <sys/cdefs.h>
34 __FBSDID("$FreeBSD$");
35 
36 #include <stdio.h>
37 #include <string.h>
38 #include <stdlib.h>
39 #include <db.h>
40 #include <err.h>
41 #include <libutil.h>
42 #include <ctype.h>
43 #include <fcntl.h>
44 
45 #include "extern.h"
46 
/* Whitespace-normalizing line canonicalizer; defined at the end of this file. */
static int comp(const char *origline, char **compline, size_t *len);
48 
49 /*
50  * Preserve only unique content lines in a file. Input lines that have
51  * content [alphanumeric characters before a comment] are white-space
52  * normalized and have their comments removed. Then they are placed
53  * in a hash table, and only the first instance of them is printed.
54  * Comment lines without any alphanumeric content are always printed
55  * since they are there to make the file "pretty". Comment lines with
56  * alphanumeric content are also placed into the hash table and only
57  * printed once.
58  */
59 void
60 uniq(const char *fname)
61 {
62 	DB *db;
63 	DBT key;
64 	static const DBT data = { NULL, 0 };
65 	FILE *fp;
66 	char *line;
67 	size_t len;
68 
69 	if ((db = dbopen(NULL, O_RDWR, 0, DB_HASH, &hinfo)) == NULL)
70 		err(1, "Cannot create in memory database");
71 
72 	if ((fp = fopen(fname, "r")) == NULL)
73 		err(1, "Cannot open `%s'", fname);
74 	while ((line = fgetln(fp, &len)) != NULL) {
75 		size_t complen = len;
76 		char *compline;
77 		if (!comp(line, &compline, &complen)) {
78 			(void)fprintf(stdout, "%*.*s", (int)len, (int)len,
79 			    line);
80 			continue;
81 		}
82 		key.data = compline;
83 		key.size = complen;
84 		switch ((db->put)(db, &key, &data, R_NOOVERWRITE)) {
85 		case 0:
86 			(void)fprintf(stdout, "%*.*s", (int)len, (int)len,
87 			    line);
88 			break;
89 		case 1:
90 			break;
91 		case -1:
92 			err(1, "put");
93 			/* NOTREACHED */
94 		default:
95 			abort();
96 			break;
97 		}
98 	}
99 	(void)fflush(stdout);
100 	exit(0);
101 }
102 
/*
 * normalize whitespace in the original line and place a new string
 * with whitespace converted to a single space in compline. If the line
 * contains just comments, we preserve them. If it contains data and
 * comments, we kill the comments. Return 1 if the line had actual
 * contents, or 0 if it was just a comment without alphanumeric characters.
 *
 * origline: input bytes; it does not have to be NUL-terminated, at most
 *           *len bytes are examined (scanning also stops at a NUL byte).
 * compline: on a return of 1, receives a malloc()ed, NUL-terminated
 *           normalized copy that the caller must free(); on a return of
 *           0 it is set to NULL.
 * len:      in: number of bytes available in origline;
 *           out: length of *compline without the terminator (0 when the
 *           function returns 0).
 *
 * Exits through err(3) if memory cannot be allocated.
 */
static int
comp(const char *origline, char **compline, size_t *len)
{
	const unsigned char *p;
	unsigned char *q;
	char *cline;
	size_t l = *len, complen;
	int hasalnum, iscomment;

	/* Eat leading space */
	for (p = (const unsigned char *)origline; l && *p && isspace(*p);
	    p++, l--)
		continue;
	/*
	 * Check the remaining length before looking at *p: once l has
	 * reached 0, p points just past the caller's buffer and must not
	 * be dereferenced.
	 */
	if (l == 0 || *p == '\0') {
		*compline = NULL;
		*len = 0;
		return 0;
	}

	/*
	 * The normalized text is never longer than what is left of the
	 * input, so l bytes plus a terminator always suffice.
	 */
	if ((cline = malloc(l + 1)) == NULL)
		err(1, "Cannot allocate %zu bytes", l + 1);

	complen = 0;
	hasalnum = 0;
	iscomment = 0;

	for (q = (unsigned char *)cline; l && *p; p++, l--) {
		if (isspace(*p)) {
			/* Collapse a run of whitespace into one blank. */
			if (complen && isspace(q[-1]))
				continue;
			*q++ = ' ';
			complen++;
		} else {
			if (!iscomment && *p == '#') {
				/* Data followed by a comment: drop the rest. */
				if (hasalnum)
					break;
				/* The line starts with a comment: keep it. */
				iscomment = 1;
			} else if (isalnum(*p))
				hasalnum = 1;
			*q++ = *p;
			complen++;
		}
	}

	/* Eat trailing space */
	while (complen && isspace(q[-1])) {
		--q;
		--complen;
	}
	*q = '\0';
	if (!hasalnum) {
		/* Purely decorative line: nothing to hand back. */
		free(cline);
		cline = NULL;
		complen = 0;
	}
	*compline = cline;
	*len = complen;
	return hasalnum;
}
168