/*	$NetBSD: uniq.c,v 1.4 2008/04/28 20:24:17 martin Exp $	*/

/*-
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2007 The NetBSD Foundation, Inc.
 * All rights reserved.
 *
 * This code is derived from software contributed to The NetBSD Foundation
 * by Christos Zoulas.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions
 * are met:
 * 1. Redistributions of source code must retain the above copyright
 *    notice, this list of conditions and the following disclaimer.
 * 2. Redistributions in binary form must reproduce the above copyright
 *    notice, this list of conditions and the following disclaimer in the
 *    documentation and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS
 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED
 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR
 * PURPOSE ARE DISCLAIMED.  IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS
 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 */
330b25da7eSHajimu UMEMOTO #include <sys/cdefs.h>
340b25da7eSHajimu UMEMOTO #include <stdio.h>
350b25da7eSHajimu UMEMOTO #include <string.h>
360b25da7eSHajimu UMEMOTO #include <stdlib.h>
370b25da7eSHajimu UMEMOTO #include <db.h>
380b25da7eSHajimu UMEMOTO #include <err.h>
390b25da7eSHajimu UMEMOTO #include <libutil.h>
400b25da7eSHajimu UMEMOTO #include <ctype.h>
410b25da7eSHajimu UMEMOTO #include <fcntl.h>
420b25da7eSHajimu UMEMOTO
43eccad222SEd Schouten #include "extern.h"
440b25da7eSHajimu UMEMOTO
450b25da7eSHajimu UMEMOTO static int comp(const char *, char **, size_t *);
460b25da7eSHajimu UMEMOTO
470b25da7eSHajimu UMEMOTO /*
480b25da7eSHajimu UMEMOTO * Preserve only unique content lines in a file. Input lines that have
490b25da7eSHajimu UMEMOTO * content [alphanumeric characters before a comment] are white-space
500b25da7eSHajimu UMEMOTO * normalized and have their comments removed. Then they are placed
510b25da7eSHajimu UMEMOTO * in a hash table, and only the first instance of them is printed.
520b25da7eSHajimu UMEMOTO * Comment lines without any alphanumeric content are always printed
530b25da7eSHajimu UMEMOTO * since they are there to make the file "pretty". Comment lines with
540b25da7eSHajimu UMEMOTO * alphanumeric content are also placed into the hash table and only
550b25da7eSHajimu UMEMOTO * printed once.
560b25da7eSHajimu UMEMOTO */
570b25da7eSHajimu UMEMOTO void
uniq(const char * fname)580b25da7eSHajimu UMEMOTO uniq(const char *fname)
590b25da7eSHajimu UMEMOTO {
600b25da7eSHajimu UMEMOTO DB *db;
610b25da7eSHajimu UMEMOTO DBT key;
620b25da7eSHajimu UMEMOTO static const DBT data = { NULL, 0 };
630b25da7eSHajimu UMEMOTO FILE *fp;
640b25da7eSHajimu UMEMOTO char *line;
650b25da7eSHajimu UMEMOTO size_t len;
660b25da7eSHajimu UMEMOTO
670b25da7eSHajimu UMEMOTO if ((db = dbopen(NULL, O_RDWR, 0, DB_HASH, &hinfo)) == NULL)
680b25da7eSHajimu UMEMOTO err(1, "Cannot create in memory database");
690b25da7eSHajimu UMEMOTO
700b25da7eSHajimu UMEMOTO if ((fp = fopen(fname, "r")) == NULL)
710b25da7eSHajimu UMEMOTO err(1, "Cannot open `%s'", fname);
720b25da7eSHajimu UMEMOTO while ((line = fgetln(fp, &len)) != NULL) {
730b25da7eSHajimu UMEMOTO size_t complen = len;
740b25da7eSHajimu UMEMOTO char *compline;
750b25da7eSHajimu UMEMOTO if (!comp(line, &compline, &complen)) {
760b25da7eSHajimu UMEMOTO (void)fprintf(stdout, "%*.*s", (int)len, (int)len,
770b25da7eSHajimu UMEMOTO line);
780b25da7eSHajimu UMEMOTO continue;
790b25da7eSHajimu UMEMOTO }
800b25da7eSHajimu UMEMOTO key.data = compline;
810b25da7eSHajimu UMEMOTO key.size = complen;
820b25da7eSHajimu UMEMOTO switch ((db->put)(db, &key, &data, R_NOOVERWRITE)) {
830b25da7eSHajimu UMEMOTO case 0:
840b25da7eSHajimu UMEMOTO (void)fprintf(stdout, "%*.*s", (int)len, (int)len,
850b25da7eSHajimu UMEMOTO line);
860b25da7eSHajimu UMEMOTO break;
870b25da7eSHajimu UMEMOTO case 1:
880b25da7eSHajimu UMEMOTO break;
890b25da7eSHajimu UMEMOTO case -1:
900b25da7eSHajimu UMEMOTO err(1, "put");
9157b9a062SXin LI /* NOTREACHED */
920b25da7eSHajimu UMEMOTO default:
930b25da7eSHajimu UMEMOTO abort();
940b25da7eSHajimu UMEMOTO break;
950b25da7eSHajimu UMEMOTO }
960b25da7eSHajimu UMEMOTO }
970b25da7eSHajimu UMEMOTO (void)fflush(stdout);
980b25da7eSHajimu UMEMOTO exit(0);
990b25da7eSHajimu UMEMOTO }
1000b25da7eSHajimu UMEMOTO
/*
 * Normalize whitespace in the original line and place a newly allocated
 * string, with each run of whitespace converted to a single space and
 * leading/trailing whitespace removed, in *compline.  If the line
 * contains just comments, we preserve them.  If it contains data and
 * comments, we kill the comments (everything from the first '#' that
 * follows alphanumeric data).
 *
 * origline  - input line; need not be NUL-terminated
 * compline  - out: malloc'ed, NUL-terminated normalized line that the
 *             caller must free, or NULL when 0 is returned
 * len       - in: number of valid bytes in origline;
 *             out: length of *compline (0 when 0 is returned)
 *
 * Return 1 if the line had actual contents, or 0 if it was empty or just
 * a comment without alphanumeric characters.  Exits via err(3) if memory
 * cannot be allocated.
 */
static int
comp(const char *origline, char **compline, size_t *len)
{
	const unsigned char *p;
	unsigned char *q;
	char *cline;
	size_t l = *len, complen;
	int hasalnum, iscomment;

	/* Eat leading space */
	for (p = (const unsigned char *)origline; l && *p && isspace(*p);
	    p++, l--)
		continue;

	/*
	 * Test the remaining length first: when it is 0, p points one past
	 * the caller's buffer and must not be dereferenced.
	 */
	if (l == 0 || *p == '\0') {
		*compline = NULL;
		*len = 0;
		return 0;
	}

	/* The normalized form can never be longer than what is left. */
	if ((cline = malloc(l + 1)) == NULL)
		err(1, "Cannot allocate %zu bytes", l + 1);

	complen = 0;
	hasalnum = 0;
	iscomment = 0;

	for (q = (unsigned char *)cline; l && *p; p++, l--) {
		if (isspace(*p)) {
			/* Collapse a run of whitespace into one space. */
			if (complen && isspace(q[-1]))
				continue;
			*q++ = ' ';
			complen++;
		} else {
			if (!iscomment && *p == '#') {
				/* Comment after real data: drop the rest. */
				if (hasalnum)
					break;
				iscomment = 1;
			} else if (isalnum(*p))
				hasalnum = 1;
			*q++ = *p;
			complen++;
		}
	}

	/* Eat trailing space */
	while (complen && isspace(q[-1])) {
		--q;
		--complen;
	}
	*q = '\0';

	if (!hasalnum) {
		/* Nothing worth keying on; hand nothing back. */
		free(cline);
		cline = NULL;
		complen = 0;
	}
	*compline = cline;
	*len = complen;
	return hasalnum;
}
166