1 /* $NetBSD: uniq.c,v 1.4 2008/04/28 20:24:17 martin Exp $ */ 2 3 /*- 4 * SPDX-License-Identifier: BSD-2-Clause-NetBSD 5 * 6 * Copyright (c) 2007 The NetBSD Foundation, Inc. 7 * All rights reserved. 8 * 9 * This code is derived from software contributed to The NetBSD Foundation 10 * by Christos Zoulas. 11 * 12 * Redistribution and use in source and binary forms, with or without 13 * modification, are permitted provided that the following conditions 14 * are met: 15 * 1. Redistributions of source code must retain the above copyright 16 * notice, this list of conditions and the following disclaimer. 17 * 2. Redistributions in binary form must reproduce the above copyright 18 * notice, this list of conditions and the following disclaimer in the 19 * documentation and/or other materials provided with the distribution. 20 * 21 * THIS SOFTWARE IS PROVIDED BY THE NETBSD FOUNDATION, INC. AND CONTRIBUTORS 22 * ``AS IS'' AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED 23 * TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR 24 * PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE FOUNDATION OR CONTRIBUTORS 25 * BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR 26 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF 27 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS 28 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN 29 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE) 30 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE 31 * POSSIBILITY OF SUCH DAMAGE. 
32 */ 33 #include <sys/cdefs.h> 34 __FBSDID("$FreeBSD$"); 35 36 #include <stdio.h> 37 #include <string.h> 38 #include <stdlib.h> 39 #include <db.h> 40 #include <err.h> 41 #include <libutil.h> 42 #include <ctype.h> 43 #include <fcntl.h> 44 45 #include "extern.h" 46 47 static int comp(const char *, char **, size_t *); 48 49 /* 50 * Preserve only unique content lines in a file. Input lines that have 51 * content [alphanumeric characters before a comment] are white-space 52 * normalized and have their comments removed. Then they are placed 53 * in a hash table, and only the first instance of them is printed. 54 * Comment lines without any alphanumeric content are always printed 55 * since they are there to make the file "pretty". Comment lines with 56 * alphanumeric content are also placed into the hash table and only 57 * printed once. 58 */ 59 void 60 uniq(const char *fname) 61 { 62 DB *db; 63 DBT key; 64 static const DBT data = { NULL, 0 }; 65 FILE *fp; 66 char *line; 67 size_t len; 68 69 if ((db = dbopen(NULL, O_RDWR, 0, DB_HASH, &hinfo)) == NULL) 70 err(1, "Cannot create in memory database"); 71 72 if ((fp = fopen(fname, "r")) == NULL) 73 err(1, "Cannot open `%s'", fname); 74 while ((line = fgetln(fp, &len)) != NULL) { 75 size_t complen = len; 76 char *compline; 77 if (!comp(line, &compline, &complen)) { 78 (void)fprintf(stdout, "%*.*s", (int)len, (int)len, 79 line); 80 continue; 81 } 82 key.data = compline; 83 key.size = complen; 84 switch ((db->put)(db, &key, &data, R_NOOVERWRITE)) { 85 case 0: 86 (void)fprintf(stdout, "%*.*s", (int)len, (int)len, 87 line); 88 break; 89 case 1: 90 break; 91 case -1: 92 err(1, "put"); 93 /* NOTREACHED */ 94 default: 95 abort(); 96 break; 97 } 98 } 99 (void)fflush(stdout); 100 exit(0); 101 } 102 103 /* 104 * normalize whitespace in the original line and place a new string 105 * with whitespace converted to a single space in compline. If the line 106 * contains just comments, we preserve them. 
/*
 * comp --
 *	Build a normalized, NUL-terminated copy of one input line, suitable
 *	for use as a hash key.
 *
 *	Leading and trailing whitespace is dropped and every internal run of
 *	whitespace is collapsed to a single space.  A '#' met after
 *	alphanumeric content has been seen ends the line: the '#' and the
 *	rest of the line are discarded.  A '#' met before any alphanumeric
 *	character is kept along with everything that follows it.  Scanning
 *	also stops at an embedded NUL byte.
 *
 *	origline  line to normalize; it need not be NUL-terminated
 *	compline  out: malloc'ed normalized string that the caller must
 *	          free, or NULL when 0 is returned
 *	len       in: number of bytes in origline
 *	          out: length of *compline (0 when 0 is returned)
 *
 *	Returns 1 if the line holds at least one alphanumeric character,
 *	otherwise 0.  Exits via err(3) if memory cannot be allocated.
 */
static int
comp(const char *origline, char **compline, size_t *len)
{
	const unsigned char *p = (const unsigned char *)origline;
	unsigned char *q;
	char *cline;
	size_t l = *len, complen = 0;
	int hasalnum = 0, iscomment = 0;

	/* Nothing is handed back unless the line turns out to have content. */
	*compline = NULL;
	*len = 0;

	/* Eat leading space */
	while (l != 0 && *p != '\0' && isspace(*p)) {
		p++;
		l--;
	}
	/* Test the count first: p is one past the buffer when l == 0. */
	if (l == 0 || *p == '\0')
		return 0;

	/* The normalized text is never longer than what is left to scan. */
	if ((cline = malloc(l + 1)) == NULL)
		err(1, "Cannot allocate %zu bytes", l + 1);

	for (q = (unsigned char *)cline; l != 0 && *p != '\0'; p++, l--) {
		if (isspace(*p)) {
			/* Collapse a run of whitespace into one space. */
			if (complen != 0 && isspace(q[-1]))
				continue;
			*q++ = ' ';
			complen++;
			continue;
		}
		if (!iscomment && *p == '#') {
			/* A comment that follows real data ends the line. */
			if (hasalnum)
				break;
			iscomment = 1;
		} else if (isalnum(*p))
			hasalnum = 1;
		*q++ = *p;
		complen++;
	}

	/* Eat trailing space */
	while (complen != 0 && isspace(q[-1])) {
		q--;
		complen--;
	}
	*q = '\0';

	if (!hasalnum) {
		free(cline);
		return 0;
	}
	*compline = cline;
	*len = complen;
	return 1;
}