xref: /freebsd/contrib/mandoc/dbm_map.c (revision 01d4e2149e5566e5d9394913dc9fb032da259e0b)
1*61d06d6bSBaptiste Daroussin /*	$Id: dbm_map.c,v 1.8 2017/02/17 14:43:54 schwarze Exp $ */
2*61d06d6bSBaptiste Daroussin /*
3*61d06d6bSBaptiste Daroussin  * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org>
4*61d06d6bSBaptiste Daroussin  *
5*61d06d6bSBaptiste Daroussin  * Permission to use, copy, modify, and distribute this software for any
6*61d06d6bSBaptiste Daroussin  * purpose with or without fee is hereby granted, provided that the above
7*61d06d6bSBaptiste Daroussin  * copyright notice and this permission notice appear in all copies.
8*61d06d6bSBaptiste Daroussin  *
9*61d06d6bSBaptiste Daroussin  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10*61d06d6bSBaptiste Daroussin  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11*61d06d6bSBaptiste Daroussin  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12*61d06d6bSBaptiste Daroussin  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13*61d06d6bSBaptiste Daroussin  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14*61d06d6bSBaptiste Daroussin  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15*61d06d6bSBaptiste Daroussin  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16*61d06d6bSBaptiste Daroussin  *
17*61d06d6bSBaptiste Daroussin  * Low-level routines for the map-based version
18*61d06d6bSBaptiste Daroussin  * of the mandoc database, for read-only access.
19*61d06d6bSBaptiste Daroussin  * The interface is defined in "dbm_map.h".
20*61d06d6bSBaptiste Daroussin  */
21*61d06d6bSBaptiste Daroussin #include "config.h"
22*61d06d6bSBaptiste Daroussin 
23*61d06d6bSBaptiste Daroussin #include <sys/mman.h>
24*61d06d6bSBaptiste Daroussin #include <sys/stat.h>
25*61d06d6bSBaptiste Daroussin #include <sys/types.h>
26*61d06d6bSBaptiste Daroussin 
27*61d06d6bSBaptiste Daroussin #if HAVE_ENDIAN
28*61d06d6bSBaptiste Daroussin #include <endian.h>
29*61d06d6bSBaptiste Daroussin #elif HAVE_SYS_ENDIAN
30*61d06d6bSBaptiste Daroussin #include <sys/endian.h>
31*61d06d6bSBaptiste Daroussin #elif HAVE_NTOHL
32*61d06d6bSBaptiste Daroussin #include <arpa/inet.h>
33*61d06d6bSBaptiste Daroussin #endif
34*61d06d6bSBaptiste Daroussin #if HAVE_ERR
35*61d06d6bSBaptiste Daroussin #include <err.h>
36*61d06d6bSBaptiste Daroussin #endif
37*61d06d6bSBaptiste Daroussin #include <errno.h>
38*61d06d6bSBaptiste Daroussin #include <fcntl.h>
39*61d06d6bSBaptiste Daroussin #include <regex.h>
40*61d06d6bSBaptiste Daroussin #include <stdint.h>
41*61d06d6bSBaptiste Daroussin #include <stdlib.h>
42*61d06d6bSBaptiste Daroussin #include <string.h>
43*61d06d6bSBaptiste Daroussin #include <unistd.h>
44*61d06d6bSBaptiste Daroussin 
45*61d06d6bSBaptiste Daroussin #include "mansearch.h"
46*61d06d6bSBaptiste Daroussin #include "dbm_map.h"
47*61d06d6bSBaptiste Daroussin #include "dbm.h"
48*61d06d6bSBaptiste Daroussin 
/*
 * State of the single database handled by this file.
 * All of it is set up by dbm_map() and consumed by the other routines.
 */
static struct stat	 st;		/* fstat(2) result; st_size is the mapping length */
static char		*dbm_base;	/* start of the mapping; (char *)-1 after dbm_unmap() */
static int		 ifd;		/* descriptor of the open database file */
static int32_t		 max_offset;	/* exclusive upper bound for offsets in dbm_get() */
53*61d06d6bSBaptiste Daroussin 
54*61d06d6bSBaptiste Daroussin /*
55*61d06d6bSBaptiste Daroussin  * Open a disk-based database for read-only access.
56*61d06d6bSBaptiste Daroussin  * Validate the file format as far as it is not mandoc-specific.
57*61d06d6bSBaptiste Daroussin  * Return 0 on success.  Return -1 and set errno on failure.
58*61d06d6bSBaptiste Daroussin  */
59*61d06d6bSBaptiste Daroussin int
dbm_map(const char * fname)60*61d06d6bSBaptiste Daroussin dbm_map(const char *fname)
61*61d06d6bSBaptiste Daroussin {
62*61d06d6bSBaptiste Daroussin 	int		 save_errno;
63*61d06d6bSBaptiste Daroussin 	const int32_t	*magic;
64*61d06d6bSBaptiste Daroussin 
65*61d06d6bSBaptiste Daroussin 	if ((ifd = open(fname, O_RDONLY)) == -1)
66*61d06d6bSBaptiste Daroussin 		return -1;
67*61d06d6bSBaptiste Daroussin 	if (fstat(ifd, &st) == -1)
68*61d06d6bSBaptiste Daroussin 		goto fail;
69*61d06d6bSBaptiste Daroussin 	if (st.st_size < 5) {
70*61d06d6bSBaptiste Daroussin 		warnx("dbm_map(%s): File too short", fname);
71*61d06d6bSBaptiste Daroussin 		errno = EFTYPE;
72*61d06d6bSBaptiste Daroussin 		goto fail;
73*61d06d6bSBaptiste Daroussin 	}
74*61d06d6bSBaptiste Daroussin 	if (st.st_size > INT32_MAX) {
75*61d06d6bSBaptiste Daroussin 		errno = EFBIG;
76*61d06d6bSBaptiste Daroussin 		goto fail;
77*61d06d6bSBaptiste Daroussin 	}
78*61d06d6bSBaptiste Daroussin 	if ((dbm_base = mmap(NULL, st.st_size, PROT_READ, MAP_SHARED,
79*61d06d6bSBaptiste Daroussin 	    ifd, 0)) == MAP_FAILED)
80*61d06d6bSBaptiste Daroussin 		goto fail;
81*61d06d6bSBaptiste Daroussin 	magic = dbm_getint(0);
82*61d06d6bSBaptiste Daroussin 	if (be32toh(*magic) != MANDOCDB_MAGIC) {
83*61d06d6bSBaptiste Daroussin 		if (strncmp(dbm_base, "SQLite format 3", 15))
84*61d06d6bSBaptiste Daroussin 			warnx("dbm_map(%s): "
85*61d06d6bSBaptiste Daroussin 			    "Bad initial magic %x (expected %x)",
86*61d06d6bSBaptiste Daroussin 			    fname, be32toh(*magic), MANDOCDB_MAGIC);
87*61d06d6bSBaptiste Daroussin 		else
88*61d06d6bSBaptiste Daroussin 			warnx("dbm_map(%s): "
89*61d06d6bSBaptiste Daroussin 			    "Obsolete format based on SQLite 3",
90*61d06d6bSBaptiste Daroussin 			    fname);
91*61d06d6bSBaptiste Daroussin 		errno = EFTYPE;
92*61d06d6bSBaptiste Daroussin 		goto fail;
93*61d06d6bSBaptiste Daroussin 	}
94*61d06d6bSBaptiste Daroussin 	magic = dbm_getint(1);
95*61d06d6bSBaptiste Daroussin 	if (be32toh(*magic) != MANDOCDB_VERSION) {
96*61d06d6bSBaptiste Daroussin 		warnx("dbm_map(%s): Bad version number %d (expected %d)",
97*61d06d6bSBaptiste Daroussin 		    fname, be32toh(*magic), MANDOCDB_VERSION);
98*61d06d6bSBaptiste Daroussin 		errno = EFTYPE;
99*61d06d6bSBaptiste Daroussin 		goto fail;
100*61d06d6bSBaptiste Daroussin 	}
101*61d06d6bSBaptiste Daroussin 	max_offset = be32toh(*dbm_getint(3)) + sizeof(int32_t);
102*61d06d6bSBaptiste Daroussin 	if (st.st_size != max_offset) {
103*61d06d6bSBaptiste Daroussin 		warnx("dbm_map(%s): Inconsistent file size %lld (expected %d)",
104*61d06d6bSBaptiste Daroussin 		    fname, (long long)st.st_size, max_offset);
105*61d06d6bSBaptiste Daroussin 		errno = EFTYPE;
106*61d06d6bSBaptiste Daroussin 		goto fail;
107*61d06d6bSBaptiste Daroussin 	}
108*61d06d6bSBaptiste Daroussin 	if ((magic = dbm_get(*dbm_getint(3))) == NULL) {
109*61d06d6bSBaptiste Daroussin 		errno = EFTYPE;
110*61d06d6bSBaptiste Daroussin 		goto fail;
111*61d06d6bSBaptiste Daroussin 	}
112*61d06d6bSBaptiste Daroussin 	if (be32toh(*magic) != MANDOCDB_MAGIC) {
113*61d06d6bSBaptiste Daroussin 		warnx("dbm_map(%s): Bad final magic %x (expected %x)",
114*61d06d6bSBaptiste Daroussin 		    fname, be32toh(*magic), MANDOCDB_MAGIC);
115*61d06d6bSBaptiste Daroussin 		errno = EFTYPE;
116*61d06d6bSBaptiste Daroussin 		goto fail;
117*61d06d6bSBaptiste Daroussin 	}
118*61d06d6bSBaptiste Daroussin 	return 0;
119*61d06d6bSBaptiste Daroussin 
120*61d06d6bSBaptiste Daroussin fail:
121*61d06d6bSBaptiste Daroussin 	save_errno = errno;
122*61d06d6bSBaptiste Daroussin 	close(ifd);
123*61d06d6bSBaptiste Daroussin 	errno = save_errno;
124*61d06d6bSBaptiste Daroussin 	return -1;
125*61d06d6bSBaptiste Daroussin }
126*61d06d6bSBaptiste Daroussin 
127*61d06d6bSBaptiste Daroussin void
dbm_unmap(void)128*61d06d6bSBaptiste Daroussin dbm_unmap(void)
129*61d06d6bSBaptiste Daroussin {
130*61d06d6bSBaptiste Daroussin 	if (munmap(dbm_base, st.st_size) == -1)
131*61d06d6bSBaptiste Daroussin 		warn("dbm_unmap: munmap");
132*61d06d6bSBaptiste Daroussin 	if (close(ifd) == -1)
133*61d06d6bSBaptiste Daroussin 		warn("dbm_unmap: close");
134*61d06d6bSBaptiste Daroussin 	dbm_base = (char *)-1;
135*61d06d6bSBaptiste Daroussin }
136*61d06d6bSBaptiste Daroussin 
137*61d06d6bSBaptiste Daroussin /*
138*61d06d6bSBaptiste Daroussin  * Take a raw integer as it was read from the database.
139*61d06d6bSBaptiste Daroussin  * Interpret it as an offset into the database file
140*61d06d6bSBaptiste Daroussin  * and return a pointer to that place in the file.
141*61d06d6bSBaptiste Daroussin  */
142*61d06d6bSBaptiste Daroussin void *
dbm_get(int32_t offset)143*61d06d6bSBaptiste Daroussin dbm_get(int32_t offset)
144*61d06d6bSBaptiste Daroussin {
145*61d06d6bSBaptiste Daroussin 	offset = be32toh(offset);
146*61d06d6bSBaptiste Daroussin 	if (offset < 0) {
147*61d06d6bSBaptiste Daroussin 		warnx("dbm_get: Database corrupt: offset %d", offset);
148*61d06d6bSBaptiste Daroussin 		return NULL;
149*61d06d6bSBaptiste Daroussin 	}
150*61d06d6bSBaptiste Daroussin 	if (offset >= max_offset) {
151*61d06d6bSBaptiste Daroussin 		warnx("dbm_get: Database corrupt: offset %d > %d",
152*61d06d6bSBaptiste Daroussin 		    offset, max_offset);
153*61d06d6bSBaptiste Daroussin 		return NULL;
154*61d06d6bSBaptiste Daroussin 	}
155*61d06d6bSBaptiste Daroussin 	return dbm_base + offset;
156*61d06d6bSBaptiste Daroussin }
157*61d06d6bSBaptiste Daroussin 
158*61d06d6bSBaptiste Daroussin /*
159*61d06d6bSBaptiste Daroussin  * Assume the database starts with some integers.
160*61d06d6bSBaptiste Daroussin  * Assume they are numbered starting from 0, increasing.
161*61d06d6bSBaptiste Daroussin  * Get a pointer to one with the number "offset".
162*61d06d6bSBaptiste Daroussin  */
163*61d06d6bSBaptiste Daroussin int32_t *
dbm_getint(int32_t offset)164*61d06d6bSBaptiste Daroussin dbm_getint(int32_t offset)
165*61d06d6bSBaptiste Daroussin {
166*61d06d6bSBaptiste Daroussin 	return (int32_t *)dbm_base + offset;
167*61d06d6bSBaptiste Daroussin }
168*61d06d6bSBaptiste Daroussin 
169*61d06d6bSBaptiste Daroussin /*
170*61d06d6bSBaptiste Daroussin  * The reverse of dbm_get().
171*61d06d6bSBaptiste Daroussin  * Take pointer into the database file
172*61d06d6bSBaptiste Daroussin  * and convert it to the raw integer
173*61d06d6bSBaptiste Daroussin  * that would be used to refer to that place in the file.
174*61d06d6bSBaptiste Daroussin  */
175*61d06d6bSBaptiste Daroussin int32_t
dbm_addr(const void * p)176*61d06d6bSBaptiste Daroussin dbm_addr(const void *p)
177*61d06d6bSBaptiste Daroussin {
178*61d06d6bSBaptiste Daroussin 	return htobe32((const char *)p - dbm_base);
179*61d06d6bSBaptiste Daroussin }
180*61d06d6bSBaptiste Daroussin 
181*61d06d6bSBaptiste Daroussin int
dbm_match(const struct dbm_match * match,const char * str)182*61d06d6bSBaptiste Daroussin dbm_match(const struct dbm_match *match, const char *str)
183*61d06d6bSBaptiste Daroussin {
184*61d06d6bSBaptiste Daroussin 	switch (match->type) {
185*61d06d6bSBaptiste Daroussin 	case DBM_EXACT:
186*61d06d6bSBaptiste Daroussin 		return strcmp(str, match->str) == 0;
187*61d06d6bSBaptiste Daroussin 	case DBM_SUB:
188*61d06d6bSBaptiste Daroussin 		return strcasestr(str, match->str) != NULL;
189*61d06d6bSBaptiste Daroussin 	case DBM_REGEX:
190*61d06d6bSBaptiste Daroussin 		return regexec(match->re, str, 0, NULL, 0) == 0;
191*61d06d6bSBaptiste Daroussin 	default:
192*61d06d6bSBaptiste Daroussin 		abort();
193*61d06d6bSBaptiste Daroussin 	}
194*61d06d6bSBaptiste Daroussin }
195