1 /* $Id: dbm.c,v 1.7 2019/07/01 22:56:24 schwarze Exp $ */
2 /*
3 * Copyright (c) 2016 Ingo Schwarze <schwarze@openbsd.org>
4 *
5 * Permission to use, copy, modify, and distribute this software for any
6 * purpose with or without fee is hereby granted, provided that the above
7 * copyright notice and this permission notice appear in all copies.
8 *
9 * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10 * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11 * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12 * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13 * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14 * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15 * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16 *
17 * Map-based version of the mandoc database, for read-only access.
18 * The interface is defined in "dbm.h".
19 */
20 #include "config.h"
21
22 #include <assert.h>
23 #if HAVE_ENDIAN
24 #include <endian.h>
25 #elif HAVE_SYS_ENDIAN
26 #include <sys/endian.h>
27 #elif HAVE_NTOHL
28 #include <arpa/inet.h>
29 #endif
30 #if HAVE_ERR
31 #include <err.h>
32 #endif
33 #include <errno.h>
34 #include <regex.h>
35 #include <stdint.h>
36 #include <stdio.h>
37 #include <stdlib.h>
38 #include <string.h>
39
40 #include "mansearch.h"
41 #include "dbm_map.h"
42 #include "dbm.h"
43
/*
 * One entry of a per-macro value table.
 * Both members are kept in the form stored in the file and are
 * only ever handed to dbm_get() for resolution.
 */
struct macro {
	int32_t	 value;	/* resolves to the value string */
	int32_t	 pages;	/* resolves to a 0-terminated list of page entries */
};
48
/*
 * One entry of the pages array.
 * Every member is resolved with dbm_get() before use.
 */
struct page {
	int32_t	 name;	/* names; each is preceded by one byte of match bits */
	int32_t	 sect;	/* section strings */
	int32_t	 arch;	/* architecture strings; 0 when there are none */
	int32_t	 desc;	/* description string */
	int32_t	 file;	/* file name data */
};
56
/*
 * Kind of page iteration currently in progress.
 * dbm_page_next() dispatches on this value.
 */
enum iter {
	ITER_NONE  = 0,	/* no iteration active */
	ITER_NAME  = 1,	/* page_bytitle() over names */
	ITER_SECT  = 2,	/* page_bytitle() over sections */
	ITER_ARCH  = 3,	/* page_byarch() */
	ITER_DESC  = 4,	/* page_bytitle() over descriptions */
	ITER_MACRO = 5	/* page_bymacro() */
};
65
66 static struct macro *macros[MACRO_MAX];
67 static int32_t nvals[MACRO_MAX];
68 static struct page *pages;
69 static int32_t npages;
70 static enum iter iteration;
71
/*
 * Iteration workers.  For the page_*() functions, a non-NULL match
 * starts a new iteration and a NULL match fetches the next result.
 */
static struct dbm_res	 page_bytitle(enum iter arg_iter,
			    const struct dbm_match *arg_match);
static struct dbm_res	 page_byarch(const struct dbm_match *arg_match);
static struct dbm_res	 page_bymacro(int32_t arg_im,
			    const struct dbm_match *arg_match);
static char		*macro_bypage(int32_t arg_im, int32_t arg_ip);
76
77
78 /*** top level functions **********************************************/
79
80 /*
81 * Open a disk-based mandoc database for read-only access.
82 * Map the pages and macros[] arrays.
83 * Return 0 on success. Return -1 and set errno on failure.
84 */
85 int
dbm_open(const char * fname)86 dbm_open(const char *fname)
87 {
88 const int32_t *mp, *ep;
89 int32_t im;
90
91 if (dbm_map(fname) == -1)
92 return -1;
93
94 if ((npages = be32toh(*dbm_getint(4))) < 0) {
95 warnx("dbm_open(%s): Invalid number of pages: %d",
96 fname, npages);
97 goto fail;
98 }
99 pages = (struct page *)dbm_getint(5);
100
101 if ((mp = dbm_get(*dbm_getint(2))) == NULL) {
102 warnx("dbm_open(%s): Invalid offset of macros array", fname);
103 goto fail;
104 }
105 if (be32toh(*mp) != MACRO_MAX) {
106 warnx("dbm_open(%s): Invalid number of macros: %d",
107 fname, be32toh(*mp));
108 goto fail;
109 }
110 for (im = 0; im < MACRO_MAX; im++) {
111 if ((ep = dbm_get(*++mp)) == NULL) {
112 warnx("dbm_open(%s): Invalid offset of macro %d",
113 fname, im);
114 goto fail;
115 }
116 nvals[im] = be32toh(*ep);
117 macros[im] = (struct macro *)++ep;
118 }
119 return 0;
120
121 fail:
122 dbm_unmap();
123 errno = EFTYPE;
124 return -1;
125 }
126
/*
 * Close the database by releasing the mapping made in dbm_open().
 * The file-scope pointers and counters are left as they are.
 */
void
dbm_close(void)
{
	dbm_unmap();
	return;
}
132
133
134 /*** functions for handling pages *************************************/
135
136 int32_t
dbm_page_count(void)137 dbm_page_count(void)
138 {
139 return npages;
140 }
141
142 /*
143 * Give the caller pointers to the data for one manual page.
144 */
145 struct dbm_page *
dbm_page_get(int32_t ip)146 dbm_page_get(int32_t ip)
147 {
148 static struct dbm_page res;
149
150 assert(ip >= 0);
151 assert(ip < npages);
152 res.name = dbm_get(pages[ip].name);
153 if (res.name == NULL)
154 res.name = "(NULL)\0";
155 res.sect = dbm_get(pages[ip].sect);
156 if (res.sect == NULL)
157 res.sect = "(NULL)\0";
158 res.arch = pages[ip].arch ? dbm_get(pages[ip].arch) : NULL;
159 res.desc = dbm_get(pages[ip].desc);
160 if (res.desc == NULL)
161 res.desc = "(NULL)";
162 res.file = dbm_get(pages[ip].file);
163 if (res.file == NULL)
164 res.file = " (NULL)\0";
165 res.addr = dbm_addr(pages + ip);
166 return &res;
167 }
168
169 /*
170 * Functions to start filtered iterations over manual pages.
171 */
172 void
dbm_page_byname(const struct dbm_match * match)173 dbm_page_byname(const struct dbm_match *match)
174 {
175 assert(match != NULL);
176 page_bytitle(ITER_NAME, match);
177 }
178
179 void
dbm_page_bysect(const struct dbm_match * match)180 dbm_page_bysect(const struct dbm_match *match)
181 {
182 assert(match != NULL);
183 page_bytitle(ITER_SECT, match);
184 }
185
186 void
dbm_page_byarch(const struct dbm_match * match)187 dbm_page_byarch(const struct dbm_match *match)
188 {
189 assert(match != NULL);
190 page_byarch(match);
191 }
192
193 void
dbm_page_bydesc(const struct dbm_match * match)194 dbm_page_bydesc(const struct dbm_match *match)
195 {
196 assert(match != NULL);
197 page_bytitle(ITER_DESC, match);
198 }
199
200 void
dbm_page_bymacro(int32_t im,const struct dbm_match * match)201 dbm_page_bymacro(int32_t im, const struct dbm_match *match)
202 {
203 assert(im >= 0);
204 assert(im < MACRO_MAX);
205 assert(match != NULL);
206 page_bymacro(im, match);
207 }
208
209 /*
210 * Return the number of the next manual page in the current iteration.
211 */
212 struct dbm_res
dbm_page_next(void)213 dbm_page_next(void)
214 {
215 struct dbm_res res = {-1, 0};
216
217 switch(iteration) {
218 case ITER_NONE:
219 return res;
220 case ITER_ARCH:
221 return page_byarch(NULL);
222 case ITER_MACRO:
223 return page_bymacro(0, NULL);
224 default:
225 return page_bytitle(iteration, NULL);
226 }
227 }
228
229 /*
230 * Functions implementing the iteration over manual pages.
231 */
232 static struct dbm_res
page_bytitle(enum iter arg_iter,const struct dbm_match * arg_match)233 page_bytitle(enum iter arg_iter, const struct dbm_match *arg_match)
234 {
235 static const struct dbm_match *match;
236 static const char *cp;
237 static int32_t ip;
238 struct dbm_res res = {-1, 0};
239
240 assert(arg_iter == ITER_NAME || arg_iter == ITER_DESC ||
241 arg_iter == ITER_SECT);
242
243 /* Initialize for a new iteration. */
244
245 if (arg_match != NULL) {
246 iteration = arg_iter;
247 match = arg_match;
248 switch (iteration) {
249 case ITER_NAME:
250 cp = dbm_get(pages[0].name);
251 break;
252 case ITER_SECT:
253 cp = dbm_get(pages[0].sect);
254 break;
255 case ITER_DESC:
256 cp = dbm_get(pages[0].desc);
257 break;
258 default:
259 abort();
260 }
261 if (cp == NULL) {
262 iteration = ITER_NONE;
263 match = NULL;
264 cp = NULL;
265 ip = npages;
266 } else
267 ip = 0;
268 return res;
269 }
270
271 /* Search for a name. */
272
273 while (ip < npages) {
274 if (iteration == ITER_NAME)
275 cp++;
276 if (dbm_match(match, cp))
277 break;
278 cp = strchr(cp, '\0') + 1;
279 if (iteration == ITER_DESC)
280 ip++;
281 else if (*cp == '\0') {
282 cp++;
283 ip++;
284 }
285 }
286
287 /* Reached the end without a match. */
288
289 if (ip == npages) {
290 iteration = ITER_NONE;
291 match = NULL;
292 cp = NULL;
293 return res;
294 }
295
296 /* Found a match; save the quality for later retrieval. */
297
298 res.page = ip;
299 res.bits = iteration == ITER_NAME ? cp[-1] : 0;
300
301 /* Skip the remaining names of this page. */
302
303 if (++ip < npages) {
304 do {
305 cp++;
306 } while (cp[-1] != '\0' ||
307 (iteration != ITER_DESC && cp[-2] != '\0'));
308 }
309 return res;
310 }
311
312 static struct dbm_res
page_byarch(const struct dbm_match * arg_match)313 page_byarch(const struct dbm_match *arg_match)
314 {
315 static const struct dbm_match *match;
316 struct dbm_res res = {-1, 0};
317 static int32_t ip;
318 const char *cp;
319
320 /* Initialize for a new iteration. */
321
322 if (arg_match != NULL) {
323 iteration = ITER_ARCH;
324 match = arg_match;
325 ip = 0;
326 return res;
327 }
328
329 /* Search for an architecture. */
330
331 for ( ; ip < npages; ip++)
332 if (pages[ip].arch)
333 for (cp = dbm_get(pages[ip].arch);
334 *cp != '\0';
335 cp = strchr(cp, '\0') + 1)
336 if (dbm_match(match, cp)) {
337 res.page = ip++;
338 return res;
339 }
340
341 /* Reached the end without a match. */
342
343 iteration = ITER_NONE;
344 match = NULL;
345 return res;
346 }
347
348 static struct dbm_res
page_bymacro(int32_t arg_im,const struct dbm_match * arg_match)349 page_bymacro(int32_t arg_im, const struct dbm_match *arg_match)
350 {
351 static const struct dbm_match *match;
352 static const int32_t *pp;
353 static const char *cp;
354 static int32_t im, iv;
355 struct dbm_res res = {-1, 0};
356
357 assert(im >= 0);
358 assert(im < MACRO_MAX);
359
360 /* Initialize for a new iteration. */
361
362 if (arg_match != NULL) {
363 iteration = ITER_MACRO;
364 match = arg_match;
365 im = arg_im;
366 cp = nvals[im] ? dbm_get(macros[im]->value) : NULL;
367 pp = NULL;
368 iv = -1;
369 return res;
370 }
371 if (iteration != ITER_MACRO)
372 return res;
373
374 /* Find the next matching macro value. */
375
376 while (pp == NULL || *pp == 0) {
377 if (++iv == nvals[im]) {
378 iteration = ITER_NONE;
379 return res;
380 }
381 if (iv)
382 cp = strchr(cp, '\0') + 1;
383 if (dbm_match(match, cp))
384 pp = dbm_get(macros[im][iv].pages);
385 }
386
387 /* Found a matching page. */
388
389 res.page = (struct page *)dbm_get(*pp++) - pages;
390 return res;
391 }
392
393
394 /*** functions for handling macros ************************************/
395
396 int32_t
dbm_macro_count(int32_t im)397 dbm_macro_count(int32_t im)
398 {
399 assert(im >= 0);
400 assert(im < MACRO_MAX);
401 return nvals[im];
402 }
403
404 struct dbm_macro *
dbm_macro_get(int32_t im,int32_t iv)405 dbm_macro_get(int32_t im, int32_t iv)
406 {
407 static struct dbm_macro macro;
408
409 assert(im >= 0);
410 assert(im < MACRO_MAX);
411 assert(iv >= 0);
412 assert(iv < nvals[im]);
413 macro.value = dbm_get(macros[im][iv].value);
414 macro.pp = dbm_get(macros[im][iv].pages);
415 return ¯o;
416 }
417
418 /*
419 * Filtered iteration over macro entries.
420 */
421 void
dbm_macro_bypage(int32_t im,int32_t ip)422 dbm_macro_bypage(int32_t im, int32_t ip)
423 {
424 assert(im >= 0);
425 assert(im < MACRO_MAX);
426 assert(ip != 0);
427 macro_bypage(im, ip);
428 }
429
430 char *
dbm_macro_next(void)431 dbm_macro_next(void)
432 {
433 return macro_bypage(MACRO_MAX, 0);
434 }
435
436 static char *
macro_bypage(int32_t arg_im,int32_t arg_ip)437 macro_bypage(int32_t arg_im, int32_t arg_ip)
438 {
439 static const int32_t *pp;
440 static int32_t im, ip, iv;
441
442 /* Initialize for a new iteration. */
443
444 if (arg_im < MACRO_MAX && arg_ip != 0) {
445 im = arg_im;
446 ip = arg_ip;
447 pp = dbm_get(macros[im]->pages);
448 iv = 0;
449 return NULL;
450 }
451 if (im >= MACRO_MAX)
452 return NULL;
453
454 /* Search for the next value. */
455
456 while (iv < nvals[im]) {
457 if (*pp == ip)
458 break;
459 if (*pp == 0)
460 iv++;
461 pp++;
462 }
463
464 /* Reached the end without a match. */
465
466 if (iv == nvals[im]) {
467 im = MACRO_MAX;
468 ip = 0;
469 pp = NULL;
470 return NULL;
471 }
472
473 /* Found a match; skip the remaining pages of this entry. */
474
475 if (++iv < nvals[im])
476 while (*pp++ != 0)
477 continue;
478
479 return dbm_get(macros[im][iv - 1].value);
480 }
481