xref: /freebsd/contrib/mandoc/dba.c (revision 2f513db72b034fd5ef7f080b11be5c711c15186a)
1 /*	$Id: dba.c,v 1.10 2017/02/17 14:43:54 schwarze Exp $ */
2 /*
3  * Copyright (c) 2016, 2017 Ingo Schwarze <schwarze@openbsd.org>
4  *
5  * Permission to use, copy, modify, and distribute this software for any
6  * purpose with or without fee is hereby granted, provided that the above
7  * copyright notice and this permission notice appear in all copies.
8  *
9  * THE SOFTWARE IS PROVIDED "AS IS" AND THE AUTHOR DISCLAIMS ALL WARRANTIES
10  * WITH REGARD TO THIS SOFTWARE INCLUDING ALL IMPLIED WARRANTIES OF
11  * MERCHANTABILITY AND FITNESS. IN NO EVENT SHALL THE AUTHOR BE LIABLE FOR
12  * ANY SPECIAL, DIRECT, INDIRECT, OR CONSEQUENTIAL DAMAGES OR ANY DAMAGES
13  * WHATSOEVER RESULTING FROM LOSS OF USE, DATA OR PROFITS, WHETHER IN AN
14  * ACTION OF CONTRACT, NEGLIGENCE OR OTHER TORTIOUS ACTION, ARISING OUT OF
15  * OR IN CONNECTION WITH THE USE OR PERFORMANCE OF THIS SOFTWARE.
16  *
17  * Allocation-based version of the mandoc database, for read-write access.
18  * The interface is defined in "dba.h".
19  */
20 #include "config.h"
21 
22 #include <sys/types.h>
23 #if HAVE_ENDIAN
24 #include <endian.h>
25 #elif HAVE_SYS_ENDIAN
26 #include <sys/endian.h>
27 #elif HAVE_NTOHL
28 #include <arpa/inet.h>
29 #endif
30 #include <errno.h>
31 #include <stddef.h>
32 #include <stdint.h>
33 #include <stdlib.h>
34 #include <string.h>
35 #include <unistd.h>
36 
37 #include "mandoc_aux.h"
38 #include "mandoc_ohash.h"
39 #include "mansearch.h"
40 #include "dba_write.h"
41 #include "dba_array.h"
42 #include "dba.h"
43 
44 struct macro_entry {
45 	struct dba_array	*pages;
46 	char			 value[];
47 };
48 
49 static void	*prepend(const char *, char);
50 static void	 dba_pages_write(struct dba_array *);
51 static int	 compare_names(const void *, const void *);
52 static int	 compare_strings(const void *, const void *);
53 
54 static struct macro_entry
55 		*get_macro_entry(struct ohash *, const char *, int32_t);
56 static void	 dba_macros_write(struct dba_array *);
57 static void	 dba_macro_write(struct ohash *);
58 static int	 compare_entries(const void *, const void *);
59 
60 
61 /*** top-level functions **********************************************/
62 
63 struct dba *
64 dba_new(int32_t npages)
65 {
66 	struct dba	*dba;
67 	struct ohash	*macro;
68 	int32_t		 im;
69 
70 	dba = mandoc_malloc(sizeof(*dba));
71 	dba->pages = dba_array_new(npages, DBA_GROW);
72 	dba->macros = dba_array_new(MACRO_MAX, 0);
73 	for (im = 0; im < MACRO_MAX; im++) {
74 		macro = mandoc_malloc(sizeof(*macro));
75 		mandoc_ohash_init(macro, 4,
76 		    offsetof(struct macro_entry, value));
77 		dba_array_set(dba->macros, im, macro);
78 	}
79 	return dba;
80 }
81 
82 void
83 dba_free(struct dba *dba)
84 {
85 	struct dba_array	*page;
86 	struct ohash		*macro;
87 	struct macro_entry	*entry;
88 	unsigned int		 slot;
89 
90 	dba_array_FOREACH(dba->macros, macro) {
91 		for (entry = ohash_first(macro, &slot); entry != NULL;
92 		     entry = ohash_next(macro, &slot)) {
93 			dba_array_free(entry->pages);
94 			free(entry);
95 		}
96 		ohash_delete(macro);
97 		free(macro);
98 	}
99 	dba_array_free(dba->macros);
100 
101 	dba_array_undel(dba->pages);
102 	dba_array_FOREACH(dba->pages, page) {
103 		dba_array_free(dba_array_get(page, DBP_NAME));
104 		dba_array_free(dba_array_get(page, DBP_SECT));
105 		dba_array_free(dba_array_get(page, DBP_ARCH));
106 		free(dba_array_get(page, DBP_DESC));
107 		dba_array_free(dba_array_get(page, DBP_FILE));
108 		dba_array_free(page);
109 	}
110 	dba_array_free(dba->pages);
111 
112 	free(dba);
113 }
114 
115 /*
116  * Write the complete mandoc database to disk; the format is:
117  * - One integer each for magic and version.
118  * - One pointer each to the macros table and to the final magic.
119  * - The pages table.
120  * - The macros table.
121  * - And at the very end, the magic integer again.
122  */
123 int
124 dba_write(const char *fname, struct dba *dba)
125 {
126 	int	 save_errno;
127 	int32_t	 pos_end, pos_macros, pos_macros_ptr;
128 
129 	if (dba_open(fname) == -1)
130 		return -1;
131 	dba_int_write(MANDOCDB_MAGIC);
132 	dba_int_write(MANDOCDB_VERSION);
133 	pos_macros_ptr = dba_skip(1, 2);
134 	dba_pages_write(dba->pages);
135 	pos_macros = dba_tell();
136 	dba_macros_write(dba->macros);
137 	pos_end = dba_tell();
138 	dba_int_write(MANDOCDB_MAGIC);
139 	dba_seek(pos_macros_ptr);
140 	dba_int_write(pos_macros);
141 	dba_int_write(pos_end);
142 	if (dba_close() == -1) {
143 		save_errno = errno;
144 		unlink(fname);
145 		errno = save_errno;
146 		return -1;
147 	}
148 	return 0;
149 }
150 
151 
152 /*** functions for handling pages *************************************/
153 
154 /*
155  * Create a new page and append it to the pages table.
156  */
157 struct dba_array *
158 dba_page_new(struct dba_array *pages, const char *arch,
159     const char *desc, const char *file, enum form form)
160 {
161 	struct dba_array *page, *entry;
162 
163 	page = dba_array_new(DBP_MAX, 0);
164 	entry = dba_array_new(1, DBA_STR | DBA_GROW);
165 	dba_array_add(page, entry);
166 	entry = dba_array_new(1, DBA_STR | DBA_GROW);
167 	dba_array_add(page, entry);
168 	if (arch != NULL && *arch != '\0') {
169 		entry = dba_array_new(1, DBA_STR | DBA_GROW);
170 		dba_array_add(entry, (void *)arch);
171 	} else
172 		entry = NULL;
173 	dba_array_add(page, entry);
174 	dba_array_add(page, mandoc_strdup(desc));
175 	entry = dba_array_new(1, DBA_STR | DBA_GROW);
176 	dba_array_add(entry, prepend(file, form));
177 	dba_array_add(page, entry);
178 	dba_array_add(pages, page);
179 	return page;
180 }
181 
182 /*
183  * Add a section, architecture, or file name to an existing page.
184  * Passing the NULL pointer for the architecture makes the page MI.
185  * In that case, any earlier or later architectures are ignored.
186  */
187 void
188 dba_page_add(struct dba_array *page, int32_t ie, const char *str)
189 {
190 	struct dba_array	*entries;
191 	char			*entry;
192 
193 	entries = dba_array_get(page, ie);
194 	if (ie == DBP_ARCH) {
195 		if (entries == NULL)
196 			return;
197 		if (str == NULL || *str == '\0') {
198 			dba_array_free(entries);
199 			dba_array_set(page, DBP_ARCH, NULL);
200 			return;
201 		}
202 	}
203 	if (*str == '\0')
204 		return;
205 	dba_array_FOREACH(entries, entry) {
206 		if (ie == DBP_FILE && *entry < ' ')
207 			entry++;
208 		if (strcmp(entry, str) == 0)
209 			return;
210 	}
211 	dba_array_add(entries, (void *)str);
212 }
213 
214 /*
215  * Add an additional name to an existing page.
216  */
217 void
218 dba_page_alias(struct dba_array *page, const char *name, uint64_t mask)
219 {
220 	struct dba_array	*entries;
221 	char			*entry;
222 	char			 maskbyte;
223 
224 	if (*name == '\0')
225 		return;
226 	maskbyte = mask & NAME_MASK;
227 	entries = dba_array_get(page, DBP_NAME);
228 	dba_array_FOREACH(entries, entry) {
229 		if (strcmp(entry + 1, name) == 0) {
230 			*entry |= maskbyte;
231 			return;
232 		}
233 	}
234 	dba_array_add(entries, prepend(name, maskbyte));
235 }
236 
237 /*
238  * Return a pointer to a temporary copy of instr with inbyte prepended.
239  */
240 static void *
241 prepend(const char *instr, char inbyte)
242 {
243 	static char	*outstr = NULL;
244 	static size_t	 outlen = 0;
245 	size_t		 newlen;
246 
247 	newlen = strlen(instr) + 1;
248 	if (newlen > outlen) {
249 		outstr = mandoc_realloc(outstr, newlen + 1);
250 		outlen = newlen;
251 	}
252 	*outstr = inbyte;
253 	memcpy(outstr + 1, instr, newlen);
254 	return outstr;
255 }
256 
257 /*
258  * Write the pages table to disk; the format is:
259  * - One integer containing the number of pages.
260  * - For each page, five pointers to the names, sections,
261  *   architectures, description, and file names of the page.
262  *   MI pages write 0 instead of the architecture pointer.
263  * - One list each for names, sections, architectures, descriptions and
264  *   file names.  The description for each page ends with a NUL byte.
265  *   For all the other lists, each string ends with a NUL byte,
266  *   and the last string for a page ends with two NUL bytes.
267  * - To assure alignment of following integers,
268  *   the end is padded with NUL bytes up to a multiple of four bytes.
269  */
270 static void
271 dba_pages_write(struct dba_array *pages)
272 {
273 	struct dba_array	*page, *entry;
274 	int32_t			 pos_pages, pos_end;
275 
276 	pos_pages = dba_array_writelen(pages, 5);
277 	dba_array_FOREACH(pages, page) {
278 		dba_array_setpos(page, DBP_NAME, dba_tell());
279 		entry = dba_array_get(page, DBP_NAME);
280 		dba_array_sort(entry, compare_names);
281 		dba_array_writelst(entry);
282 	}
283 	dba_array_FOREACH(pages, page) {
284 		dba_array_setpos(page, DBP_SECT, dba_tell());
285 		entry = dba_array_get(page, DBP_SECT);
286 		dba_array_sort(entry, compare_strings);
287 		dba_array_writelst(entry);
288 	}
289 	dba_array_FOREACH(pages, page) {
290 		if ((entry = dba_array_get(page, DBP_ARCH)) != NULL) {
291 			dba_array_setpos(page, DBP_ARCH, dba_tell());
292 			dba_array_sort(entry, compare_strings);
293 			dba_array_writelst(entry);
294 		} else
295 			dba_array_setpos(page, DBP_ARCH, 0);
296 	}
297 	dba_array_FOREACH(pages, page) {
298 		dba_array_setpos(page, DBP_DESC, dba_tell());
299 		dba_str_write(dba_array_get(page, DBP_DESC));
300 	}
301 	dba_array_FOREACH(pages, page) {
302 		dba_array_setpos(page, DBP_FILE, dba_tell());
303 		dba_array_writelst(dba_array_get(page, DBP_FILE));
304 	}
305 	pos_end = dba_align();
306 	dba_seek(pos_pages);
307 	dba_array_FOREACH(pages, page)
308 		dba_array_writepos(page);
309 	dba_seek(pos_end);
310 }
311 
312 static int
313 compare_names(const void *vp1, const void *vp2)
314 {
315 	const char	*cp1, *cp2;
316 	int		 diff;
317 
318 	cp1 = *(const char * const *)vp1;
319 	cp2 = *(const char * const *)vp2;
320 	return (diff = *cp2 - *cp1) ? diff :
321 	    strcasecmp(cp1 + 1, cp2 + 1);
322 }
323 
324 static int
325 compare_strings(const void *vp1, const void *vp2)
326 {
327 	const char	*cp1, *cp2;
328 
329 	cp1 = *(const char * const *)vp1;
330 	cp2 = *(const char * const *)vp2;
331 	return strcmp(cp1, cp2);
332 }
333 
334 /*** functions for handling macros ************************************/
335 
336 /*
337  * In the hash table for a single macro, look up an entry by
338  * the macro value or add an empty one if it doesn't exist yet.
339  */
340 static struct macro_entry *
341 get_macro_entry(struct ohash *macro, const char *value, int32_t np)
342 {
343 	struct macro_entry	*entry;
344 	size_t			 len;
345 	unsigned int		 slot;
346 
347 	slot = ohash_qlookup(macro, value);
348 	if ((entry = ohash_find(macro, slot)) == NULL) {
349 		len = strlen(value) + 1;
350 		entry = mandoc_malloc(sizeof(*entry) + len);
351 		memcpy(&entry->value, value, len);
352 		entry->pages = dba_array_new(np, DBA_GROW);
353 		ohash_insert(macro, slot, entry);
354 	}
355 	return entry;
356 }
357 
358 /*
359  * In addition to get_macro_entry(), add multiple page references,
360  * converting them from the on-disk format (byte offsets in the file)
361  * to page pointers in memory.
362  */
363 void
364 dba_macro_new(struct dba *dba, int32_t im, const char *value,
365     const int32_t *pp)
366 {
367 	struct macro_entry	*entry;
368 	const int32_t		*ip;
369 	int32_t			 np;
370 
371 	np = 0;
372 	for (ip = pp; *ip; ip++)
373 		np++;
374 
375 	entry = get_macro_entry(dba_array_get(dba->macros, im), value, np);
376 	for (ip = pp; *ip; ip++)
377 		dba_array_add(entry->pages, dba_array_get(dba->pages,
378 		    be32toh(*ip) / 5 / sizeof(*ip) - 1));
379 }
380 
381 /*
382  * In addition to get_macro_entry(), add one page reference,
383  * directly taking the in-memory page pointer as an argument.
384  */
385 void
386 dba_macro_add(struct dba_array *macros, int32_t im, const char *value,
387     struct dba_array *page)
388 {
389 	struct macro_entry	*entry;
390 
391 	if (*value == '\0')
392 		return;
393 	entry = get_macro_entry(dba_array_get(macros, im), value, 1);
394 	dba_array_add(entry->pages, page);
395 }
396 
397 /*
398  * Write the macros table to disk; the format is:
399  * - The number of macro tables (actually, MACRO_MAX).
400  * - That number of pointers to the individual macro tables.
401  * - The individual macro tables.
402  */
403 static void
404 dba_macros_write(struct dba_array *macros)
405 {
406 	struct ohash		*macro;
407 	int32_t			 im, pos_macros, pos_end;
408 
409 	pos_macros = dba_array_writelen(macros, 1);
410 	im = 0;
411 	dba_array_FOREACH(macros, macro) {
412 		dba_array_setpos(macros, im++, dba_tell());
413 		dba_macro_write(macro);
414 	}
415 	pos_end = dba_tell();
416 	dba_seek(pos_macros);
417 	dba_array_writepos(macros);
418 	dba_seek(pos_end);
419 }
420 
421 /*
422  * Write one individual macro table to disk; the format is:
423  * - The number of entries in the table.
424  * - For each entry, two pointers, the first one to the value
425  *   and the second one to the list of pages.
426  * - A list of values, each ending in a NUL byte.
427  * - To assure alignment of following integers,
428  *   padding with NUL bytes up to a multiple of four bytes.
429  * - A list of pointers to pages, each list ending in a 0 integer.
430  */
431 static void
432 dba_macro_write(struct ohash *macro)
433 {
434 	struct macro_entry	**entries, *entry;
435 	struct dba_array	 *page;
436 	int32_t			 *kpos, *dpos;
437 	unsigned int		  ie, ne, slot;
438 	int			  use;
439 	int32_t			  addr, pos_macro, pos_end;
440 
441 	/* Temporary storage for filtering and sorting. */
442 
443 	ne = ohash_entries(macro);
444 	entries = mandoc_reallocarray(NULL, ne, sizeof(*entries));
445 	kpos = mandoc_reallocarray(NULL, ne, sizeof(*kpos));
446 	dpos = mandoc_reallocarray(NULL, ne, sizeof(*dpos));
447 
448 	/* Build a list of non-empty entries and sort it. */
449 
450 	ne = 0;
451 	for (entry = ohash_first(macro, &slot); entry != NULL;
452 	     entry = ohash_next(macro, &slot)) {
453 		use = 0;
454 		dba_array_FOREACH(entry->pages, page)
455 			if (dba_array_getpos(page))
456 				use = 1;
457 		if (use)
458 			entries[ne++] = entry;
459 	}
460 	qsort(entries, ne, sizeof(*entries), compare_entries);
461 
462 	/* Number of entries, and space for the pointer pairs. */
463 
464 	dba_int_write(ne);
465 	pos_macro = dba_skip(2, ne);
466 
467 	/* String table. */
468 
469 	for (ie = 0; ie < ne; ie++) {
470 		kpos[ie] = dba_tell();
471 		dba_str_write(entries[ie]->value);
472 	}
473 	dba_align();
474 
475 	/* Pages table. */
476 
477 	for (ie = 0; ie < ne; ie++) {
478 		dpos[ie] = dba_tell();
479 		dba_array_FOREACH(entries[ie]->pages, page)
480 			if ((addr = dba_array_getpos(page)))
481 				dba_int_write(addr);
482 		dba_int_write(0);
483 	}
484 	pos_end = dba_tell();
485 
486 	/* Fill in the pointer pairs. */
487 
488 	dba_seek(pos_macro);
489 	for (ie = 0; ie < ne; ie++) {
490 		dba_int_write(kpos[ie]);
491 		dba_int_write(dpos[ie]);
492 	}
493 	dba_seek(pos_end);
494 
495 	free(entries);
496 	free(kpos);
497 	free(dpos);
498 }
499 
500 static int
501 compare_entries(const void *vp1, const void *vp2)
502 {
503 	const struct macro_entry *ep1, *ep2;
504 
505 	ep1 = *(const struct macro_entry * const *)vp1;
506 	ep2 = *(const struct macro_entry * const *)vp2;
507 	return strcmp(ep1->value, ep2->value);
508 }
509