xref: /freebsd/contrib/bc/gen/strgen.c (revision 924226fba12cc9a228c73b956e1b7fa24c60b055)
1 /*
2  * *****************************************************************************
3  *
4  * SPDX-License-Identifier: BSD-2-Clause
5  *
6  * Copyright (c) 2018-2021 Gavin D. Howard and contributors.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  * * Redistributions of source code must retain the above copyright notice, this
12  *   list of conditions and the following disclaimer.
13  *
14  * * Redistributions in binary form must reproduce the above copyright notice,
15  *   this list of conditions and the following disclaimer in the documentation
16  *   and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  *
30  * *****************************************************************************
31  *
32  * Generates a const array from a bc script.
33  *
34  */
35 
36 #include <assert.h>
37 #include <stdbool.h>
38 #include <stdio.h>
39 #include <stdlib.h>
40 #include <string.h>
41 
42 #include <errno.h>
43 
44 #include <fcntl.h>
45 #include <sys/stat.h>
46 
47 #ifndef _WIN32
48 #include <unistd.h>
49 #endif // _WIN32
50 
51 // For some reason, Windows can't have this header.
52 #ifndef _WIN32
53 #include <libgen.h>
54 #endif // _WIN32
55 
56 // This pulls in cross-platform stuff.
57 #include "../include/bcl.h"
58 
// In this file BC_ERR() is an identity macro: it expands to its argument
// unchanged. NOTE(review): presumably it exists so that the code copied from
// src/read.c can keep its BC_ERR() wrappers without edits -- confirm.
#define BC_ERR(v) (v)

// clang-format off

// The usage help. The single %s is filled with the program name (argv[0]).
static const char* const bc_gen_usage =
	"usage: %s input output exclude name [label [define [remove_tabs]]]\n";

// Markers that may appear in the input text. main() looks for them while
// generating the array: the text from the start marker through the end marker
// is dropped when the "exclude" parameter is non-zero; otherwise only the
// markers themselves (and the character following each) are dropped.
static const char* const bc_gen_ex_start = "{{ A H N HN }}";
static const char* const bc_gen_ex_end = "{{ end }}";

// This is exactly what it looks like. It just slaps a simple license header on
// the generated C source file. The %s is filled with the input filename.
static const char* const bc_gen_header =
	"// Copyright (c) 2018-2021 Gavin D. Howard and contributors.\n"
	"// Licensed under the 2-clause BSD license.\n"
	"// *** AUTOMATICALLY GENERATED FROM %s. DO NOT MODIFY. ***\n\n";
// clang-format on

// These are just format strings used to generate the C source. Each %s is
// filled with the label name, label text, guard macro, or array name, as the
// surrounding text of the format suggests.
static const char* const bc_gen_label = "const char *%s = \"%s\";\n\n";
static const char* const bc_gen_label_extern = "extern const char *%s;\n\n";
static const char* const bc_gen_ifdef = "#if %s\n";
static const char* const bc_gen_endif = "#endif // %s\n";
static const char* const bc_gen_name = "const char %s[] = {\n";
static const char* const bc_gen_name_extern = "extern const char %s[];\n\n";

// Error codes. We can't use 0 because these are used as exit statuses, and 0
// as an exit status is not an error.
#define IO_ERR (1)
#define INVALID_INPUT_FILE (2)
#define INVALID_PARAMS (3)

// This is the max width to print characters to the screen. This is to ensure
// that lines don't go much over 80 characters. The limit is checked after each
// value is printed, so a line can exceed it by the width of one value.
#define MAX_WIDTH (72)
95 
/**
 * Opens a stdio stream, hiding the difference between POSIX fopen() and the
 * Windows fopen_s() variant.
 * @param f         Where to store the opened stream; set to NULL when the
 *                  file could not be opened.
 * @param filename  The name of the file.
 * @param mode      The stdio mode string to open the file with.
 */
static void
open_file(FILE** f, const char* filename, const char* mode)
{
#ifdef _WIN32

	// fopen_s() is not guaranteed to touch the pointer on failure, so start
	// from a known NULL; callers detect failure by checking for NULL.
	*f = NULL;
	fopen_s(f, filename, mode);

#else // _WIN32

	FILE* stream = fopen(filename, mode);

	*f = stream;

#endif // _WIN32
}
119 
/**
 * A portable wrapper for opening a file descriptor. This is copied from
 * src/read.c. Make sure to update that if this changes.
 * @param path  The path to the file to open.
 * @param mode  The flags to open with (e.g. O_RDONLY).
 * @return      The file descriptor; the caller treats a negative value as
 *              failure.
 */
static int
bc_read_open(const char* path, int mode)
{
#ifdef _WIN32

	// NOTE(review): on Windows, open() is presumably a macro supplied by
	// bcl.h that takes the descriptor as an out-parameter -- confirm. The
	// descriptor starts at -1 so a failed call still reports failure.
	int handle = -1;

	open(&handle, path, mode);

	return handle;

#else // _WIN32

	return open(path, mode);

#endif // _WIN32
}
140 
141 /**
142  * Reads a file and returns the file as a string. This has been copied from
143  * src/read.c. Make sure to change that if this changes.
144  * @param path  The path to the file.
145  * @return      The contents of the file as a string.
146  */
147 static char*
148 bc_read_file(const char* path)
149 {
150 	int e = IO_ERR;
151 	size_t size, to_read;
152 	struct stat pstat;
153 	int fd;
154 	char* buf;
155 	char* buf2;
156 
157 	// This has been copied from src/read.c. Make sure to change that if this
158 	// changes.
159 
160 	assert(path != NULL);
161 
162 #ifndef NDEBUG
163 	// Need this to quiet MSan.
164 	// NOLINTNEXTLINE
165 	memset(&pstat, 0, sizeof(struct stat));
166 #endif // NDEBUG
167 
168 	fd = bc_read_open(path, O_RDONLY);
169 
170 	// If we can't read a file, we just barf.
171 	if (BC_ERR(fd < 0))
172 	{
173 		fprintf(stderr, "Could not open file: %s\n", path);
174 		exit(INVALID_INPUT_FILE);
175 	}
176 
177 	// The reason we call fstat is to eliminate TOCTOU race conditions. This
178 	// way, we have an open file, so it's not going anywhere.
179 	if (BC_ERR(fstat(fd, &pstat) == -1))
180 	{
181 		fprintf(stderr, "Could not stat file: %s\n", path);
182 		exit(INVALID_INPUT_FILE);
183 	}
184 
185 	// Make sure it's not a directory.
186 	if (BC_ERR(S_ISDIR(pstat.st_mode)))
187 	{
188 		fprintf(stderr, "Path is directory: %s\n", path);
189 		exit(INVALID_INPUT_FILE);
190 	}
191 
192 	// Get the size of the file and allocate that much.
193 	size = (size_t) pstat.st_size;
194 	buf = (char*) malloc(size + 1);
195 	if (buf == NULL)
196 	{
197 		fprintf(stderr, "Could not malloc\n");
198 		exit(INVALID_INPUT_FILE);
199 	}
200 	buf2 = buf;
201 	to_read = size;
202 
203 	do
204 	{
205 		// Read the file. We just bail if a signal interrupts. This is so that
206 		// users can interrupt the reading of big files if they want.
207 		ssize_t r = read(fd, buf2, to_read);
208 		if (BC_ERR(r < 0)) exit(e);
209 		to_read -= (size_t) r;
210 		buf2 += (size_t) r;
211 	}
212 	while (to_read);
213 
214 	// Got to have a nul byte.
215 	buf[size] = '\0';
216 
217 	close(fd);
218 
219 	return buf;
220 }
221 
222 /**
223  * Outputs a label, which is a string literal that the code can use as a name
224  * for the file that is being turned into a string. This is important for the
225  * math libraries because the parse and lex code expects a filename. The label
226  * becomes the filename for the purposes of lexing and parsing.
227  *
228  * The label is generated from bc_gen_label (above). It has the form:
229  *
230  * const char *<label_name> = <label>;
231  *
232  * This function is also needed to smooth out differences between POSIX and
233  * Windows, specifically, the fact that Windows uses backslashes for filenames
234  * and that backslashes have to be escaped in a string literal.
235  *
236  * @param out    The file to output to.
237  * @param label  The label name.
238  * @param name   The actual label text, which is a filename.
239  * @return       Positive if no error, negative on error, just like *printf().
240  */
241 static int
242 output_label(FILE* out, const char* label, const char* name)
243 {
244 #ifndef _WIN32
245 
246 	return fprintf(out, bc_gen_label, label, name);
247 
248 #else // _WIN32
249 
250 	size_t i, count = 0, len = strlen(name);
251 	char* buf;
252 	int ret;
253 
254 	// This loop counts how many backslashes there are in the label.
255 	for (i = 0; i < len; ++i)
256 	{
257 		count += (name[i] == '\\');
258 	}
259 
260 	buf = (char*) malloc(len + 1 + count);
261 	if (buf == NULL) return -1;
262 
263 	count = 0;
264 
265 	// This loop is the meat of the Windows version. What it does is copy the
266 	// label byte-for-byte, unless it encounters a backslash, in which case, it
267 	// copies the backslash twice to have it escaped properly in the string
268 	// literal.
269 	for (i = 0; i < len; ++i)
270 	{
271 		buf[i + count] = name[i];
272 
273 		if (name[i] == '\\')
274 		{
275 			count += 1;
276 			buf[i + count] = name[i];
277 		}
278 	}
279 
280 	buf[i + count] = '\0';
281 
282 	ret = fprintf(out, bc_gen_label, label, buf);
283 
284 	free(buf);
285 
286 	return ret;
287 
288 #endif // _WIN32
289 }
290 
291 /**
292  * This program generates C strings (well, actually, C char arrays) from text
293  * files. It generates 1 C source file. The resulting file has this structure:
294  *
295  * <Copyright Header>
296  *
297  * [<Label Extern>]
298  *
299  * <Char Array Extern>
300  *
301  * [<Preprocessor Guard Begin>]
302  * [<Label Definition>]
303  *
304  * <Char Array Definition>
305  * [<Preprocessor Guard End>]
306  *
307  * Anything surrounded by square brackets may not be in the final generated
308  * source file.
309  *
310  * The required command-line parameters are:
311  *
312  * input    Input filename.
313  * output   Output filename.
314  * exclude  Whether to exclude extra math-only stuff.
315  * name     The name of the char array.
316  *
317  * The optional parameters are:
318  *
319  * label        If given, a label for the char array. See the comment for the
320  *              output_label() function. It is meant as a "filename" for the
321  *              text when processed by bc and dc. If label is given, then the
322  *              <Label Extern> and <Label Definition> will exist in the
323  *              generated source file.
324  * define       If given, a preprocessor macro that should be used as a guard
325  *              for the char array and its label. If define is given, then
326  *              <Preprocessor Guard Begin> will exist in the form
327  *              "#if <define>" as part of the generated source file, and
328  *              <Preprocessor Guard End> will exist in the form
329  *              "endif // <define>".
330  * remove_tabs  If this parameter exists, it must be an integer. If it is
331  *              non-zero, then tabs are removed from the input file text before
332  *              outputting to the output char array.
333  *
334  * All text files that are transformed have license comments. This program finds
335  * the end of that comment and strips it out as well.
336  */
337 int
338 main(int argc, char* argv[])
339 {
340 	char* in;
341 	FILE* out;
342 	const char* label;
343 	const char* define;
344 	char* name;
345 	unsigned int count, slashes, err = IO_ERR;
346 	bool has_label, has_define, remove_tabs, exclude_extra_math;
347 	size_t i;
348 
349 	if (argc < 5)
350 	{
351 		printf(bc_gen_usage, argv[0]);
352 		return INVALID_PARAMS;
353 	}
354 
355 	exclude_extra_math = (strtoul(argv[3], NULL, 10) != 0);
356 
357 	name = argv[4];
358 
359 	has_label = (argc > 5 && strcmp("", argv[5]) != 0);
360 	label = has_label ? argv[5] : "";
361 
362 	has_define = (argc > 6 && strcmp("", argv[6]) != 0);
363 	define = has_define ? argv[6] : "";
364 
365 	remove_tabs = (argc > 7);
366 
367 	in = bc_read_file(argv[1]);
368 	if (in == NULL) return INVALID_INPUT_FILE;
369 
370 	open_file(&out, argv[2], "w");
371 	if (out == NULL) goto out_err;
372 
373 	if (fprintf(out, bc_gen_header, argv[1]) < 0) goto err;
374 	if (has_label && fprintf(out, bc_gen_label_extern, label) < 0) goto err;
375 	if (fprintf(out, bc_gen_name_extern, name) < 0) goto err;
376 	if (has_define && fprintf(out, bc_gen_ifdef, define) < 0) goto err;
377 	if (has_label && output_label(out, label, argv[1]) < 0) goto err;
378 	if (fprintf(out, bc_gen_name, name) < 0) goto err;
379 
380 	i = count = slashes = 0;
381 
382 	// This is where the end of the license comment is found.
383 	while (slashes < 2 && in[i] > 0)
384 	{
385 		if (slashes == 1 && in[i] == '*' && in[i + 1] == '/' &&
386 		    (in[i + 2] == '\n' || in[i + 2] == '\r'))
387 		{
388 			slashes += 1;
389 			i += 2;
390 		}
391 		else if (!slashes && in[i] == '/' && in[i + 1] == '*')
392 		{
393 			slashes += 1;
394 			i += 1;
395 		}
396 
397 		i += 1;
398 	}
399 
400 	// The file is invalid if the end of the license comment could not be found.
401 	if (in[i] == 0)
402 	{
403 		fprintf(stderr, "Could not find end of license comment\n");
404 		err = INVALID_INPUT_FILE;
405 		goto err;
406 	}
407 
408 	i += 1;
409 
410 	// Do not put extra newlines at the beginning of the char array.
411 	while (in[i] == '\n' || in[i] == '\r')
412 	{
413 		i += 1;
414 	}
415 
416 	// This loop is what generates the actual char array. It counts how many
417 	// chars it has printed per line in order to insert newlines at appropriate
418 	// places. It also skips tabs if they should be removed.
419 	while (in[i] != 0)
420 	{
421 		int val;
422 
423 		if (in[i] == '\r')
424 		{
425 			i += 1;
426 			continue;
427 		}
428 
429 		if (!remove_tabs || in[i] != '\t')
430 		{
431 			// Check for excluding something for extra math.
432 			if (in[i] == '{')
433 			{
434 				// If we found the start...
435 				if (!strncmp(in + i, bc_gen_ex_start, strlen(bc_gen_ex_start)))
436 				{
437 					if (exclude_extra_math)
438 					{
439 						// Get past the braces.
440 						i += 2;
441 
442 						// Find the end of the end.
443 						while (in[i] != '{' && strncmp(in + i, bc_gen_ex_end,
444 						                               strlen(bc_gen_ex_end)))
445 						{
446 							i += 1;
447 						}
448 
449 						i += strlen(bc_gen_ex_end);
450 
451 						// Skip the last newline.
452 						if (in[i] == '\r') i += 1;
453 						i += 1;
454 						continue;
455 					}
456 					else
457 					{
458 						i += strlen(bc_gen_ex_start);
459 
460 						// Skip the last newline.
461 						if (in[i] == '\r') i += 1;
462 						i += 1;
463 						continue;
464 					}
465 				}
466 				else if (!exclude_extra_math &&
467 				         !strncmp(in + i, bc_gen_ex_end, strlen(bc_gen_ex_end)))
468 				{
469 					i += strlen(bc_gen_ex_end);
470 
471 					// Skip the last newline.
472 					if (in[i] == '\r') i += 1;
473 					i += 1;
474 					continue;
475 				}
476 			}
477 
478 			// Print a tab if we are at the beginning of a line.
479 			if (!count && fputc('\t', out) == EOF) goto err;
480 
481 			// Print the character.
482 			val = fprintf(out, "%d,", in[i]);
483 			if (val < 0) goto err;
484 
485 			// Adjust the count.
486 			count += (unsigned int) val;
487 			if (count > MAX_WIDTH)
488 			{
489 				count = 0;
490 				if (fputc('\n', out) == EOF) goto err;
491 			}
492 		}
493 
494 		i += 1;
495 	}
496 
497 	// Make sure the end looks nice and insert the NUL byte at the end.
498 	if (!count && (fputc(' ', out) == EOF || fputc(' ', out) == EOF)) goto err;
499 	if (fprintf(out, "0\n};\n") < 0) goto err;
500 
501 	err = (has_define && fprintf(out, bc_gen_endif, define) < 0);
502 
503 err:
504 	fclose(out);
505 out_err:
506 	free(in);
507 	return (int) err;
508 }
509