xref: /freebsd/contrib/bc/gen/strgen.c (revision 43a5ec4eb41567cc92586503212743d89686d78f)
1 /*
2  * *****************************************************************************
3  *
4  * SPDX-License-Identifier: BSD-2-Clause
5  *
6  * Copyright (c) 2018-2021 Gavin D. Howard and contributors.
7  *
8  * Redistribution and use in source and binary forms, with or without
9  * modification, are permitted provided that the following conditions are met:
10  *
11  * * Redistributions of source code must retain the above copyright notice, this
12  *   list of conditions and the following disclaimer.
13  *
14  * * Redistributions in binary form must reproduce the above copyright notice,
15  *   this list of conditions and the following disclaimer in the documentation
16  *   and/or other materials provided with the distribution.
17  *
18  * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
19  * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
20  * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
21  * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
22  * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
23  * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
24  * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
25  * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
26  * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
27  * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
28  * POSSIBILITY OF SUCH DAMAGE.
29  *
30  * *****************************************************************************
31  *
32  * Generates a const array from a bc script.
33  *
34  */
35 
36 #include <stdbool.h>
37 #include <stdio.h>
38 #include <stdlib.h>
39 #include <string.h>
40 
41 #include <errno.h>
42 
// libgen.h is a POSIX header, so it is only included when not building for
// Windows.
44 #ifndef _WIN32
45 #include <libgen.h>
46 #endif // _WIN32
47 
// License banner written at the very top of the generated C source file. It is
// a printf-style template: the single %s receives the name of the input file
// that the source was generated from.
static const char *const bc_gen_header =
	"// Copyright (c) 2018-2021 Gavin D. Howard and contributors.\n"
	"// Licensed under the 2-clause BSD license.\n"
	"// *** AUTOMATICALLY GENERATED FROM %s. DO NOT MODIFY. ***\n\n";
54 
// printf-style templates for the individual pieces of the generated C source.

// Definition of the label variable: label name, then the label text.
static const char *const bc_gen_label = "const char *%s = \"%s\";\n\n";

// Declaration of the label variable.
static const char *const bc_gen_label_extern = "extern const char *%s;\n\n";

// Opening and closing lines of the optional preprocessor guard.
static const char *const bc_gen_ifdef = "#if %s\n";
static const char *const bc_gen_endif = "#endif // %s\n";

// First line of the char array definition, and the array's declaration.
static const char *const bc_gen_name = "const char %s[] = {\n";
static const char *const bc_gen_name_extern = "extern const char %s[];\n\n";
62 
// Exit statuses reported by this program. Numbering starts at 1 because an
// exit status of 0 means success, so it cannot stand for an error.
enum {
	IO_ERR = 1,
	INVALID_INPUT_FILE = 2,
	INVALID_PARAMS = 3
};
68 
// Soft limit on the number of characters written per line of the generated
// char array. A newline is inserted once a line grows past this limit, which
// keeps the lines of the generated file from going much over 80 characters.
enum { MAX_WIDTH = 72 };
72 
/**
 * Opens a file, hiding the difference between the POSIX and Windows APIs.
 * Nothing is returned; success or failure is reported through @a f.
 * @param f         Where the resulting FILE pointer is stored. It is NULL
 *                  afterwards if the file could not be opened.
 * @param filename  The name of the file.
 * @param mode      The mode to open the file in, as for fopen().
 */
static void open_file(FILE** f, const char* filename, const char* mode) {

#ifdef _WIN32

	// fopen_s() is not guaranteed to touch the file pointer when it fails, so
	// clear it first to make sure failure always shows up as NULL.
	*f = NULL;
	fopen_s(f, filename, mode);

#else // _WIN32

	// fopen() already returns NULL on failure.
	*f = fopen(filename, mode);

#endif // _WIN32
}
95 
96 /**
97  * Outputs a label, which is a string literal that the code can use as a name
98  * for the file that is being turned into a string. This is important for the
99  * math libraries because the parse and lex code expects a filename. The label
100  * becomes the filename for the purposes of lexing and parsing.
101  *
102  * The label is generated from bc_gen_label (above). It has the form:
103  *
104  * const char *<label_name> = <label>;
105  *
106  * This function is also needed to smooth out differences between POSIX and
107  * Windows, specifically, the fact that Windows uses backslashes for filenames
108  * and that backslashes have to be escaped in a string literal.
109  *
110  * @param out    The file to output to.
111  * @param label  The label name.
112  * @param name   The actual label text, which is a filename.
113  * @return       Positive if no error, negative on error, just like *printf().
114  */
115 static int output_label(FILE* out, const char* label, const char* name) {
116 
117 #ifndef _WIN32
118 
119 	return fprintf(out, bc_gen_label, label, name);
120 
121 #else // _WIN32
122 
123 	size_t i, count = 0, len = strlen(name);
124 	char* buf;
125 	int ret;
126 
127 	// This loop counts how many backslashes there are in the label.
128 	for (i = 0; i < len; ++i) count += (name[i] == '\\');
129 
130 	buf = (char*) malloc(len + 1 + count);
131 	if (buf == NULL) return -1;
132 
133 	count = 0;
134 
135 	// This loop is the meat of the Windows version. What it does is copy the
136 	// label byte-for-byte, unless it encounters a backslash, in which case, it
137 	// copies the backslash twice to have it escaped properly in the string
138 	// literal.
139 	for (i = 0; i < len; ++i) {
140 
141 		buf[i + count] = name[i];
142 
143 		if (name[i] == '\\') {
144 			count += 1;
145 			buf[i + count] = name[i];
146 		}
147 	}
148 
149 	buf[i + count] = '\0';
150 
151 	ret = fprintf(out, bc_gen_label, label, buf);
152 
153 	free(buf);
154 
155 	return ret;
156 
157 #endif // _WIN32
158 }
159 
160 /**
161  * This program generates C strings (well, actually, C char arrays) from text
162  * files. It generates 1 C source file. The resulting file has this structure:
163  *
164  * <Copyright Header>
165  *
166  * [<Label Extern>]
167  *
168  * <Char Array Extern>
169  *
170  * [<Preprocessor Guard Begin>]
171  * [<Label Definition>]
172  *
173  * <Char Array Definition>
174  * [<Preprocessor Guard End>]
175  *
176  * Anything surrounded by square brackets may not be in the final generated
177  * source file.
178  *
179  * The required command-line parameters are:
180  *
181  * input   Input filename.
182  * output  Output filename.
183  * name    The name of the char array.
184  *
185  * The optional parameters are:
186  *
187  * label        If given, a label for the char array. See the comment for the
188  *              output_label() function. It is meant as a "filename" for the
189  *              text when processed by bc and dc. If label is given, then the
190  *              <Label Extern> and <Label Definition> will exist in the
191  *              generated source file.
192  * define       If given, a preprocessor macro that should be used as a guard
193  *              for the char array and its label. If define is given, then
194  *              <Preprocessor Guard Begin> will exist in the form
195  *              "#if <define>" as part of the generated source file, and
196  *              <Preprocessor Guard End> will exist in the form
197  *              "endif // <define>".
198  * remove_tabs  If this parameter exists, it must be an integer. If it is
199  *              non-zero, then tabs are removed from the input file text before
200  *              outputting to the output char array.
201  *
202  * All text files that are transformed have license comments. This program finds
203  * the end of that comment and strips it out as well.
204  */
205 int main(int argc, char *argv[]) {
206 
207 	FILE *in, *out;
208 	char *label, *define, *name;
209 	int c, count, slashes, err = IO_ERR;
210 	bool has_label, has_define, remove_tabs;
211 
212 	if (argc < 4) {
213 		printf("usage: %s input output name [label [define [remove_tabs]]]\n",
214 		       argv[0]);
215 		return INVALID_PARAMS;
216 	}
217 
218 	name = argv[3];
219 
220 	has_label = (argc > 4 && strcmp("", argv[4]) != 0);
221 	label = has_label ? argv[4] : "";
222 
223 	has_define = (argc > 5 && strcmp("", argv[5]) != 0);
224 	define = has_define ? argv[5] : "";
225 
226 	remove_tabs = (argc > 6);
227 
228 	open_file(&in, argv[1], "r");
229 	if (!in) return INVALID_INPUT_FILE;
230 
231 	open_file(&out, argv[2], "w");
232 	if (!out) goto out_err;
233 
234 	if (fprintf(out, bc_gen_header, argv[1]) < 0) goto err;
235 	if (has_label && fprintf(out, bc_gen_label_extern, label) < 0) goto err;
236 	if (fprintf(out, bc_gen_name_extern, name) < 0) goto err;
237 	if (has_define && fprintf(out, bc_gen_ifdef, define) < 0) goto err;
238 	if (has_label && output_label(out, label, argv[1]) < 0) goto err;
239 	if (fprintf(out, bc_gen_name, name) < 0) goto err;
240 
241 	c = count = slashes = 0;
242 
243 	// This is where the end of the license comment is found.
244 	while (slashes < 2 && (c = fgetc(in)) >= 0) {
245 		slashes += (slashes == 1 && c == '/' && fgetc(in) == '\n');
246 		slashes += (!slashes && c == '/' && fgetc(in) == '*');
247 	}
248 
249 	// The file is invalid if the end of the license comment could not be found.
250 	if (c < 0) {
251 		err = INVALID_INPUT_FILE;
252 		goto err;
253 	}
254 
255 	// Do not put extra newlines at the beginning of the char array.
256 	while ((c = fgetc(in)) == '\n');
257 
258 	// This loop is what generates the actual char array. It counts how many
259 	// chars it has printed per line in order to insert newlines at appropriate
260 	// places. It also skips tabs if they should be removed.
261 	while (c >= 0) {
262 
263 		int val;
264 
265 		if (!remove_tabs || c != '\t') {
266 
267 			if (!count && fputc('\t', out) == EOF) goto err;
268 
269 			val = fprintf(out, "%d,", c);
270 			if (val < 0) goto err;
271 
272 			count += val;
273 
274 			if (count > MAX_WIDTH) {
275 				count = 0;
276 				if (fputc('\n', out) == EOF) goto err;
277 			}
278 		}
279 
280 		c = fgetc(in);
281 	}
282 
283 	// Make sure the end looks nice and insert the NUL byte at the end.
284 	if (!count && (fputc(' ', out) == EOF || fputc(' ', out) == EOF)) goto err;
285 	if (fprintf(out, "0\n};\n") < 0) goto err;
286 
287 	err = (has_define && fprintf(out, bc_gen_endif, define) < 0);
288 
289 err:
290 	fclose(out);
291 out_err:
292 	fclose(in);
293 	return err;
294 }
295