/*
 * *****************************************************************************
 *
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2018-2023 Gavin D. Howard and contributors.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice, this
 *   list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * *****************************************************************************
 *
 * Generates a const array from a bc script.
 *
 */

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <errno.h>

#include <fcntl.h>
#include <sys/stat.h>

#ifndef _WIN32
#include <unistd.h>
#endif // _WIN32

// For some reason, Windows can't have this header.
#ifndef _WIN32
#include <libgen.h>
#endif // _WIN32

// This pulls in cross-platform stuff.
#include <status.h>

// clang-format off

// The usage help. The single %s is filled in with the program name (argv[0]).
static const char* const bc_gen_usage =
	"usage: %s input output exclude name [label [define [remove_tabs]]]\n";

// Markers that may appear in the input text. main() never copies the markers
// themselves into the generated array; when the "exclude" command-line
// argument is non-zero, it also drops the text between a start marker and the
// end marker.
static const char* const bc_gen_ex_start = "{{ A H N HN }}";
static const char* const bc_gen_ex_end = "{{ end }}";

// This is exactly what it looks like. It just slaps a simple license header on
// the generated C source file. The single %s is filled in with the input
// filename.
static const char* const bc_gen_header =
	"// Copyright (c) 2018-2023 Gavin D. Howard and contributors.\n"
	"// Licensed under the 2-clause BSD license.\n"
	"// *** AUTOMATICALLY GENERATED FROM %s. DO NOT MODIFY. ***\n\n";
// clang-format on

// These are just format strings used to generate the C source.

// Definition of the label: takes the label identifier, then the label text.
static const char* const bc_gen_label = "const char *%s = \"%s\";\n\n";

// Extern declaration of the label: takes the label identifier.
static const char* const bc_gen_label_extern = "extern const char *%s;\n\n";

// Opening and closing of the optional preprocessor guard: both take the guard
// expression.
static const char* const bc_gen_ifdef = "#if %s\n";
static const char* const bc_gen_endif = "#endif // %s\n";

// Start of the char array definition: takes the array identifier.
static const char* const bc_gen_name = "const char %s[] = {\n";

// Extern declaration of the char array: takes the array identifier.
static const char* const bc_gen_name_extern = "extern const char %s[];\n\n";

// Error codes. We can't use 0 because these are used as exit statuses, and 0
// as an exit status is not an error.

// An I/O operation failed.
#define IO_ERR (1)

// The input file could not be opened, inspected, or parsed.
#define INVALID_INPUT_FILE (2)

// Too few command-line arguments were given.
#define INVALID_PARAMS (3)

// This is the max width to print characters to the screen. This is to ensure
// that lines don't go much over 80 characters.
92 #define MAX_WIDTH (72) 93 94 /** 95 * Open a file. This function is to smooth over differences between POSIX and 96 * Windows. 97 * @param f A pointer to the FILE pointer that will be initialized. 98 * @param filename The name of the file. 99 * @param mode The mode to open the file in. 100 */ 101 static void 102 open_file(FILE** f, const char* filename, const char* mode) 103 { 104 #ifndef _WIN32 105 106 *f = fopen(filename, mode); 107 108 #else // _WIN32 109 110 // We want the file pointer to be NULL on failure, but fopen_s() is not 111 // guaranteed to set it. 112 *f = NULL; 113 fopen_s(f, filename, mode); 114 115 #endif // _WIN32 116 } 117 118 /** 119 * A portability file open function. This is copied from src/read.c. Make sure 120 * to update that if this changes. 121 * @param path The path to the file to open. 122 * @param mode The mode to open in. 123 */ 124 static int 125 bc_read_open(const char* path, int mode) 126 { 127 int fd; 128 129 #ifndef _WIN32 130 fd = open(path, mode); 131 #else // _WIN32 132 fd = -1; 133 open(&fd, path, mode); 134 #endif 135 136 return fd; 137 } 138 139 /** 140 * Reads a file and returns the file as a string. This has been copied from 141 * src/read.c. Make sure to change that if this changes. 142 * @param path The path to the file. 143 * @return The contents of the file as a string. 144 */ 145 static char* 146 bc_read_file(const char* path) 147 { 148 int e = IO_ERR; 149 size_t size, to_read; 150 struct stat pstat; 151 int fd; 152 char* buf; 153 char* buf2; 154 155 // This has been copied from src/read.c. Make sure to change that if this 156 // changes. 157 158 assert(path != NULL); 159 160 #if BC_DEBUG 161 // Need this to quiet MSan. 162 // NOLINTNEXTLINE 163 memset(&pstat, 0, sizeof(struct stat)); 164 #endif // BC_DEBUG 165 166 fd = bc_read_open(path, O_RDONLY); 167 168 // If we can't read a file, we just barf. 
169 if (BC_ERR(fd < 0)) 170 { 171 fprintf(stderr, "Could not open file: %s\n", path); 172 exit(INVALID_INPUT_FILE); 173 } 174 175 // The reason we call fstat is to eliminate TOCTOU race conditions. This 176 // way, we have an open file, so it's not going anywhere. 177 if (BC_ERR(fstat(fd, &pstat) == -1)) 178 { 179 fprintf(stderr, "Could not stat file: %s\n", path); 180 exit(INVALID_INPUT_FILE); 181 } 182 183 // Make sure it's not a directory. 184 if (BC_ERR(S_ISDIR(pstat.st_mode))) 185 { 186 fprintf(stderr, "Path is directory: %s\n", path); 187 exit(INVALID_INPUT_FILE); 188 } 189 190 // Get the size of the file and allocate that much. 191 size = (size_t) pstat.st_size; 192 buf = (char*) malloc(size + 1); 193 if (buf == NULL) 194 { 195 fprintf(stderr, "Could not malloc\n"); 196 exit(INVALID_INPUT_FILE); 197 } 198 buf2 = buf; 199 to_read = size; 200 201 do 202 { 203 // Read the file. We just bail if a signal interrupts. This is so that 204 // users can interrupt the reading of big files if they want. 205 ssize_t r = read(fd, buf2, to_read); 206 if (BC_ERR(r < 0)) exit(e); 207 to_read -= (size_t) r; 208 buf2 += (size_t) r; 209 } 210 while (to_read); 211 212 // Got to have a nul byte. 213 buf[size] = '\0'; 214 215 close(fd); 216 217 return buf; 218 } 219 220 /** 221 * Outputs a label, which is a string literal that the code can use as a name 222 * for the file that is being turned into a string. This is important for the 223 * math libraries because the parse and lex code expects a filename. The label 224 * becomes the filename for the purposes of lexing and parsing. 225 * 226 * The label is generated from bc_gen_label (above). It has the form: 227 * 228 * const char *<label_name> = <label>; 229 * 230 * This function is also needed to smooth out differences between POSIX and 231 * Windows, specifically, the fact that Windows uses backslashes for filenames 232 * and that backslashes have to be escaped in a string literal. 233 * 234 * @param out The file to output to. 
235 * @param label The label name. 236 * @param name The actual label text, which is a filename. 237 * @return Positive if no error, negative on error, just like *printf(). 238 */ 239 static int 240 output_label(FILE* out, const char* label, const char* name) 241 { 242 #ifndef _WIN32 243 244 return fprintf(out, bc_gen_label, label, name); 245 246 #else // _WIN32 247 248 size_t i, count = 0, len = strlen(name); 249 char* buf; 250 int ret; 251 252 // This loop counts how many backslashes there are in the label. 253 for (i = 0; i < len; ++i) 254 { 255 count += (name[i] == '\\'); 256 } 257 258 buf = (char*) malloc(len + 1 + count); 259 if (buf == NULL) return -1; 260 261 count = 0; 262 263 // This loop is the meat of the Windows version. What it does is copy the 264 // label byte-for-byte, unless it encounters a backslash, in which case, it 265 // copies the backslash twice to have it escaped properly in the string 266 // literal. 267 for (i = 0; i < len; ++i) 268 { 269 buf[i + count] = name[i]; 270 271 if (name[i] == '\\') 272 { 273 count += 1; 274 buf[i + count] = name[i]; 275 } 276 } 277 278 buf[i + count] = '\0'; 279 280 ret = fprintf(out, bc_gen_label, label, buf); 281 282 free(buf); 283 284 return ret; 285 286 #endif // _WIN32 287 } 288 289 /** 290 * This program generates C strings (well, actually, C char arrays) from text 291 * files. It generates 1 C source file. The resulting file has this structure: 292 * 293 * <Copyright Header> 294 * 295 * [<Label Extern>] 296 * 297 * <Char Array Extern> 298 * 299 * [<Preprocessor Guard Begin>] 300 * [<Label Definition>] 301 * 302 * <Char Array Definition> 303 * [<Preprocessor Guard End>] 304 * 305 * Anything surrounded by square brackets may not be in the final generated 306 * source file. 307 * 308 * The required command-line parameters are: 309 * 310 * input Input filename. 311 * output Output filename. 312 * exclude Whether to exclude extra math-only stuff. 313 * name The name of the char array. 
314 * 315 * The optional parameters are: 316 * 317 * label If given, a label for the char array. See the comment for the 318 * output_label() function. It is meant as a "filename" for the 319 * text when processed by bc and dc. If label is given, then the 320 * <Label Extern> and <Label Definition> will exist in the 321 * generated source file. 322 * define If given, a preprocessor macro that should be used as a guard 323 * for the char array and its label. If define is given, then 324 * <Preprocessor Guard Begin> will exist in the form 325 * "#if <define>" as part of the generated source file, and 326 * <Preprocessor Guard End> will exist in the form 327 * "endif // <define>". 328 * remove_tabs If this parameter exists, it must be an integer. If it is 329 * non-zero, then tabs are removed from the input file text before 330 * outputting to the output char array. 331 * 332 * All text files that are transformed have license comments. This program finds 333 * the end of that comment and strips it out as well. 334 */ 335 int 336 main(int argc, char* argv[]) 337 { 338 char* in; 339 FILE* out; 340 const char* label; 341 const char* define; 342 char* name; 343 unsigned int count, slashes, err = IO_ERR; 344 bool has_label, has_define, remove_tabs, exclude_extra_math; 345 size_t i; 346 347 if (argc < 5) 348 { 349 printf(bc_gen_usage, argv[0]); 350 return INVALID_PARAMS; 351 } 352 353 exclude_extra_math = (strtoul(argv[3], NULL, 10) != 0); 354 355 name = argv[4]; 356 357 has_label = (argc > 5 && strcmp("", argv[5]) != 0); 358 label = has_label ? argv[5] : ""; 359 360 has_define = (argc > 6 && strcmp("", argv[6]) != 0); 361 define = has_define ? 
argv[6] : ""; 362 363 remove_tabs = (argc > 7 && atoi(argv[7]) != 0); 364 365 in = bc_read_file(argv[1]); 366 if (in == NULL) return INVALID_INPUT_FILE; 367 368 open_file(&out, argv[2], "w"); 369 if (out == NULL) goto out_err; 370 371 if (fprintf(out, bc_gen_header, argv[1]) < 0) goto err; 372 if (has_label && fprintf(out, bc_gen_label_extern, label) < 0) goto err; 373 if (fprintf(out, bc_gen_name_extern, name) < 0) goto err; 374 if (has_define && fprintf(out, bc_gen_ifdef, define) < 0) goto err; 375 if (has_label && output_label(out, label, argv[1]) < 0) goto err; 376 if (fprintf(out, bc_gen_name, name) < 0) goto err; 377 378 i = count = slashes = 0; 379 380 // This is where the end of the license comment is found. 381 while (slashes < 2 && in[i] > 0) 382 { 383 if (slashes == 1 && in[i] == '*' && in[i + 1] == '/' && 384 (in[i + 2] == '\n' || in[i + 2] == '\r')) 385 { 386 slashes += 1; 387 i += 2; 388 } 389 else if (!slashes && in[i] == '/' && in[i + 1] == '*') 390 { 391 slashes += 1; 392 i += 1; 393 } 394 395 i += 1; 396 } 397 398 // The file is invalid if the end of the license comment could not be found. 399 if (in[i] == 0) 400 { 401 fprintf(stderr, "Could not find end of license comment\n"); 402 err = INVALID_INPUT_FILE; 403 goto err; 404 } 405 406 i += 1; 407 408 // Do not put extra newlines at the beginning of the char array. 409 while (in[i] == '\n' || in[i] == '\r') 410 { 411 i += 1; 412 } 413 414 // This loop is what generates the actual char array. It counts how many 415 // chars it has printed per line in order to insert newlines at appropriate 416 // places. It also skips tabs if they should be removed. 417 while (in[i] != 0) 418 { 419 int val; 420 421 if (in[i] == '\r') 422 { 423 i += 1; 424 continue; 425 } 426 427 if (!remove_tabs || in[i] != '\t') 428 { 429 // Check for excluding something for extra math. 430 if (in[i] == '{') 431 { 432 // If we found the start... 
433 if (!strncmp(in + i, bc_gen_ex_start, strlen(bc_gen_ex_start))) 434 { 435 if (exclude_extra_math) 436 { 437 // Get past the braces. 438 i += 2; 439 440 // Find the end of the end. 441 while (in[i] != '{' && strncmp(in + i, bc_gen_ex_end, 442 strlen(bc_gen_ex_end))) 443 { 444 i += 1; 445 } 446 447 i += strlen(bc_gen_ex_end); 448 449 // Skip the last newline. 450 if (in[i] == '\r') i += 1; 451 i += 1; 452 continue; 453 } 454 else 455 { 456 i += strlen(bc_gen_ex_start); 457 458 // Skip the last newline. 459 if (in[i] == '\r') i += 1; 460 i += 1; 461 continue; 462 } 463 } 464 else if (!exclude_extra_math && 465 !strncmp(in + i, bc_gen_ex_end, strlen(bc_gen_ex_end))) 466 { 467 i += strlen(bc_gen_ex_end); 468 469 // Skip the last newline. 470 if (in[i] == '\r') i += 1; 471 i += 1; 472 continue; 473 } 474 } 475 476 // Print a tab if we are at the beginning of a line. 477 if (!count && fputc('\t', out) == EOF) goto err; 478 479 // Print the character. 480 val = fprintf(out, "%d,", in[i]); 481 if (val < 0) goto err; 482 483 // Adjust the count. 484 count += (unsigned int) val; 485 if (count > MAX_WIDTH) 486 { 487 count = 0; 488 if (fputc('\n', out) == EOF) goto err; 489 } 490 } 491 492 i += 1; 493 } 494 495 // Make sure the end looks nice and insert the NUL byte at the end. 496 if (!count && (fputc(' ', out) == EOF || fputc(' ', out) == EOF)) goto err; 497 if (fprintf(out, "0\n};\n") < 0) goto err; 498 499 err = (has_define && fprintf(out, bc_gen_endif, define) < 0); 500 501 err: 502 fclose(out); 503 out_err: 504 free(in); 505 return (int) err; 506 } 507