/*
 * *****************************************************************************
 *
 * SPDX-License-Identifier: BSD-2-Clause
 *
 * Copyright (c) 2018-2021 Gavin D. Howard and contributors.
 *
 * Redistribution and use in source and binary forms, with or without
 * modification, are permitted provided that the following conditions are met:
 *
 * * Redistributions of source code must retain the above copyright notice, this
 *   list of conditions and the following disclaimer.
 *
 * * Redistributions in binary form must reproduce the above copyright notice,
 *   this list of conditions and the following disclaimer in the documentation
 *   and/or other materials provided with the distribution.
 *
 * THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
 * AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
 * IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
 * ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT HOLDER OR CONTRIBUTORS BE
 * LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
 * CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
 * SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
 * INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
 * CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
 * ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
 * POSSIBILITY OF SUCH DAMAGE.
 *
 * *****************************************************************************
 *
 * Generates a const char array (as C source) from a text file such as a bc
 * script.
 *
 */

#include <assert.h>
#include <stdbool.h>
#include <stdio.h>
#include <stdlib.h>
#include <string.h>

#include <errno.h>

#include <fcntl.h>
#include <sys/stat.h>

#ifndef _WIN32
#include <unistd.h>
#endif // _WIN32

// For some reason, Windows can't have this header.
#ifndef _WIN32
#include <libgen.h>
#endif // _WIN32

// This pulls in cross-platform stuff.
#include "../include/bcl.h"

// In this program, BC_ERR() expands to its argument unchanged; it only marks
// conditions that are error checks.
#define BC_ERR(v) (v)

// clang-format off

// The usage help. The single %s is filled in with the program name.
static const char* const bc_gen_usage =
	"usage: %s input output exclude name [label [define [remove_tabs]]]\n";

// Markers in the input text. Text between the start marker and the end marker
// is dropped from the output when the exclude parameter is non-zero. The
// markers themselves (and the line ending after each) are never written to the
// output.
static const char* const bc_gen_ex_start = "{{ A H N HN }}";
static const char* const bc_gen_ex_end = "{{ end }}";

// This is exactly what it looks like. It just slaps a simple license header on
// the generated C source file. The %s is filled in with the input filename.
static const char* const bc_gen_header =
	"// Copyright (c) 2018-2021 Gavin D. Howard and contributors.\n"
	"// Licensed under the 2-clause BSD license.\n"
	"// *** AUTOMATICALLY GENERATED FROM %s. DO NOT MODIFY. ***\n\n";
// clang-format on

// These are just format strings used to generate the C source: the label
// definition and its extern declaration, the opening and closing of the
// preprocessor guard, and the opening of the char array definition and its
// extern declaration.
static const char* const bc_gen_label = "const char *%s = \"%s\";\n\n";
static const char* const bc_gen_label_extern = "extern const char *%s;\n\n";
static const char* const bc_gen_ifdef = "#if %s\n";
static const char* const bc_gen_endif = "#endif // %s\n";
static const char* const bc_gen_name = "const char %s[] = {\n";
static const char* const bc_gen_name_extern = "extern const char %s[];\n\n";

// Error codes. We can't use 0 because these are used as exit statuses, and 0
// as an exit status is not an error.
#define IO_ERR (1)
#define INVALID_INPUT_FILE (2)
#define INVALID_PARAMS (3)

// This is the width, in characters, after which a line of the generated array
// is ended. This is to ensure that lines in the generated file don't go much
// over 80 characters.
94 #define MAX_WIDTH (72) 95 96 /** 97 * Open a file. This function is to smooth over differences between POSIX and 98 * Windows. 99 * @param f A pointer to the FILE pointer that will be initialized. 100 * @param filename The name of the file. 101 * @param mode The mode to open the file in. 102 */ 103 static void 104 open_file(FILE** f, const char* filename, const char* mode) 105 { 106 #ifndef _WIN32 107 108 *f = fopen(filename, mode); 109 110 #else // _WIN32 111 112 // We want the file pointer to be NULL on failure, but fopen_s() is not 113 // guaranteed to set it. 114 *f = NULL; 115 fopen_s(f, filename, mode); 116 117 #endif // _WIN32 118 } 119 120 /** 121 * A portability file open function. This is copied from src/read.c. Make sure 122 * to update that if this changes. 123 * @param path The path to the file to open. 124 * @param mode The mode to open in. 125 */ 126 static int 127 bc_read_open(const char* path, int mode) 128 { 129 int fd; 130 131 #ifndef _WIN32 132 fd = open(path, mode); 133 #else // _WIN32 134 fd = -1; 135 open(&fd, path, mode); 136 #endif 137 138 return fd; 139 } 140 141 /** 142 * Reads a file and returns the file as a string. This has been copied from 143 * src/read.c. Make sure to change that if this changes. 144 * @param path The path to the file. 145 * @return The contents of the file as a string. 146 */ 147 static char* 148 bc_read_file(const char* path) 149 { 150 int e = IO_ERR; 151 size_t size, to_read; 152 struct stat pstat; 153 int fd; 154 char* buf; 155 char* buf2; 156 157 // This has been copied from src/read.c. Make sure to change that if this 158 // changes. 159 160 assert(path != NULL); 161 162 #ifndef NDEBUG 163 // Need this to quiet MSan. 164 // NOLINTNEXTLINE 165 memset(&pstat, 0, sizeof(struct stat)); 166 #endif // NDEBUG 167 168 fd = bc_read_open(path, O_RDONLY); 169 170 // If we can't read a file, we just barf. 
171 if (BC_ERR(fd < 0)) 172 { 173 fprintf(stderr, "Could not open file: %s\n", path); 174 exit(INVALID_INPUT_FILE); 175 } 176 177 // The reason we call fstat is to eliminate TOCTOU race conditions. This 178 // way, we have an open file, so it's not going anywhere. 179 if (BC_ERR(fstat(fd, &pstat) == -1)) 180 { 181 fprintf(stderr, "Could not stat file: %s\n", path); 182 exit(INVALID_INPUT_FILE); 183 } 184 185 // Make sure it's not a directory. 186 if (BC_ERR(S_ISDIR(pstat.st_mode))) 187 { 188 fprintf(stderr, "Path is directory: %s\n", path); 189 exit(INVALID_INPUT_FILE); 190 } 191 192 // Get the size of the file and allocate that much. 193 size = (size_t) pstat.st_size; 194 buf = (char*) malloc(size + 1); 195 if (buf == NULL) 196 { 197 fprintf(stderr, "Could not malloc\n"); 198 exit(INVALID_INPUT_FILE); 199 } 200 buf2 = buf; 201 to_read = size; 202 203 do 204 { 205 // Read the file. We just bail if a signal interrupts. This is so that 206 // users can interrupt the reading of big files if they want. 207 ssize_t r = read(fd, buf2, to_read); 208 if (BC_ERR(r < 0)) exit(e); 209 to_read -= (size_t) r; 210 buf2 += (size_t) r; 211 } 212 while (to_read); 213 214 // Got to have a nul byte. 215 buf[size] = '\0'; 216 217 close(fd); 218 219 return buf; 220 } 221 222 /** 223 * Outputs a label, which is a string literal that the code can use as a name 224 * for the file that is being turned into a string. This is important for the 225 * math libraries because the parse and lex code expects a filename. The label 226 * becomes the filename for the purposes of lexing and parsing. 227 * 228 * The label is generated from bc_gen_label (above). It has the form: 229 * 230 * const char *<label_name> = <label>; 231 * 232 * This function is also needed to smooth out differences between POSIX and 233 * Windows, specifically, the fact that Windows uses backslashes for filenames 234 * and that backslashes have to be escaped in a string literal. 235 * 236 * @param out The file to output to. 
237 * @param label The label name. 238 * @param name The actual label text, which is a filename. 239 * @return Positive if no error, negative on error, just like *printf(). 240 */ 241 static int 242 output_label(FILE* out, const char* label, const char* name) 243 { 244 #ifndef _WIN32 245 246 return fprintf(out, bc_gen_label, label, name); 247 248 #else // _WIN32 249 250 size_t i, count = 0, len = strlen(name); 251 char* buf; 252 int ret; 253 254 // This loop counts how many backslashes there are in the label. 255 for (i = 0; i < len; ++i) 256 { 257 count += (name[i] == '\\'); 258 } 259 260 buf = (char*) malloc(len + 1 + count); 261 if (buf == NULL) return -1; 262 263 count = 0; 264 265 // This loop is the meat of the Windows version. What it does is copy the 266 // label byte-for-byte, unless it encounters a backslash, in which case, it 267 // copies the backslash twice to have it escaped properly in the string 268 // literal. 269 for (i = 0; i < len; ++i) 270 { 271 buf[i + count] = name[i]; 272 273 if (name[i] == '\\') 274 { 275 count += 1; 276 buf[i + count] = name[i]; 277 } 278 } 279 280 buf[i + count] = '\0'; 281 282 ret = fprintf(out, bc_gen_label, label, buf); 283 284 free(buf); 285 286 return ret; 287 288 #endif // _WIN32 289 } 290 291 /** 292 * This program generates C strings (well, actually, C char arrays) from text 293 * files. It generates 1 C source file. The resulting file has this structure: 294 * 295 * <Copyright Header> 296 * 297 * [<Label Extern>] 298 * 299 * <Char Array Extern> 300 * 301 * [<Preprocessor Guard Begin>] 302 * [<Label Definition>] 303 * 304 * <Char Array Definition> 305 * [<Preprocessor Guard End>] 306 * 307 * Anything surrounded by square brackets may not be in the final generated 308 * source file. 309 * 310 * The required command-line parameters are: 311 * 312 * input Input filename. 313 * output Output filename. 314 * exclude Whether to exclude extra math-only stuff. 315 * name The name of the char array. 
316 * 317 * The optional parameters are: 318 * 319 * label If given, a label for the char array. See the comment for the 320 * output_label() function. It is meant as a "filename" for the 321 * text when processed by bc and dc. If label is given, then the 322 * <Label Extern> and <Label Definition> will exist in the 323 * generated source file. 324 * define If given, a preprocessor macro that should be used as a guard 325 * for the char array and its label. If define is given, then 326 * <Preprocessor Guard Begin> will exist in the form 327 * "#if <define>" as part of the generated source file, and 328 * <Preprocessor Guard End> will exist in the form 329 * "endif // <define>". 330 * remove_tabs If this parameter exists, it must be an integer. If it is 331 * non-zero, then tabs are removed from the input file text before 332 * outputting to the output char array. 333 * 334 * All text files that are transformed have license comments. This program finds 335 * the end of that comment and strips it out as well. 336 */ 337 int 338 main(int argc, char* argv[]) 339 { 340 char* in; 341 FILE* out; 342 const char* label; 343 const char* define; 344 char* name; 345 unsigned int count, slashes, err = IO_ERR; 346 bool has_label, has_define, remove_tabs, exclude_extra_math; 347 size_t i; 348 349 if (argc < 5) 350 { 351 printf(bc_gen_usage, argv[0]); 352 return INVALID_PARAMS; 353 } 354 355 exclude_extra_math = (strtoul(argv[3], NULL, 10) != 0); 356 357 name = argv[4]; 358 359 has_label = (argc > 5 && strcmp("", argv[5]) != 0); 360 label = has_label ? argv[5] : ""; 361 362 has_define = (argc > 6 && strcmp("", argv[6]) != 0); 363 define = has_define ? 
argv[6] : ""; 364 365 remove_tabs = (argc > 7); 366 367 in = bc_read_file(argv[1]); 368 if (in == NULL) return INVALID_INPUT_FILE; 369 370 open_file(&out, argv[2], "w"); 371 if (out == NULL) goto out_err; 372 373 if (fprintf(out, bc_gen_header, argv[1]) < 0) goto err; 374 if (has_label && fprintf(out, bc_gen_label_extern, label) < 0) goto err; 375 if (fprintf(out, bc_gen_name_extern, name) < 0) goto err; 376 if (has_define && fprintf(out, bc_gen_ifdef, define) < 0) goto err; 377 if (has_label && output_label(out, label, argv[1]) < 0) goto err; 378 if (fprintf(out, bc_gen_name, name) < 0) goto err; 379 380 i = count = slashes = 0; 381 382 // This is where the end of the license comment is found. 383 while (slashes < 2 && in[i] > 0) 384 { 385 if (slashes == 1 && in[i] == '*' && in[i + 1] == '/' && 386 (in[i + 2] == '\n' || in[i + 2] == '\r')) 387 { 388 slashes += 1; 389 i += 2; 390 } 391 else if (!slashes && in[i] == '/' && in[i + 1] == '*') 392 { 393 slashes += 1; 394 i += 1; 395 } 396 397 i += 1; 398 } 399 400 // The file is invalid if the end of the license comment could not be found. 401 if (in[i] == 0) 402 { 403 fprintf(stderr, "Could not find end of license comment\n"); 404 err = INVALID_INPUT_FILE; 405 goto err; 406 } 407 408 i += 1; 409 410 // Do not put extra newlines at the beginning of the char array. 411 while (in[i] == '\n' || in[i] == '\r') 412 { 413 i += 1; 414 } 415 416 // This loop is what generates the actual char array. It counts how many 417 // chars it has printed per line in order to insert newlines at appropriate 418 // places. It also skips tabs if they should be removed. 419 while (in[i] != 0) 420 { 421 int val; 422 423 if (in[i] == '\r') 424 { 425 i += 1; 426 continue; 427 } 428 429 if (!remove_tabs || in[i] != '\t') 430 { 431 // Check for excluding something for extra math. 432 if (in[i] == '{') 433 { 434 // If we found the start... 
435 if (!strncmp(in + i, bc_gen_ex_start, strlen(bc_gen_ex_start))) 436 { 437 if (exclude_extra_math) 438 { 439 // Get past the braces. 440 i += 2; 441 442 // Find the end of the end. 443 while (in[i] != '{' && strncmp(in + i, bc_gen_ex_end, 444 strlen(bc_gen_ex_end))) 445 { 446 i += 1; 447 } 448 449 i += strlen(bc_gen_ex_end); 450 451 // Skip the last newline. 452 if (in[i] == '\r') i += 1; 453 i += 1; 454 continue; 455 } 456 else 457 { 458 i += strlen(bc_gen_ex_start); 459 460 // Skip the last newline. 461 if (in[i] == '\r') i += 1; 462 i += 1; 463 continue; 464 } 465 } 466 else if (!exclude_extra_math && 467 !strncmp(in + i, bc_gen_ex_end, strlen(bc_gen_ex_end))) 468 { 469 i += strlen(bc_gen_ex_end); 470 471 // Skip the last newline. 472 if (in[i] == '\r') i += 1; 473 i += 1; 474 continue; 475 } 476 } 477 478 // Print a tab if we are at the beginning of a line. 479 if (!count && fputc('\t', out) == EOF) goto err; 480 481 // Print the character. 482 val = fprintf(out, "%d,", in[i]); 483 if (val < 0) goto err; 484 485 // Adjust the count. 486 count += (unsigned int) val; 487 if (count > MAX_WIDTH) 488 { 489 count = 0; 490 if (fputc('\n', out) == EOF) goto err; 491 } 492 } 493 494 i += 1; 495 } 496 497 // Make sure the end looks nice and insert the NUL byte at the end. 498 if (!count && (fputc(' ', out) == EOF || fputc(' ', out) == EOF)) goto err; 499 if (fprintf(out, "0\n};\n") < 0) goto err; 500 501 err = (has_define && fprintf(out, bc_gen_endif, define) < 0); 502 503 err: 504 fclose(out); 505 out_err: 506 free(in); 507 return (int) err; 508 } 509