1 // Copyright 2012 The Kyua Authors. 2 // All rights reserved. 3 // 4 // Redistribution and use in source and binary forms, with or without 5 // modification, are permitted provided that the following conditions are 6 // met: 7 // 8 // * Redistributions of source code must retain the above copyright 9 // notice, this list of conditions and the following disclaimer. 10 // * Redistributions in binary form must reproduce the above copyright 11 // notice, this list of conditions and the following disclaimer in the 12 // documentation and/or other materials provided with the distribution. 13 // * Neither the name of Google Inc. nor the names of its contributors 14 // may be used to endorse or promote products derived from this software 15 // without specific prior written permission. 16 // 17 // THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS 18 // "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT 19 // LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR 20 // A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT 21 // OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, 22 // SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT 23 // LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, 24 // DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY 25 // THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT 26 // (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE 27 // OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE. 28 29 #include "utils/text/templates.hpp" 30 31 #include <algorithm> 32 #include <fstream> 33 #include <sstream> 34 #include <stack> 35 36 #include "utils/format/macros.hpp" 37 #include "utils/fs/path.hpp" 38 #include "utils/noncopyable.hpp" 39 #include "utils/sanity.hpp" 40 #include "utils/text/exceptions.hpp" 41 #include "utils/text/operations.ipp" 42 43 namespace text = utils::text; 44 45 46 namespace { 47 48 49 /// Definition of a template statement. 50 /// 51 /// A template statement is a particular line in the input file that is 52 /// preceeded by a template marker. This class provides a high-level 53 /// representation of the contents of such statement and a mechanism to parse 54 /// the textual line into this high-level representation. 55 class statement_def { 56 public: 57 /// Types of the known statements. 58 enum statement_type { 59 /// Alternative clause of a conditional. 60 /// 61 /// Takes no arguments. 62 type_else, 63 64 /// End of conditional marker. 65 /// 66 /// Takes no arguments. 67 type_endif, 68 69 /// End of loop marker. 70 /// 71 /// Takes no arguments. 72 type_endloop, 73 74 /// Beginning of a conditional. 75 /// 76 /// Takes a single argument, which denotes the name of the variable or 77 /// vector to check for existence. This is the only expression 78 /// supported. 79 type_if, 80 81 /// Beginning of a loop over all the elements of a vector. 82 /// 83 /// Takes two arguments: the name of the vector over which to iterate 84 /// and the name of the iterator to later index this vector. 85 type_loop, 86 }; 87 88 private: 89 /// Internal data describing the structure of a particular statement type. 90 struct type_descriptor { 91 /// The native type of the statement. 92 statement_type type; 93 94 /// The expected number of arguments. 95 unsigned int n_arguments; 96 97 /// Constructs a new type descriptor. 98 /// 99 /// \param type_ The native type of the statement. 100 /// \param n_arguments_ The expected number of arguments. 101 type_descriptor(const statement_type type_, 102 const unsigned int n_arguments_) 103 : type(type_), n_arguments(n_arguments_) 104 { 105 } 106 }; 107 108 /// Mapping of statement type names to their definitions. 109 typedef std::map< std::string, type_descriptor > types_map; 110 111 /// Description of the different statement types. 112 /// 113 /// This static map is initialized once and reused later for any statement 114 /// lookup. Unfortunately, we cannot perform this initialization in a 115 /// static manner without C++11. 116 static types_map _types; 117 118 /// Generates a new types definition map. 119 /// 120 /// \return A new types definition map, to be assigned to _types. 121 static types_map 122 generate_types_map(void) 123 { 124 // If you change this, please edit the comments in the enum above. 125 types_map types; 126 types.insert(types_map::value_type( 127 "else", type_descriptor(type_else, 0))); 128 types.insert(types_map::value_type( 129 "endif", type_descriptor(type_endif, 0))); 130 types.insert(types_map::value_type( 131 "endloop", type_descriptor(type_endloop, 0))); 132 types.insert(types_map::value_type( 133 "if", type_descriptor(type_if, 1))); 134 types.insert(types_map::value_type( 135 "loop", type_descriptor(type_loop, 2))); 136 return types; 137 } 138 139 public: 140 /// The type of the statement. 141 statement_type type; 142 143 /// The arguments to the statement, in textual form. 144 const std::vector< std::string > arguments; 145 146 /// Creates a new statement. 147 /// 148 /// \param type_ The type of the statement. 149 /// \param arguments_ The arguments to the statement. 150 statement_def(const statement_type& type_, 151 const std::vector< std::string >& arguments_) : 152 type(type_), arguments(arguments_) 153 { 154 #if !defined(NDEBUG) 155 for (types_map::const_iterator iter = _types.begin(); 156 iter != _types.end(); ++iter) { 157 const type_descriptor& descriptor = (*iter).second; 158 if (descriptor.type == type_) { 159 PRE(descriptor.n_arguments == arguments_.size()); 160 return; 161 } 162 } 163 UNREACHABLE; 164 #endif 165 } 166 167 /// Parses a statement. 168 /// 169 /// \param line The textual representation of the statement without any 170 /// prefix. 171 /// 172 /// \return The parsed statement. 173 /// 174 /// \throw text::syntax_error If the statement is not correctly defined. 175 static statement_def 176 parse(const std::string& line) 177 { 178 if (_types.empty()) 179 _types = generate_types_map(); 180 181 const std::vector< std::string > words = text::split(line, ' '); 182 if (words.empty()) 183 throw text::syntax_error("Empty statement"); 184 185 const types_map::const_iterator iter = _types.find(words[0]); 186 if (iter == _types.end()) 187 throw text::syntax_error(F("Unknown statement '%s'") % words[0]); 188 const type_descriptor& descriptor = (*iter).second; 189 190 if (words.size() - 1 != descriptor.n_arguments) 191 throw text::syntax_error(F("Invalid number of arguments for " 192 "statement '%s'") % words[0]); 193 194 std::vector< std::string > new_arguments; 195 new_arguments.resize(words.size() - 1); 196 std::copy(words.begin() + 1, words.end(), new_arguments.begin()); 197 198 return statement_def(descriptor.type, new_arguments); 199 } 200 }; 201 202 203 statement_def::types_map statement_def::_types; 204 205 206 /// Definition of a loop. 207 /// 208 /// This simple structure is used to keep track of the parameters of a loop. 209 struct loop_def { 210 /// The name of the vector over which this loop is iterating. 211 std::string vector; 212 213 /// The name of the iterator defined by this loop. 214 std::string iterator; 215 216 /// Position in the input to which to rewind to on looping. 217 /// 218 /// This position points to the line after the loop statement, not the loop 219 /// itself. This is one of the reasons why we have this structure, so that 220 /// we can maintain the data about the loop without having to re-process it. 221 std::istream::pos_type position; 222 223 /// Constructs a new loop definition. 224 /// 225 /// \param vector_ The name of the vector (first argument). 226 /// \param iterator_ The name of the iterator (second argumnet). 227 /// \param position_ Position of the next line after the loop statement. 228 loop_def(const std::string& vector_, const std::string& iterator_, 229 const std::istream::pos_type position_) : 230 vector(vector_), iterator(iterator_), position(position_) 231 { 232 } 233 }; 234 235 236 /// Stateful class to instantiate the templates in an input stream. 237 /// 238 /// The goal of this parser is to scan the input once and not buffer anything in 239 /// memory. The only exception are loops: loops are reinterpreted on every 240 /// iteration from the same input file by rewidining the stream to the 241 /// appropriate position. 242 class templates_parser : utils::noncopyable { 243 /// The templates to apply. 244 /// 245 /// Note that this is not const because the parser has to have write access 246 /// to the templates. In particular, it needs to be able to define the 247 /// iterators as regular variables. 248 text::templates_def _templates; 249 250 /// Prefix that marks a line as a statement. 251 const std::string _prefix; 252 253 /// Delimiter to surround an expression instantiation. 254 const std::string _delimiter; 255 256 /// Whether to skip incoming lines or not. 257 /// 258 /// The top of the stack is true whenever we encounter a conditional that 259 /// evaluates to false or a loop that does not have any iterations left. 260 /// Under these circumstances, we need to continue scanning the input stream 261 /// until we find the matching closing endif or endloop construct. 262 /// 263 /// This is a stack rather than a plain boolean to allow us deal with 264 /// if-else clauses. 265 std::stack< bool > _skip; 266 267 /// Current count of nested conditionals. 268 unsigned int _if_level; 269 270 /// Level of the top-most conditional that evaluated to false. 271 unsigned int _exit_if_level; 272 273 /// Current count of nested loops. 274 unsigned int _loop_level; 275 276 /// Level of the top-most loop that does not have any iterations left. 277 unsigned int _exit_loop_level; 278 279 /// Information about all the nested loops up to the current point. 280 std::stack< loop_def > _loops; 281 282 /// Checks if a line is a statement or not. 283 /// 284 /// \param line The line to validate. 285 /// 286 /// \return True if the line looks like a statement, which is determined by 287 /// checking if the line starts by the predefined prefix. 288 bool 289 is_statement(const std::string& line) 290 { 291 return ((line.length() >= _prefix.length() && 292 line.substr(0, _prefix.length()) == _prefix) && 293 (line.length() < _delimiter.length() || 294 line.substr(0, _delimiter.length()) != _delimiter)); 295 } 296 297 /// Parses a given statement line into a statement definition. 298 /// 299 /// \param line The line to validate; it must be a valid statement. 300 /// 301 /// \return The parsed statement. 302 /// 303 /// \throw text::syntax_error If the input is not a valid statement. 304 statement_def 305 parse_statement(const std::string& line) 306 { 307 PRE(is_statement(line)); 308 return statement_def::parse(line.substr(_prefix.length())); 309 } 310 311 /// Processes a line from the input when not in skip mode. 312 /// 313 /// \param line The line to be processed. 314 /// \param input The input stream from which the line was read. The current 315 /// position in the stream must be after the line being processed. 316 /// \param output The output stream into which to write the results. 317 /// 318 /// \throw text::syntax_error If the input is not valid. 319 void 320 handle_normal(const std::string& line, std::istream& input, 321 std::ostream& output) 322 { 323 if (!is_statement(line)) { 324 // Fast path. Mostly to avoid an indentation level for the big 325 // chunk of code below. 326 output << line << '\n'; 327 return; 328 } 329 330 const statement_def statement = parse_statement(line); 331 332 switch (statement.type) { 333 case statement_def::type_else: 334 _skip.top() = !_skip.top(); 335 break; 336 337 case statement_def::type_endif: 338 _if_level--; 339 break; 340 341 case statement_def::type_endloop: { 342 PRE(_loops.size() == _loop_level); 343 loop_def& loop = _loops.top(); 344 345 const std::size_t next_index = 1 + text::to_type< std::size_t >( 346 _templates.get_variable(loop.iterator)); 347 348 if (next_index < _templates.get_vector(loop.vector).size()) { 349 _templates.add_variable(loop.iterator, F("%s") % next_index); 350 input.seekg(loop.position); 351 } else { 352 _loop_level--; 353 _loops.pop(); 354 _templates.remove_variable(loop.iterator); 355 } 356 } break; 357 358 case statement_def::type_if: { 359 _if_level++; 360 const std::string value = _templates.evaluate( 361 statement.arguments[0]); 362 if (value.empty() || value == "0" || value == "false") { 363 _exit_if_level = _if_level; 364 _skip.push(true); 365 } else { 366 _skip.push(false); 367 } 368 } break; 369 370 case statement_def::type_loop: { 371 _loop_level++; 372 373 const loop_def loop(statement.arguments[0], statement.arguments[1], 374 input.tellg()); 375 if (_templates.get_vector(loop.vector).empty()) { 376 _exit_loop_level = _loop_level; 377 _skip.push(true); 378 } else { 379 _templates.add_variable(loop.iterator, "0"); 380 _loops.push(loop); 381 _skip.push(false); 382 } 383 } break; 384 } 385 } 386 387 /// Processes a line from the input when in skip mode. 388 /// 389 /// \param line The line to be processed. 390 /// 391 /// \throw text::syntax_error If the input is not valid. 392 void 393 handle_skip(const std::string& line) 394 { 395 PRE(_skip.top()); 396 397 if (!is_statement(line)) 398 return; 399 400 const statement_def statement = parse_statement(line); 401 switch (statement.type) { 402 case statement_def::type_else: 403 if (_exit_if_level == _if_level) 404 _skip.top() = !_skip.top(); 405 break; 406 407 case statement_def::type_endif: 408 INV(_if_level >= _exit_if_level); 409 if (_if_level == _exit_if_level) 410 _skip.top() = false; 411 _if_level--; 412 _skip.pop(); 413 break; 414 415 case statement_def::type_endloop: 416 INV(_loop_level >= _exit_loop_level); 417 if (_loop_level == _exit_loop_level) 418 _skip.top() = false; 419 _loop_level--; 420 _skip.pop(); 421 break; 422 423 case statement_def::type_if: 424 _if_level++; 425 _skip.push(true); 426 break; 427 428 case statement_def::type_loop: 429 _loop_level++; 430 _skip.push(true); 431 break; 432 433 default: 434 break; 435 } 436 } 437 438 /// Evaluates expressions on a given input line. 439 /// 440 /// An expression is surrounded by _delimiter on both sides. We scan the 441 /// string from left to right finding any expressions that may appear, yank 442 /// them out and call templates_def::evaluate() to get their value. 443 /// 444 /// Lonely or unbalanced appearances of _delimiter on the input line are 445 /// not considered an error, given that the user may actually want to supply 446 /// that character sequence without being interpreted as a template. 447 /// 448 /// \param in_line The input line from which to evaluate expressions. 449 /// 450 /// \return The evaluated line. 451 /// 452 /// \throw text::syntax_error If the expressions in the line are malformed. 453 std::string 454 evaluate(const std::string& in_line) 455 { 456 std::string out_line; 457 458 std::string::size_type last_pos = 0; 459 while (last_pos != std::string::npos) { 460 const std::string::size_type open_pos = in_line.find( 461 _delimiter, last_pos); 462 if (open_pos == std::string::npos) { 463 out_line += in_line.substr(last_pos); 464 last_pos = std::string::npos; 465 } else { 466 const std::string::size_type close_pos = in_line.find( 467 _delimiter, open_pos + _delimiter.length()); 468 if (close_pos == std::string::npos) { 469 out_line += in_line.substr(last_pos); 470 last_pos = std::string::npos; 471 } else { 472 out_line += in_line.substr(last_pos, open_pos - last_pos); 473 out_line += _templates.evaluate(in_line.substr( 474 open_pos + _delimiter.length(), 475 close_pos - open_pos - _delimiter.length())); 476 last_pos = close_pos + _delimiter.length(); 477 } 478 } 479 } 480 481 return out_line; 482 } 483 484 public: 485 /// Constructs a new template parser. 486 /// 487 /// \param templates_ The templates to apply to the processed file. 488 /// \param prefix_ The prefix that identifies lines as statements. 489 /// \param delimiter_ Delimiter to surround a variable instantiation. 490 templates_parser(const text::templates_def& templates_, 491 const std::string& prefix_, 492 const std::string& delimiter_) : 493 _templates(templates_), 494 _prefix(prefix_), 495 _delimiter(delimiter_), 496 _if_level(0), 497 _exit_if_level(0), 498 _loop_level(0), 499 _exit_loop_level(0) 500 { 501 } 502 503 /// Applies the templates to a given input. 504 /// 505 /// \param input The stream to which to apply the templates. 506 /// \param output The stream into which to write the results. 507 /// 508 /// \throw text::syntax_error If the input is not valid. Note that the 509 /// is not guaranteed to be unmodified on exit if an error is 510 /// encountered. 511 void 512 instantiate(std::istream& input, std::ostream& output) 513 { 514 std::string line; 515 while (std::getline(input, line).good()) { 516 if (!_skip.empty() && _skip.top()) 517 handle_skip(line); 518 else 519 handle_normal(evaluate(line), input, output); 520 } 521 } 522 }; 523 524 525 } // anonymous namespace 526 527 528 /// Constructs an empty templates definition. 529 text::templates_def::templates_def(void) 530 { 531 } 532 533 534 /// Sets a string variable in the templates. 535 /// 536 /// If the variable already exists, its value is replaced. This behavior is 537 /// required to implement iterators, but client code should really not be 538 /// redefining variables. 539 /// 540 /// \pre The variable must not already exist as a vector. 541 /// 542 /// \param name The name of the variable to set. 543 /// \param value The value to set the given variable to. 544 void 545 text::templates_def::add_variable(const std::string& name, 546 const std::string& value) 547 { 548 PRE(_vectors.find(name) == _vectors.end()); 549 _variables[name] = value; 550 } 551 552 553 /// Unsets a string variable from the templates. 554 /// 555 /// Client code has no reason to use this. This is only required to implement 556 /// proper scoping of loop iterators. 557 /// 558 /// \pre The variable must exist. 559 /// 560 /// \param name The name of the variable to remove from the templates. 561 void 562 text::templates_def::remove_variable(const std::string& name) 563 { 564 PRE(_variables.find(name) != _variables.end()); 565 _variables.erase(_variables.find(name)); 566 } 567 568 569 /// Creates a new vector in the templates. 570 /// 571 /// If the vector already exists, it is cleared. Client code should really not 572 /// be redefining variables. 573 /// 574 /// \pre The vector must not already exist as a variable. 575 /// 576 /// \param name The name of the vector to set. 577 void 578 text::templates_def::add_vector(const std::string& name) 579 { 580 PRE(_variables.find(name) == _variables.end()); 581 _vectors[name] = strings_vector(); 582 } 583 584 585 /// Adds a value to an existing vector in the templates. 586 /// 587 /// \pre name The vector must exist. 588 /// 589 /// \param name The name of the vector to append the value to. 590 /// \param value The textual value to append to the vector. 591 void 592 text::templates_def::add_to_vector(const std::string& name, 593 const std::string& value) 594 { 595 PRE(_variables.find(name) == _variables.end()); 596 PRE(_vectors.find(name) != _vectors.end()); 597 _vectors[name].push_back(value); 598 } 599 600 601 /// Checks whether a given identifier exists as a variable or a vector. 602 /// 603 /// This is used to implement the evaluation of conditions in if clauses. 604 /// 605 /// \param name The name of the variable or vector. 606 /// 607 /// \return True if the given name exists as a variable or a vector; false 608 /// otherwise. 609 bool 610 text::templates_def::exists(const std::string& name) const 611 { 612 return (_variables.find(name) != _variables.end() || 613 _vectors.find(name) != _vectors.end()); 614 } 615 616 617 /// Gets the value of a variable. 618 /// 619 /// \param name The name of the variable. 620 /// 621 /// \return The value of the requested variable. 622 /// 623 /// \throw text::syntax_error If the variable does not exist. 624 const std::string& 625 text::templates_def::get_variable(const std::string& name) const 626 { 627 const variables_map::const_iterator iter = _variables.find(name); 628 if (iter == _variables.end()) 629 throw text::syntax_error(F("Unknown variable '%s'") % name); 630 return (*iter).second; 631 } 632 633 634 /// Gets a vector. 635 /// 636 /// \param name The name of the vector. 637 /// 638 /// \return A reference to the requested vector. 639 /// 640 /// \throw text::syntax_error If the vector does not exist. 641 const text::templates_def::strings_vector& 642 text::templates_def::get_vector(const std::string& name) const 643 { 644 const vectors_map::const_iterator iter = _vectors.find(name); 645 if (iter == _vectors.end()) 646 throw text::syntax_error(F("Unknown vector '%s'") % name); 647 return (*iter).second; 648 } 649 650 651 /// Indexes a vector and gets the value. 652 /// 653 /// \param name The name of the vector to index. 654 /// \param index_name The name of a variable representing the index to use. 655 /// This must be convertible to a natural. 656 /// 657 /// \return The value of the vector at the given index. 658 /// 659 /// \throw text::syntax_error If the vector does not existor if the index is out 660 /// of range. 661 const std::string& 662 text::templates_def::get_vector(const std::string& name, 663 const std::string& index_name) const 664 { 665 const strings_vector& vector = get_vector(name); 666 const std::string& index_str = get_variable(index_name); 667 668 std::size_t index; 669 try { 670 index = text::to_type< std::size_t >(index_str); 671 } catch (const text::syntax_error& e) { 672 throw text::syntax_error(F("Index '%s' not an integer, value '%s'") % 673 index_name % index_str); 674 } 675 if (index >= vector.size()) 676 throw text::syntax_error(F("Index '%s' out of range at position '%s'") % 677 index_name % index); 678 679 return vector[index]; 680 } 681 682 683 /// Evaluates a expression using these templates. 684 /// 685 /// An expression is a query on the current templates to fetch a particular 686 /// value. The value is always returned as a string, as this is how templates 687 /// are internally stored. 688 /// 689 /// \param expression The expression to evaluate. This should not include any 690 /// of the delimiters used in the user input, as otherwise the expression 691 /// will not be evaluated properly. 692 /// 693 /// \return The result of the expression evaluation as a string. 694 /// 695 /// \throw text::syntax_error If there is any problem while evaluating the 696 /// expression. 697 std::string 698 text::templates_def::evaluate(const std::string& expression) const 699 { 700 const std::string::size_type paren_open = expression.find('('); 701 if (paren_open == std::string::npos) { 702 return get_variable(expression); 703 } else { 704 const std::string::size_type paren_close = expression.find( 705 ')', paren_open); 706 if (paren_close == std::string::npos) 707 throw text::syntax_error(F("Expected ')' in expression '%s')") % 708 expression); 709 if (paren_close != expression.length() - 1) 710 throw text::syntax_error(F("Unexpected text found after ')' in " 711 "expression '%s'") % expression); 712 713 const std::string arg0 = expression.substr(0, paren_open); 714 const std::string arg1 = expression.substr( 715 paren_open + 1, paren_close - paren_open - 1); 716 if (arg0 == "defined") { 717 return exists(arg1) ? "true" : "false"; 718 } else if (arg0 == "length") { 719 return F("%s") % get_vector(arg1).size(); 720 } else { 721 return get_vector(arg0, arg1); 722 } 723 } 724 } 725 726 727 /// Applies a set of templates to an input stream. 728 /// 729 /// \param templates The templates to use. 730 /// \param input The input to process. 731 /// \param output The stream to which to write the processed text. 732 /// 733 /// \throw text::syntax_error If there is any problem processing the input. 734 void 735 text::instantiate(const templates_def& templates, 736 std::istream& input, std::ostream& output) 737 { 738 templates_parser parser(templates, "%", "%%"); 739 parser.instantiate(input, output); 740 } 741 742 743 /// Applies a set of templates to an input file and writes an output file. 744 /// 745 /// \param templates The templates to use. 746 /// \param input_file The path to the input to process. 747 /// \param output_file The path to the file into which to write the output. 748 /// 749 /// \throw text::error If the input or output files cannot be opened. 750 /// \throw text::syntax_error If there is any problem processing the input. 751 void 752 text::instantiate(const templates_def& templates, 753 const fs::path& input_file, const fs::path& output_file) 754 { 755 std::ifstream input(input_file.c_str()); 756 if (!input) 757 throw text::error(F("Failed to open %s for read") % input_file); 758 759 std::ofstream output(output_file.c_str()); 760 if (!output) 761 throw text::error(F("Failed to open %s for write") % output_file); 762 763 instantiate(templates, input, output); 764 } 765