1 2 /* ntp_scanner.c 3 * 4 * The source code for a simple lexical analyzer. 5 * 6 * Written By: Sachin Kamboj 7 * University of Delaware 8 * Newark, DE 19711 9 * Copyright (c) 2006 10 */ 11 12 #ifdef HAVE_CONFIG_H 13 # include <config.h> 14 #endif 15 16 #include <stdio.h> 17 #include <ctype.h> 18 #include <stdlib.h> 19 #include <errno.h> 20 #include <string.h> 21 22 #include "ntpd.h" 23 #include "ntp_config.h" 24 #include "ntpsim.h" 25 #include "ntp_scanner.h" 26 #include "ntp_parser.h" 27 28 /* ntp_keyword.h declares finite state machine and token text */ 29 #include "ntp_keyword.h" 30 31 32 33 /* SCANNER GLOBAL VARIABLES 34 * ------------------------ 35 */ 36 37 #define MAX_LEXEME (1024 + 1) /* The maximum size of a lexeme */ 38 char yytext[MAX_LEXEME]; /* Buffer for storing the input text/lexeme */ 39 u_int32 conf_file_sum; /* Simple sum of characters read */ 40 41 42 43 44 /* CONSTANTS 45 * --------- 46 */ 47 48 49 /* SCANNER GLOBAL VARIABLES 50 * ------------------------ 51 */ 52 const char special_chars[] = "{}(),;|="; 53 54 55 /* FUNCTIONS 56 * --------- 57 */ 58 59 static int is_keyword(char *lexeme, follby *pfollowedby); 60 61 62 /* 63 * keyword() - Return the keyword associated with token T_ identifier. 64 * See also token_name() for the string-ized T_ identifier. 65 * Example: keyword(T_Server) returns "server" 66 * token_name(T_Server) returns "T_Server" 67 */ 68 const char * 69 keyword( 70 int token 71 ) 72 { 73 size_t i; 74 const char *text; 75 76 i = token - LOWEST_KEYWORD_ID; 77 78 if (i < COUNTOF(keyword_text)) 79 text = keyword_text[i]; 80 else 81 text = NULL; 82 83 return (text != NULL) 84 ? text 85 : "(keyword not found)"; 86 } 87 88 89 /* FILE INTERFACE 90 * -------------- 91 * We define a couple of wrapper functions around the standard C fgetc 92 * and ungetc functions in order to include positional bookkeeping 93 */ 94 95 struct FILE_INFO * 96 F_OPEN( 97 const char *path, 98 const char *mode 99 ) 100 { 101 struct FILE_INFO *my_info; 102 103 my_info = emalloc(sizeof *my_info); 104 105 my_info->line_no = 1; 106 my_info->col_no = 0; 107 my_info->prev_line_col_no = 0; 108 my_info->prev_token_col_no = 0; 109 my_info->fname = path; 110 111 my_info->fd = fopen(path, mode); 112 if (NULL == my_info->fd) { 113 free(my_info); 114 return NULL; 115 } 116 return my_info; 117 } 118 119 int 120 FGETC( 121 struct FILE_INFO *stream 122 ) 123 { 124 int ch; 125 126 do 127 ch = fgetc(stream->fd); 128 while (EOF != ch && (CHAR_MIN > ch || ch > CHAR_MAX)); 129 130 if (EOF != ch) { 131 if (input_from_file) 132 conf_file_sum += (u_char)ch; 133 ++stream->col_no; 134 if (ch == '\n') { 135 stream->prev_line_col_no = stream->col_no; 136 ++stream->line_no; 137 stream->col_no = 1; 138 } 139 } 140 141 return ch; 142 } 143 144 /* BUGS: 1. Function will fail on more than one line of pushback 145 * 2. No error checking is done to see if ungetc fails 146 * SK: I don't think its worth fixing these bugs for our purposes ;-) 147 */ 148 int 149 UNGETC( 150 int ch, 151 struct FILE_INFO *stream 152 ) 153 { 154 if (input_from_file) 155 conf_file_sum -= (u_char)ch; 156 if (ch == '\n') { 157 stream->col_no = stream->prev_line_col_no; 158 stream->prev_line_col_no = -1; 159 --stream->line_no; 160 } 161 --stream->col_no; 162 return ungetc(ch, stream->fd); 163 } 164 165 int 166 FCLOSE( 167 struct FILE_INFO *stream 168 ) 169 { 170 int ret_val = fclose(stream->fd); 171 172 if (!ret_val) 173 free(stream); 174 return ret_val; 175 } 176 177 /* STREAM INTERFACE 178 * ---------------- 179 * Provide a wrapper for the stream functions so that the 180 * stream can either read from a file or from a character 181 * array. 182 * NOTE: This is not very efficient for reading from character 183 * arrays, but needed to allow remote configuration where the 184 * configuration command is provided through ntpq. 185 * 186 * The behavior of there two functions is determined by the 187 * input_from_file flag. 188 */ 189 190 static int 191 get_next_char( 192 struct FILE_INFO *ip_file 193 ) 194 { 195 char ch; 196 197 if (input_from_file) 198 return FGETC(ip_file); 199 else { 200 if (remote_config.buffer[remote_config.pos] == '\0') 201 return EOF; 202 else { 203 ip_file->col_no++; 204 ch = remote_config.buffer[remote_config.pos++]; 205 if (ch == '\n') { 206 ip_file->prev_line_col_no = ip_file->col_no; 207 ++ip_file->line_no; 208 ip_file->col_no = 1; 209 } 210 return ch; 211 } 212 } 213 } 214 215 static void 216 push_back_char( 217 struct FILE_INFO *ip_file, 218 int ch 219 ) 220 { 221 if (input_from_file) 222 UNGETC(ch, ip_file); 223 else { 224 if (ch == '\n') { 225 ip_file->col_no = ip_file->prev_line_col_no; 226 ip_file->prev_line_col_no = -1; 227 --ip_file->line_no; 228 } 229 --ip_file->col_no; 230 231 remote_config.pos--; 232 } 233 } 234 235 236 237 /* STATE MACHINES 238 * -------------- 239 */ 240 241 /* Keywords */ 242 static int 243 is_keyword( 244 char *lexeme, 245 follby *pfollowedby 246 ) 247 { 248 follby fb; 249 int curr_s; /* current state index */ 250 int token; 251 int i; 252 253 curr_s = SCANNER_INIT_S; 254 token = 0; 255 256 for (i = 0; lexeme[i]; i++) { 257 while (curr_s && (lexeme[i] != SS_CH(sst[curr_s]))) 258 curr_s = SS_OTHER_N(sst[curr_s]); 259 260 if (curr_s && (lexeme[i] == SS_CH(sst[curr_s]))) { 261 if ('\0' == lexeme[i + 1] 262 && FOLLBY_NON_ACCEPTING 263 != SS_FB(sst[curr_s])) { 264 fb = SS_FB(sst[curr_s]); 265 *pfollowedby = fb; 266 token = curr_s; 267 break; 268 } 269 curr_s = SS_MATCH_N(sst[curr_s]); 270 } else 271 break; 272 } 273 274 return token; 275 } 276 277 278 /* Integer */ 279 static int 280 is_integer( 281 char *lexeme 282 ) 283 { 284 int i; 285 int is_neg; 286 u_int u_val; 287 288 i = 0; 289 290 /* Allow a leading minus sign */ 291 if (lexeme[i] == '-') { 292 i++; 293 is_neg = TRUE; 294 } else { 295 is_neg = FALSE; 296 } 297 298 /* Check that all the remaining characters are digits */ 299 for (; lexeme[i] != '\0'; i++) { 300 if (!isdigit((unsigned char)lexeme[i])) 301 return FALSE; 302 } 303 304 if (is_neg) 305 return TRUE; 306 307 /* Reject numbers that fit in unsigned but not in signed int */ 308 if (1 == sscanf(lexeme, "%u", &u_val)) 309 return (u_val <= INT_MAX); 310 else 311 return FALSE; 312 } 313 314 315 /* U_int -- assumes is_integer() has returned FALSE */ 316 static int 317 is_u_int( 318 char *lexeme 319 ) 320 { 321 int i; 322 int is_hex; 323 324 i = 0; 325 if ('0' == lexeme[i] && 'x' == tolower((unsigned char)lexeme[i + 1])) { 326 i += 2; 327 is_hex = TRUE; 328 } else { 329 is_hex = FALSE; 330 } 331 332 /* Check that all the remaining characters are digits */ 333 for (; lexeme[i] != '\0'; i++) { 334 if (is_hex && !isxdigit((unsigned char)lexeme[i])) 335 return FALSE; 336 if (!is_hex && !isdigit((unsigned char)lexeme[i])) 337 return FALSE; 338 } 339 340 return TRUE; 341 } 342 343 344 /* Double */ 345 static int 346 is_double( 347 char *lexeme 348 ) 349 { 350 u_int num_digits = 0; /* Number of digits read */ 351 u_int i; 352 353 i = 0; 354 355 /* Check for an optional '+' or '-' */ 356 if ('+' == lexeme[i] || '-' == lexeme[i]) 357 i++; 358 359 /* Read the integer part */ 360 for (; lexeme[i] && isdigit((unsigned char)lexeme[i]); i++) 361 num_digits++; 362 363 /* Check for the optional decimal point */ 364 if ('.' == lexeme[i]) { 365 i++; 366 /* Check for any digits after the decimal point */ 367 for (; lexeme[i] && isdigit((unsigned char)lexeme[i]); i++) 368 num_digits++; 369 } 370 371 /* 372 * The number of digits in both the decimal part and the 373 * fraction part must not be zero at this point 374 */ 375 if (!num_digits) 376 return 0; 377 378 /* Check if we are done */ 379 if (!lexeme[i]) 380 return 1; 381 382 /* There is still more input, read the exponent */ 383 if ('e' == tolower((unsigned char)lexeme[i])) 384 i++; 385 else 386 return 0; 387 388 /* Read an optional Sign */ 389 if ('+' == lexeme[i] || '-' == lexeme[i]) 390 i++; 391 392 /* Now read the exponent part */ 393 while (lexeme[i] && isdigit((unsigned char)lexeme[i])) 394 i++; 395 396 /* Check if we are done */ 397 if (!lexeme[i]) 398 return 1; 399 else 400 return 0; 401 } 402 403 404 /* is_special() - Test whether a character is a token */ 405 static inline int 406 is_special( 407 int ch 408 ) 409 { 410 return strchr(special_chars, ch) != NULL; 411 } 412 413 414 static int 415 is_EOC( 416 int ch 417 ) 418 { 419 if ((old_config_style && (ch == '\n')) || 420 (!old_config_style && (ch == ';'))) 421 return 1; 422 return 0; 423 } 424 425 426 char * 427 quote_if_needed(char *str) 428 { 429 char *ret; 430 size_t len; 431 size_t octets; 432 433 len = strlen(str); 434 octets = len + 2 + 1; 435 ret = emalloc(octets); 436 if ('"' != str[0] 437 && (strcspn(str, special_chars) < len 438 || strchr(str, ' ') != NULL)) { 439 snprintf(ret, octets, "\"%s\"", str); 440 } else 441 strlcpy(ret, str, octets); 442 443 return ret; 444 } 445 446 447 static int 448 create_string_token( 449 char *lexeme 450 ) 451 { 452 char *pch; 453 454 /* 455 * ignore end of line whitespace 456 */ 457 pch = lexeme; 458 while (*pch && isspace((unsigned char)*pch)) 459 pch++; 460 461 if (!*pch) { 462 yylval.Integer = T_EOC; 463 return yylval.Integer; 464 } 465 466 yylval.String = estrdup(lexeme); 467 return T_String; 468 } 469 470 471 /* 472 * yylex() - function that does the actual scanning. 473 * Bison expects this function to be called yylex and for it to take no 474 * input and return an int. 475 * Conceptually yylex "returns" yylval as well as the actual return 476 * value representing the token or type. 477 */ 478 int 479 yylex( 480 struct FILE_INFO *ip_file 481 ) 482 { 483 static follby followedby = FOLLBY_TOKEN; 484 size_t i; 485 int instring; 486 int yylval_was_set; 487 int converted; 488 int token; /* The return value */ 489 int ch; 490 491 if (input_from_file) 492 ip_file = fp[curr_include_level]; 493 instring = FALSE; 494 yylval_was_set = FALSE; 495 496 do { 497 /* Ignore whitespace at the beginning */ 498 while (EOF != (ch = get_next_char(ip_file)) && 499 isspace(ch) && 500 !is_EOC(ch)) 501 ; /* Null Statement */ 502 503 if (EOF == ch) { 504 505 if (!input_from_file || curr_include_level <= 0) 506 return 0; 507 508 FCLOSE(fp[curr_include_level]); 509 ip_file = fp[--curr_include_level]; 510 token = T_EOC; 511 goto normal_return; 512 513 } else if (is_EOC(ch)) { 514 515 /* end FOLLBY_STRINGS_TO_EOC effect */ 516 followedby = FOLLBY_TOKEN; 517 token = T_EOC; 518 goto normal_return; 519 520 } else if (is_special(ch) && FOLLBY_TOKEN == followedby) { 521 /* special chars are their own token values */ 522 token = ch; 523 /* 524 * '=' outside simulator configuration implies 525 * a single string following as in: 526 * setvar Owner = "The Boss" default 527 */ 528 if ('=' == ch && old_config_style) 529 followedby = FOLLBY_STRING; 530 yytext[0] = (char)ch; 531 yytext[1] = '\0'; 532 goto normal_return; 533 } else 534 push_back_char(ip_file, ch); 535 536 /* save the position of start of the token */ 537 ip_file->prev_token_line_no = ip_file->line_no; 538 ip_file->prev_token_col_no = ip_file->col_no; 539 540 /* Read in the lexeme */ 541 i = 0; 542 while (EOF != (ch = get_next_char(ip_file))) { 543 544 yytext[i] = (char)ch; 545 546 /* Break on whitespace or a special character */ 547 if (isspace(ch) || is_EOC(ch) 548 || '"' == ch 549 || (FOLLBY_TOKEN == followedby 550 && is_special(ch))) 551 break; 552 553 /* Read the rest of the line on reading a start 554 of comment character */ 555 if ('#' == ch) { 556 while (EOF != (ch = get_next_char(ip_file)) 557 && '\n' != ch) 558 ; /* Null Statement */ 559 break; 560 } 561 562 i++; 563 if (i >= COUNTOF(yytext)) 564 goto lex_too_long; 565 } 566 /* Pick up all of the string inside between " marks, to 567 * end of line. If we make it to EOL without a 568 * terminating " assume it for them. 569 * 570 * XXX - HMS: I'm not sure we want to assume the closing " 571 */ 572 if ('"' == ch) { 573 instring = TRUE; 574 while (EOF != (ch = get_next_char(ip_file)) && 575 ch != '"' && ch != '\n') { 576 yytext[i++] = (char)ch; 577 if (i >= COUNTOF(yytext)) 578 goto lex_too_long; 579 } 580 /* 581 * yytext[i] will be pushed back as not part of 582 * this lexeme, but any closing quote should 583 * not be pushed back, so we read another char. 584 */ 585 if ('"' == ch) 586 ch = get_next_char(ip_file); 587 } 588 /* Pushback the last character read that is not a part 589 * of this lexeme. 590 * If the last character read was an EOF, pushback a 591 * newline character. This is to prevent a parse error 592 * when there is no newline at the end of a file. 593 */ 594 if (EOF == ch) 595 push_back_char(ip_file, '\n'); 596 else 597 push_back_char(ip_file, ch); 598 yytext[i] = '\0'; 599 } while (i == 0); 600 601 /* Now return the desired token */ 602 603 /* First make sure that the parser is *not* expecting a string 604 * as the next token (based on the previous token that was 605 * returned) and that we haven't read a string. 606 */ 607 608 if (followedby == FOLLBY_TOKEN && !instring) { 609 token = is_keyword(yytext, &followedby); 610 if (token) { 611 /* 612 * T_Server is exceptional as it forces the 613 * following token to be a string in the 614 * non-simulator parts of the configuration, 615 * but in the simulator configuration section, 616 * "server" is followed by "=" which must be 617 * recognized as a token not a string. 618 */ 619 if (T_Server == token && !old_config_style) 620 followedby = FOLLBY_TOKEN; 621 goto normal_return; 622 } else if (is_integer(yytext)) { 623 yylval_was_set = TRUE; 624 errno = 0; 625 if ((yylval.Integer = strtol(yytext, NULL, 10)) == 0 626 && ((errno == EINVAL) || (errno == ERANGE))) { 627 msyslog(LOG_ERR, 628 "Integer cannot be represented: %s", 629 yytext); 630 if (input_from_file) { 631 exit(1); 632 } else { 633 /* force end of parsing */ 634 yylval.Integer = 0; 635 return 0; 636 } 637 } 638 token = T_Integer; 639 goto normal_return; 640 } else if (is_u_int(yytext)) { 641 yylval_was_set = TRUE; 642 if ('0' == yytext[0] && 643 'x' == tolower((unsigned char)yytext[1])) 644 converted = sscanf(&yytext[2], "%x", 645 &yylval.U_int); 646 else 647 converted = sscanf(yytext, "%u", 648 &yylval.U_int); 649 if (1 != converted) { 650 msyslog(LOG_ERR, 651 "U_int cannot be represented: %s", 652 yytext); 653 if (input_from_file) { 654 exit(1); 655 } else { 656 /* force end of parsing */ 657 yylval.Integer = 0; 658 return 0; 659 } 660 } 661 token = T_U_int; 662 goto normal_return; 663 } else if (is_double(yytext)) { 664 yylval_was_set = TRUE; 665 errno = 0; 666 if ((yylval.Double = atof(yytext)) == 0 && errno == ERANGE) { 667 msyslog(LOG_ERR, 668 "Double too large to represent: %s", 669 yytext); 670 exit(1); 671 } else { 672 token = T_Double; 673 goto normal_return; 674 } 675 } else { 676 /* Default: Everything is a string */ 677 yylval_was_set = TRUE; 678 token = create_string_token(yytext); 679 goto normal_return; 680 } 681 } 682 683 /* 684 * Either followedby is not FOLLBY_TOKEN or this lexeme is part 685 * of a string. Hence, we need to return T_String. 686 * 687 * _Except_ we might have a -4 or -6 flag on a an association 688 * configuration line (server, peer, pool, etc.). 689 * 690 * This is a terrible hack, but the grammar is ambiguous so we 691 * don't have a choice. [SK] 692 * 693 * The ambiguity is in the keyword scanner, not ntp_parser.y. 694 * We do not require server addresses be quoted in ntp.conf, 695 * complicating the scanner's job. To avoid trying (and 696 * failing) to match an IP address or DNS name to a keyword, 697 * the association keywords use FOLLBY_STRING in the keyword 698 * table, which tells the scanner to force the next token to be 699 * a T_String, so it does not try to match a keyword but rather 700 * expects a string when -4/-6 modifiers to server, peer, etc. 701 * are encountered. 702 * restrict -4 and restrict -6 parsing works correctly without 703 * this hack, as restrict uses FOLLBY_TOKEN. [DH] 704 */ 705 if ('-' == yytext[0]) { 706 if ('4' == yytext[1]) { 707 token = T_Ipv4_flag; 708 goto normal_return; 709 } else if ('6' == yytext[1]) { 710 token = T_Ipv6_flag; 711 goto normal_return; 712 } 713 } 714 715 instring = FALSE; 716 if (FOLLBY_STRING == followedby) 717 followedby = FOLLBY_TOKEN; 718 719 yylval_was_set = TRUE; 720 token = create_string_token(yytext); 721 722 normal_return: 723 if (T_EOC == token) 724 DPRINTF(4,("\t<end of command>\n")); 725 else 726 DPRINTF(4, ("yylex: lexeme '%s' -> %s\n", yytext, 727 token_name(token))); 728 729 if (!yylval_was_set) 730 yylval.Integer = token; 731 732 return token; 733 734 lex_too_long: 735 yytext[min(sizeof(yytext) - 1, 50)] = 0; 736 msyslog(LOG_ERR, 737 "configuration item on line %d longer than limit of %lu, began with '%s'", 738 ip_file->line_no, (u_long)min(sizeof(yytext) - 1, 50), 739 yytext); 740 741 /* 742 * If we hit the length limit reading the startup configuration 743 * file, abort. 744 */ 745 if (input_from_file) 746 exit(sizeof(yytext) - 1); 747 748 /* 749 * If it's runtime configuration via ntpq :config treat it as 750 * if the configuration text ended before the too-long lexeme, 751 * hostname, or string. 752 */ 753 yylval.Integer = 0; 754 return 0; 755 } 756