1 /*- 2 * SPDX-License-Identifier: MIT-CMU 3 * 4 * Mach Operating System 5 * Copyright (c) 1991,1990 Carnegie Mellon University 6 * All Rights Reserved. 7 * 8 * Permission to use, copy, modify and distribute this software and its 9 * documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR 16 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 /* 29 * Author: David B. Golub, Carnegie Mellon University 30 * Date: 7/90 31 */ 32 /* 33 * Lexical analyzer. 34 */ 35 36 #include <sys/param.h> 37 #include <sys/libkern.h> 38 #include <sys/lock.h> 39 40 #include <ddb/ddb.h> 41 #include <ddb/db_lex.h> 42 43 static char db_line[DB_MAXLINE]; 44 static char * db_lp, *db_endlp; 45 46 static int db_lex(int); 47 static void db_flush_line(void); 48 static int db_read_char(void); 49 static void db_unread_char(int); 50 51 int 52 db_read_line(void) 53 { 54 int i; 55 56 i = db_readline(db_line, sizeof(db_line)); 57 if (i == 0) 58 return (0); /* EOI */ 59 db_lp = db_line; 60 db_endlp = db_lp + i; 61 return (i); 62 } 63 64 /* 65 * Simulate a line of input into DDB. 66 */ 67 void 68 db_inject_line(const char *command) 69 { 70 71 strlcpy(db_line, command, sizeof(db_line)); 72 db_lp = db_line; 73 db_endlp = db_lp + strlen(command); 74 } 75 76 /* 77 * In rare cases, we may want to pull the remainder of the line input 78 * verbatim, rather than lexing it. For example, when assigning literal 79 * values associated with scripts. In that case, return a static pointer to 80 * the current location in the input buffer. The caller must be aware that 81 * the contents are not stable if other lex/input calls are made. 82 */ 83 char * 84 db_get_line(void) 85 { 86 87 return (db_lp); 88 } 89 90 static void 91 db_flush_line(void) 92 { 93 db_lp = db_line; 94 db_endlp = db_line; 95 } 96 97 static int 98 db_read_char(void) 99 { 100 int c; 101 102 if (db_lp >= db_endlp) 103 c = -1; 104 else 105 c = *db_lp++; 106 return (c); 107 } 108 109 static void 110 db_unread_char(int c) 111 { 112 113 if (c == -1) { 114 /* Unread EOL at EOL is okay. */ 115 if (db_lp < db_endlp) 116 db_error("db_unread_char(-1) before end of line\n"); 117 } else { 118 if (db_lp > db_line) { 119 db_lp--; 120 if (*db_lp != c) 121 db_error("db_unread_char() wrong char\n"); 122 } else { 123 db_error("db_unread_char() at beginning of line\n"); 124 } 125 } 126 } 127 128 static int db_look_token = 0; 129 130 void 131 db_unread_token(int t) 132 { 133 db_look_token = t; 134 } 135 136 int 137 db_read_token_flags(int flags) 138 { 139 int t; 140 141 MPASS((flags & ~(DRT_VALID_FLAGS_MASK)) == 0); 142 143 if (db_look_token) { 144 t = db_look_token; 145 db_look_token = 0; 146 } 147 else 148 t = db_lex(flags); 149 return (t); 150 } 151 152 db_expr_t db_tok_number; 153 char db_tok_string[TOK_STRING_SIZE]; 154 155 db_expr_t db_radix = 16; 156 157 void 158 db_flush_lex(void) 159 { 160 db_flush_line(); 161 db_look_token = 0; 162 } 163 164 static int 165 db_lex(int flags) 166 { 167 int c, n, radix_mode; 168 bool lex_wspace, lex_hex_numbers; 169 170 switch (flags & DRT_RADIX_MASK) { 171 case DRT_DEFAULT_RADIX: 172 radix_mode = -1; 173 break; 174 case DRT_OCTAL: 175 radix_mode = 8; 176 break; 177 case DRT_DECIMAL: 178 radix_mode = 10; 179 break; 180 case DRT_HEXADECIMAL: 181 radix_mode = 16; 182 break; 183 } 184 185 lex_wspace = ((flags & DRT_WSPACE) != 0); 186 lex_hex_numbers = ((flags & DRT_HEX) != 0); 187 188 c = db_read_char(); 189 for (n = 0; c <= ' ' || c > '~'; n++) { 190 if (c == '\n' || c == -1) 191 return (tEOL); 192 c = db_read_char(); 193 } 194 if (lex_wspace && n != 0) { 195 db_unread_char(c); 196 return (tWSPACE); 197 } 198 199 if ((c >= '0' && c <= '9') || 200 (lex_hex_numbers && 201 ((c >= 'a' && c <= 'f') || 202 (c >= 'A' && c <= 'F')))) { 203 /* number */ 204 int r, digit = 0; 205 206 if (radix_mode != -1) 207 r = radix_mode; 208 else if (c != '0') 209 r = db_radix; 210 else { 211 c = db_read_char(); 212 if (c == 'O' || c == 'o') 213 r = 8; 214 else if (c == 'T' || c == 't') 215 r = 10; 216 else if (c == 'X' || c == 'x') 217 r = 16; 218 else { 219 r = db_radix; 220 db_unread_char(c); 221 } 222 c = db_read_char(); 223 } 224 db_tok_number = 0; 225 for (;;) { 226 if (c >= '0' && c <= ((r == 8) ? '7' : '9')) 227 digit = c - '0'; 228 else if (r == 16 && ((c >= 'A' && c <= 'F') || 229 (c >= 'a' && c <= 'f'))) { 230 if (c >= 'a') 231 digit = c - 'a' + 10; 232 else if (c >= 'A') 233 digit = c - 'A' + 10; 234 } 235 else 236 break; 237 db_tok_number = db_tok_number * r + digit; 238 c = db_read_char(); 239 } 240 if ((c >= '0' && c <= '9') || 241 (c >= 'A' && c <= 'Z') || 242 (c >= 'a' && c <= 'z') || 243 (c == '_')) 244 { 245 db_error("Bad character in number\n"); 246 db_flush_lex(); 247 return (tEOF); 248 } 249 db_unread_char(c); 250 return (tNUMBER); 251 } 252 if ((c >= 'A' && c <= 'Z') || 253 (c >= 'a' && c <= 'z') || 254 c == '_' || c == '\\') 255 { 256 /* string */ 257 char *cp; 258 259 cp = db_tok_string; 260 if (c == '\\') { 261 c = db_read_char(); 262 if (c == '\n' || c == -1) 263 db_error("Bad escape\n"); 264 } 265 *cp++ = c; 266 while (1) { 267 c = db_read_char(); 268 if ((c >= 'A' && c <= 'Z') || 269 (c >= 'a' && c <= 'z') || 270 (c >= '0' && c <= '9') || 271 c == '_' || c == '\\' || c == ':' || c == '.') 272 { 273 if (c == '\\') { 274 c = db_read_char(); 275 if (c == '\n' || c == -1) 276 db_error("Bad escape\n"); 277 } 278 *cp++ = c; 279 if (cp == db_tok_string+sizeof(db_tok_string)) { 280 db_error("String too long\n"); 281 db_flush_lex(); 282 return (tEOF); 283 } 284 continue; 285 } 286 else { 287 *cp = '\0'; 288 break; 289 } 290 } 291 db_unread_char(c); 292 return (tIDENT); 293 } 294 295 switch (c) { 296 case '+': 297 return (tPLUS); 298 case '-': 299 return (tMINUS); 300 case '.': 301 c = db_read_char(); 302 if (c == '.') 303 return (tDOTDOT); 304 db_unread_char(c); 305 return (tDOT); 306 case '*': 307 return (tSTAR); 308 case '/': 309 return (tSLASH); 310 case '=': 311 c = db_read_char(); 312 if (c == '=') 313 return (tLOG_EQ); 314 db_unread_char(c); 315 return (tEQ); 316 case '%': 317 return (tPCT); 318 case '#': 319 return (tHASH); 320 case '(': 321 return (tLPAREN); 322 case ')': 323 return (tRPAREN); 324 case ',': 325 return (tCOMMA); 326 case '"': 327 return (tDITTO); 328 case '$': 329 return (tDOLLAR); 330 case '!': 331 c = db_read_char(); 332 if (c == '='){ 333 return (tLOG_NOT_EQ); 334 } 335 db_unread_char(c); 336 return (tEXCL); 337 case ':': 338 c = db_read_char(); 339 if (c == ':') 340 return (tCOLONCOLON); 341 db_unread_char(c); 342 return (tCOLON); 343 case ';': 344 return (tSEMI); 345 case '&': 346 c = db_read_char(); 347 if (c == '&') 348 return (tLOG_AND); 349 db_unread_char(c); 350 return (tBIT_AND); 351 case '|': 352 c = db_read_char(); 353 if (c == '|') 354 return (tLOG_OR); 355 db_unread_char(c); 356 return (tBIT_OR); 357 case '<': 358 c = db_read_char(); 359 if (c == '<') 360 return (tSHIFT_L); 361 if (c == '=') 362 return (tLESS_EQ); 363 db_unread_char(c); 364 return (tLESS); 365 case '>': 366 c = db_read_char(); 367 if (c == '>') 368 return (tSHIFT_R); 369 if (c == '=') 370 return (tGREATER_EQ); 371 db_unread_char(c); 372 return (tGREATER); 373 case '?': 374 return (tQUESTION); 375 case '~': 376 return (tBIT_NOT); 377 case -1: 378 return (tEOF); 379 } 380 db_printf("Bad character\n"); 381 db_flush_lex(); 382 return (tEOF); 383 } 384