1 /*- 2 * SPDX-License-Identifier: MIT-CMU 3 * 4 * Mach Operating System 5 * Copyright (c) 1991,1990 Carnegie Mellon University 6 * All Rights Reserved. 7 * 8 * Permission to use, copy, modify and distribute this software and its 9 * documentation is hereby granted, provided that both the copyright 10 * notice and this permission notice appear in all copies of the 11 * software, derivative works or modified versions, and any portions 12 * thereof, and that both notices appear in supporting documentation. 13 * 14 * CARNEGIE MELLON ALLOWS FREE USE OF THIS SOFTWARE IN ITS 15 * CONDITION. CARNEGIE MELLON DISCLAIMS ANY LIABILITY OF ANY KIND FOR 16 * ANY DAMAGES WHATSOEVER RESULTING FROM THE USE OF THIS SOFTWARE. 17 * 18 * Carnegie Mellon requests users of this software to return to 19 * 20 * Software Distribution Coordinator or Software.Distribution@CS.CMU.EDU 21 * School of Computer Science 22 * Carnegie Mellon University 23 * Pittsburgh PA 15213-3890 24 * 25 * any improvements or extensions that they make and grant Carnegie the 26 * rights to redistribute these changes. 27 */ 28 /* 29 * Author: David B. Golub, Carnegie Mellon University 30 * Date: 7/90 31 */ 32 /* 33 * Lexical analyzer. 34 */ 35 36 #include <sys/cdefs.h> 37 #include <sys/param.h> 38 #include <sys/libkern.h> 39 #include <sys/lock.h> 40 41 #include <ddb/ddb.h> 42 #include <ddb/db_lex.h> 43 44 static char db_line[DB_MAXLINE]; 45 static char * db_lp, *db_endlp; 46 47 static int db_lex(int); 48 static void db_flush_line(void); 49 static int db_read_char(void); 50 static void db_unread_char(int); 51 52 int 53 db_read_line(void) 54 { 55 int i; 56 57 i = db_readline(db_line, sizeof(db_line)); 58 if (i == 0) 59 return (0); /* EOI */ 60 db_lp = db_line; 61 db_endlp = db_lp + i; 62 return (i); 63 } 64 65 /* 66 * Simulate a line of input into DDB. 67 */ 68 void 69 db_inject_line(const char *command) 70 { 71 72 strlcpy(db_line, command, sizeof(db_line)); 73 db_lp = db_line; 74 db_endlp = db_lp + strlen(command); 75 } 76 77 /* 78 * In rare cases, we may want to pull the remainder of the line input 79 * verbatim, rather than lexing it. For example, when assigning literal 80 * values associated with scripts. In that case, return a static pointer to 81 * the current location in the input buffer. The caller must be aware that 82 * the contents are not stable if other lex/input calls are made. 83 */ 84 char * 85 db_get_line(void) 86 { 87 88 return (db_lp); 89 } 90 91 static void 92 db_flush_line(void) 93 { 94 db_lp = db_line; 95 db_endlp = db_line; 96 } 97 98 static int 99 db_read_char(void) 100 { 101 int c; 102 103 if (db_lp >= db_endlp) 104 c = -1; 105 else 106 c = *db_lp++; 107 return (c); 108 } 109 110 static void 111 db_unread_char(int c) 112 { 113 114 if (c == -1) { 115 /* Unread EOL at EOL is okay. */ 116 if (db_lp < db_endlp) 117 db_error("db_unread_char(-1) before end of line\n"); 118 } else { 119 if (db_lp > db_line) { 120 db_lp--; 121 if (*db_lp != c) 122 db_error("db_unread_char() wrong char\n"); 123 } else { 124 db_error("db_unread_char() at beginning of line\n"); 125 } 126 } 127 } 128 129 static int db_look_token = 0; 130 131 void 132 db_unread_token(int t) 133 { 134 db_look_token = t; 135 } 136 137 int 138 db_read_token_flags(int flags) 139 { 140 int t; 141 142 MPASS((flags & ~(DRT_VALID_FLAGS_MASK)) == 0); 143 144 if (db_look_token) { 145 t = db_look_token; 146 db_look_token = 0; 147 } 148 else 149 t = db_lex(flags); 150 return (t); 151 } 152 153 db_expr_t db_tok_number; 154 char db_tok_string[TOK_STRING_SIZE]; 155 156 db_expr_t db_radix = 16; 157 158 void 159 db_flush_lex(void) 160 { 161 db_flush_line(); 162 db_look_token = 0; 163 } 164 165 static int 166 db_lex(int flags) 167 { 168 int c, n, radix_mode; 169 bool lex_wspace, lex_hex_numbers; 170 171 switch (flags & DRT_RADIX_MASK) { 172 case DRT_DEFAULT_RADIX: 173 radix_mode = -1; 174 break; 175 case DRT_OCTAL: 176 radix_mode = 8; 177 break; 178 case DRT_DECIMAL: 179 radix_mode = 10; 180 break; 181 case DRT_HEXADECIMAL: 182 radix_mode = 16; 183 break; 184 } 185 186 lex_wspace = ((flags & DRT_WSPACE) != 0); 187 lex_hex_numbers = ((flags & DRT_HEX) != 0); 188 189 c = db_read_char(); 190 for (n = 0; c <= ' ' || c > '~'; n++) { 191 if (c == '\n' || c == -1) 192 return (tEOL); 193 c = db_read_char(); 194 } 195 if (lex_wspace && n != 0) { 196 db_unread_char(c); 197 return (tWSPACE); 198 } 199 200 if ((c >= '0' && c <= '9') || 201 (lex_hex_numbers && 202 ((c >= 'a' && c <= 'f') || 203 (c >= 'A' && c <= 'F')))) { 204 /* number */ 205 int r, digit = 0; 206 207 if (radix_mode != -1) 208 r = radix_mode; 209 else if (c != '0') 210 r = db_radix; 211 else { 212 c = db_read_char(); 213 if (c == 'O' || c == 'o') 214 r = 8; 215 else if (c == 'T' || c == 't') 216 r = 10; 217 else if (c == 'X' || c == 'x') 218 r = 16; 219 else { 220 r = db_radix; 221 db_unread_char(c); 222 } 223 c = db_read_char(); 224 } 225 db_tok_number = 0; 226 for (;;) { 227 if (c >= '0' && c <= ((r == 8) ? '7' : '9')) 228 digit = c - '0'; 229 else if (r == 16 && ((c >= 'A' && c <= 'F') || 230 (c >= 'a' && c <= 'f'))) { 231 if (c >= 'a') 232 digit = c - 'a' + 10; 233 else if (c >= 'A') 234 digit = c - 'A' + 10; 235 } 236 else 237 break; 238 db_tok_number = db_tok_number * r + digit; 239 c = db_read_char(); 240 } 241 if ((c >= '0' && c <= '9') || 242 (c >= 'A' && c <= 'Z') || 243 (c >= 'a' && c <= 'z') || 244 (c == '_')) 245 { 246 db_error("Bad character in number\n"); 247 db_flush_lex(); 248 return (tEOF); 249 } 250 db_unread_char(c); 251 return (tNUMBER); 252 } 253 if ((c >= 'A' && c <= 'Z') || 254 (c >= 'a' && c <= 'z') || 255 c == '_' || c == '\\') 256 { 257 /* string */ 258 char *cp; 259 260 cp = db_tok_string; 261 if (c == '\\') { 262 c = db_read_char(); 263 if (c == '\n' || c == -1) 264 db_error("Bad escape\n"); 265 } 266 *cp++ = c; 267 while (1) { 268 c = db_read_char(); 269 if ((c >= 'A' && c <= 'Z') || 270 (c >= 'a' && c <= 'z') || 271 (c >= '0' && c <= '9') || 272 c == '_' || c == '\\' || c == ':' || c == '.') 273 { 274 if (c == '\\') { 275 c = db_read_char(); 276 if (c == '\n' || c == -1) 277 db_error("Bad escape\n"); 278 } 279 *cp++ = c; 280 if (cp == db_tok_string+sizeof(db_tok_string)) { 281 db_error("String too long\n"); 282 db_flush_lex(); 283 return (tEOF); 284 } 285 continue; 286 } 287 else { 288 *cp = '\0'; 289 break; 290 } 291 } 292 db_unread_char(c); 293 return (tIDENT); 294 } 295 296 switch (c) { 297 case '+': 298 return (tPLUS); 299 case '-': 300 return (tMINUS); 301 case '.': 302 c = db_read_char(); 303 if (c == '.') 304 return (tDOTDOT); 305 db_unread_char(c); 306 return (tDOT); 307 case '*': 308 return (tSTAR); 309 case '/': 310 return (tSLASH); 311 case '=': 312 c = db_read_char(); 313 if (c == '=') 314 return (tLOG_EQ); 315 db_unread_char(c); 316 return (tEQ); 317 case '%': 318 return (tPCT); 319 case '#': 320 return (tHASH); 321 case '(': 322 return (tLPAREN); 323 case ')': 324 return (tRPAREN); 325 case ',': 326 return (tCOMMA); 327 case '"': 328 return (tDITTO); 329 case '$': 330 return (tDOLLAR); 331 case '!': 332 c = db_read_char(); 333 if (c == '='){ 334 return (tLOG_NOT_EQ); 335 } 336 db_unread_char(c); 337 return (tEXCL); 338 case ':': 339 c = db_read_char(); 340 if (c == ':') 341 return (tCOLONCOLON); 342 db_unread_char(c); 343 return (tCOLON); 344 case ';': 345 return (tSEMI); 346 case '&': 347 c = db_read_char(); 348 if (c == '&') 349 return (tLOG_AND); 350 db_unread_char(c); 351 return (tBIT_AND); 352 case '|': 353 c = db_read_char(); 354 if (c == '|') 355 return (tLOG_OR); 356 db_unread_char(c); 357 return (tBIT_OR); 358 case '<': 359 c = db_read_char(); 360 if (c == '<') 361 return (tSHIFT_L); 362 if (c == '=') 363 return (tLESS_EQ); 364 db_unread_char(c); 365 return (tLESS); 366 case '>': 367 c = db_read_char(); 368 if (c == '>') 369 return (tSHIFT_R); 370 if (c == '=') 371 return (tGREATER_EQ); 372 db_unread_char(c); 373 return (tGREATER); 374 case '?': 375 return (tQUESTION); 376 case '~': 377 return (tBIT_NOT); 378 case -1: 379 return (tEOF); 380 } 381 db_printf("Bad character\n"); 382 db_flush_lex(); 383 return (tEOF); 384 } 385