1 use crate::fallback::{ 2 self, is_ident_continue, is_ident_start, Group, Ident, LexError, Literal, Span, TokenStream, 3 TokenStreamBuilder, 4 }; 5 use crate::{Delimiter, Punct, Spacing, TokenTree}; 6 use core::char; 7 use core::str::{Bytes, CharIndices, Chars}; 8 9 #[derive(Copy, Clone, Eq, PartialEq)] 10 pub(crate) struct Cursor<'a> { 11 pub(crate) rest: &'a str, 12 #[cfg(span_locations)] 13 pub(crate) off: u32, 14 } 15 16 impl<'a> Cursor<'a> { 17 pub(crate) fn advance(&self, bytes: usize) -> Cursor<'a> { 18 let (_front, rest) = self.rest.split_at(bytes); 19 Cursor { 20 rest, 21 #[cfg(span_locations)] 22 off: self.off + _front.chars().count() as u32, 23 } 24 } 25 26 pub(crate) fn starts_with(&self, s: &str) -> bool { 27 self.rest.starts_with(s) 28 } 29 30 pub(crate) fn starts_with_char(&self, ch: char) -> bool { 31 self.rest.starts_with(ch) 32 } 33 34 pub(crate) fn starts_with_fn<Pattern>(&self, f: Pattern) -> bool 35 where 36 Pattern: FnMut(char) -> bool, 37 { 38 self.rest.starts_with(f) 39 } 40 41 pub(crate) fn is_empty(&self) -> bool { 42 self.rest.is_empty() 43 } 44 45 fn len(&self) -> usize { 46 self.rest.len() 47 } 48 49 fn as_bytes(&self) -> &'a [u8] { 50 self.rest.as_bytes() 51 } 52 53 fn bytes(&self) -> Bytes<'a> { 54 self.rest.bytes() 55 } 56 57 fn chars(&self) -> Chars<'a> { 58 self.rest.chars() 59 } 60 61 fn char_indices(&self) -> CharIndices<'a> { 62 self.rest.char_indices() 63 } 64 65 fn parse(&self, tag: &str) -> Result<Cursor<'a>, Reject> { 66 if self.starts_with(tag) { 67 Ok(self.advance(tag.len())) 68 } else { 69 Err(Reject) 70 } 71 } 72 } 73 74 pub(crate) struct Reject; 75 type PResult<'a, O> = Result<(Cursor<'a>, O), Reject>; 76 77 fn skip_whitespace(input: Cursor) -> Cursor { 78 let mut s = input; 79 80 while !s.is_empty() { 81 let byte = s.as_bytes()[0]; 82 if byte == b'/' { 83 if s.starts_with("//") 84 && (!s.starts_with("///") || s.starts_with("////")) 85 && !s.starts_with("//!") 86 { 87 let (cursor, _) = take_until_newline_or_eof(s); 88 
s = cursor; 89 continue; 90 } else if s.starts_with("/**/") { 91 s = s.advance(4); 92 continue; 93 } else if s.starts_with("/*") 94 && (!s.starts_with("/**") || s.starts_with("/***")) 95 && !s.starts_with("/*!") 96 { 97 match block_comment(s) { 98 Ok((rest, _)) => { 99 s = rest; 100 continue; 101 } 102 Err(Reject) => return s, 103 } 104 } 105 } 106 match byte { 107 b' ' | 0x09..=0x0d => { 108 s = s.advance(1); 109 continue; 110 } 111 b if b.is_ascii() => {} 112 _ => { 113 let ch = s.chars().next().unwrap(); 114 if is_whitespace(ch) { 115 s = s.advance(ch.len_utf8()); 116 continue; 117 } 118 } 119 } 120 return s; 121 } 122 s 123 } 124 125 fn block_comment(input: Cursor) -> PResult<&str> { 126 if !input.starts_with("/*") { 127 return Err(Reject); 128 } 129 130 let mut depth = 0usize; 131 let bytes = input.as_bytes(); 132 let mut i = 0usize; 133 let upper = bytes.len() - 1; 134 135 while i < upper { 136 if bytes[i] == b'/' && bytes[i + 1] == b'*' { 137 depth += 1; 138 i += 1; // eat '*' 139 } else if bytes[i] == b'*' && bytes[i + 1] == b'/' { 140 depth -= 1; 141 if depth == 0 { 142 return Ok((input.advance(i + 2), &input.rest[..i + 2])); 143 } 144 i += 1; // eat '/' 145 } 146 i += 1; 147 } 148 149 Err(Reject) 150 } 151 152 fn is_whitespace(ch: char) -> bool { 153 // Rust treats left-to-right mark and right-to-left mark as whitespace 154 ch.is_whitespace() || ch == '\u{200e}' || ch == '\u{200f}' 155 } 156 157 fn word_break(input: Cursor) -> Result<Cursor, Reject> { 158 match input.chars().next() { 159 Some(ch) if is_ident_continue(ch) => Err(Reject), 160 Some(_) | None => Ok(input), 161 } 162 } 163 164 // Rustc's representation of a macro expansion error in expression position or 165 // type position. 
const ERROR: &str = "(/*ERROR*/)";

// Lexes the whole input into a `TokenStream`.
//
// Groups are tracked with an explicit stack: each frame holds the opening
// delimiter and the builder of the enclosing stream (plus the opening offset
// when `span_locations` is enabled). Returns a `LexError` for an unclosed or
// mismatched delimiter, a stray closing delimiter, or any input that
// `leaf_token` rejects.
pub(crate) fn token_stream(mut input: Cursor) -> Result<TokenStream, LexError> {
    let mut trees = TokenStreamBuilder::new();
    let mut stack = Vec::new();

    loop {
        input = skip_whitespace(input);

        // Doc comments are tried first; on success `doc_comment` has already
        // pushed its tokens into `trees`.
        if let Ok((rest, ())) = doc_comment(input, &mut trees) {
            input = rest;
            continue;
        }

        #[cfg(span_locations)]
        let lo = input.off;

        let first = match input.bytes().next() {
            Some(first) => first,
            // End of input: fine at top level, an error if a group is still
            // open. The error span points at the innermost frame's recorded
            // opening offset.
            None => match stack.last() {
                None => return Ok(trees.build()),
                #[cfg(span_locations)]
                Some((lo, _frame)) => {
                    return Err(LexError {
                        span: Span { lo: *lo, hi: *lo },
                    })
                }
                #[cfg(not(span_locations))]
                Some(_frame) => return Err(LexError { span: Span {} }),
            },
        };

        if let Some(open_delimiter) = match first {
            // The `(/*ERROR*/)` placeholder is not opened as a group; it
            // falls through to `leaf_token`, which handles it.
            b'(' if !input.starts_with(ERROR) => Some(Delimiter::Parenthesis),
            b'[' => Some(Delimiter::Bracket),
            b'{' => Some(Delimiter::Brace),
            _ => None,
        } {
            // Open a group: park the current builder on the stack and start
            // a fresh one for the group's contents.
            input = input.advance(1);
            let frame = (open_delimiter, trees);
            #[cfg(span_locations)]
            let frame = (lo, frame);
            stack.push(frame);
            trees = TokenStreamBuilder::new();
        } else if let Some(close_delimiter) = match first {
            b')' => Some(Delimiter::Parenthesis),
            b']' => Some(Delimiter::Bracket),
            b'}' => Some(Delimiter::Brace),
            _ => None,
        } {
            // Close a group: there must be an open frame and its delimiter
            // must match.
            let frame = match stack.pop() {
                Some(frame) => frame,
                None => return Err(lex_error(input)),
            };
            #[cfg(span_locations)]
            let (lo, frame) = frame;
            let (open_delimiter, outer) = frame;
            if open_delimiter != close_delimiter {
                return Err(lex_error(input));
            }
            input = input.advance(1);
            let mut g = Group::new(open_delimiter, trees.build());
            // The group's span runs from the opening delimiter to just past
            // the closing one.
            g.set_span(Span {
                #[cfg(span_locations)]
                lo,
                #[cfg(span_locations)]
                hi: input.off,
            });
            trees = outer;
            trees.push_token_from_parser(TokenTree::Group(crate::Group::_new_fallback(g)));
        } else {
            // Anything else must be a single leaf token.
            let (rest, mut tt) = match leaf_token(input) {
                Ok((rest, tt)) => (rest, tt),
                Err(Reject) => return Err(lex_error(input)),
            };
            tt.set_span(crate::Span::_new_fallback(Span {
                #[cfg(span_locations)]
                lo,
                #[cfg(span_locations)]
                hi: rest.off,
            }));
            trees.push_token_from_parser(tt);
            input = rest;
        }
    }
}

// Builds a `LexError` whose span is empty and sits at the cursor's offset
// (the span carries no fields when `span_locations` is disabled).
fn lex_error(cursor: Cursor) -> LexError {
    // Without span locations `cursor` is otherwise unused.
    #[cfg(not(span_locations))]
    let _ = cursor;
    LexError {
        span: Span {
            #[cfg(span_locations)]
            lo: cursor.off,
            #[cfg(span_locations)]
            hi: cursor.off,
        },
    }
}

// Lexes one non-group token, trying in order: literal, punct, ident, and
// finally the `(/*ERROR*/)` placeholder, which is returned as a literal.
fn leaf_token(input: Cursor) -> PResult<TokenTree> {
    if let Ok((input, l)) = literal(input) {
        // must be parsed before ident
        Ok((input, TokenTree::Literal(crate::Literal::_new_fallback(l))))
    } else if let Ok((input, p)) = punct(input) {
        Ok((input, TokenTree::Punct(p)))
    } else if let Ok((input, i)) = ident(input) {
        Ok((input, TokenTree::Ident(i)))
    } else if input.starts_with(ERROR) {
        let rest = input.advance(ERROR.len());
        let repr = crate::Literal::_new_fallback(Literal::_new(ERROR.to_owned()));
        Ok((rest, TokenTree::Literal(repr)))
    } else {
        Err(Reject)
    }
}

// Lexes an identifier, refusing input that begins with one of the string,
// byte-string, C-string or byte-literal prefixes listed below.
// NOTE(review): since `leaf_token` tries `literal` first, reaching this point
// with such a prefix presumably means the literal was malformed; the reject
// keeps it from being lexed as a bare `r`/`b`/`br`/`c`/`cr` identifier.
fn ident(input: Cursor) -> PResult<crate::Ident> {
    if [
        "r\"", "r#\"", "r##", "b\"", "b\'", "br\"", "br#", "c\"", "cr\"", "cr#",
    ]
    .iter()
    .any(|prefix| input.starts_with(prefix))
    {
        Err(Reject)
    } else {
        ident_any(input)
    }
}

// Lexes a plain or raw (`r#name`) identifier without the literal-prefix
// check done by `ident`. A raw identifier may not be `_`, `super`, `self`,
// `Self` or `crate`.
fn ident_any(input: Cursor) -> PResult<crate::Ident> {
    let raw = input.starts_with("r#");
    // Skip the two bytes of `r#` when present: `(raw as usize) << 1` is 2 or 0.
    let rest = input.advance((raw as usize) << 1);

    let (rest, sym) = ident_not_raw(rest)?;

    if !raw {
        let ident =
            crate::Ident::_new_fallback(Ident::new_unchecked(sym, fallback::Span::call_site()));
        return Ok((rest, ident));
    }

    match sym {
        "_" | "super" | "self" | "Self" | "crate" => return Err(Reject),
        _ => {}
    }

    let ident =
        crate::Ident::_new_fallback(Ident::new_raw_unchecked(sym, fallback::Span::call_site()));
    Ok((rest, ident))
}

// Consumes the longest run made of one `is_ident_start` char followed by
// `is_ident_continue` chars. Returns the cursor after it and the matched text.
fn ident_not_raw(input: Cursor) -> PResult<&str> {
    let mut chars = input.char_indices();

    match chars.next() {
        Some((_, ch)) if is_ident_start(ch) => {}
        _ => return Err(Reject),
    }

    // If every remaining char continues the identifier, it runs to the end
    // of the input.
    let mut end = input.len();
    for (i, ch) in chars {
        if !is_ident_continue(ch) {
            end = i;
            break;
        }
    }

    Ok((input.advance(end), &input.rest[..end]))
}

// Lexes one literal and captures its exact source text (including any
// suffix) as a `Literal`.
pub(crate) fn literal(input: Cursor) -> PResult<Literal> {
    let rest = literal_nocapture(input)?;
    // Bytes consumed = how much shorter the remainder became.
    let end = input.len() - rest.len();
    Ok((rest, Literal::_new(input.rest[..end].to_string())))
}

// Recognizes a literal without capturing its text, trying each form in turn:
// string, byte string, C string, byte, char, float, integer. Float is tried
// before int; otherwise the integer part of e.g. `1.5` would match as an int.
fn literal_nocapture(input: Cursor) -> Result<Cursor, Reject> {
    if let Ok(ok) = string(input) {
        Ok(ok)
    } else if let Ok(ok) = byte_string(input) {
        Ok(ok)
    } else if let Ok(ok) = c_string(input) {
        Ok(ok)
    } else if let Ok(ok) = byte(input) {
        Ok(ok)
    } else if let Ok(ok) = character(input) {
        Ok(ok)
    } else if let Ok(ok) = float(input) {
        Ok(ok)
    } else if let Ok(ok) = int(input) {
        Ok(ok)
    } else {
        Err(Reject)
    }
}

// Consumes an optional identifier-shaped suffix after a literal; returns the
// input unchanged when there is none.
fn literal_suffix(input: Cursor) -> Cursor {
    match ident_not_raw(input) {
        Ok((input, _)) => input,
        Err(Reject) => input,
    }
}

// String literal: `"..."` (cooked) or `r"..."` / `r#"..."#` (raw).
fn string(input: Cursor) -> Result<Cursor, Reject> {
    if let Ok(input) = input.parse("\"") {
        cooked_string(input)
    } else if let Ok(input) = input.parse("r") {
        raw_string(input)
    } else {
        Err(Reject)
    }
}

// Body of a cooked string, starting just after the opening quote. Scans to
// the closing `"`, validating escapes, then consumes an optional suffix.
// Rejects on a bare `\r`, an unknown escape, or end of input.
fn cooked_string(mut input: Cursor) -> Result<Cursor, Reject> {
    let mut chars = input.char_indices();

    while let Some((i, ch)) = chars.next() {
        match ch {
            '"' => {
                let input = input.advance(i + 1);
                return Ok(literal_suffix(input));
            }
            // A carriage return is only allowed as part of `\r\n`.
            '\r' => match chars.next() {
                Some((_, '\n')) => {}
                _ => break,
            },
            '\\' => match chars.next() {
                Some((_, 'x')) => {
                    backslash_x_char(&mut chars)?;
                }
                Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"' | '0')) => {}
                Some((_, 'u')) => {
                    backslash_u(&mut chars)?;
                }
                // Backslash followed by a newline: a line continuation.
                // Re-base `input` past the newline char, let
                // `trailing_backslash` skip the following whitespace, and
                // restart iteration so indices are relative to the new
                // `input`.
                Some((newline, ch @ ('\n' | '\r'))) => {
                    input = input.advance(newline + 1);
                    trailing_backslash(&mut input, ch as u8)?;
                    chars = input.char_indices();
                }
                _ => break,
            },
            _ch => {}
        }
    }
    Err(Reject)
}

// Body of a raw string, starting just after the `r`. Reads the `#…"` opener,
// then scans for a `"` followed by the same run of `#`. A bare `\r` rejects.
fn raw_string(input: Cursor) -> Result<Cursor, Reject> {
    let (input, delimiter) = delimiter_of_raw_string(input)?;
    let mut bytes = input.bytes().enumerate();
    while let Some((i, byte)) = bytes.next() {
        match byte {
            // `"` is one ASCII byte, so `i + 1` is a valid slice boundary.
            b'"' if input.rest[i + 1..].starts_with(delimiter) => {
                let rest = input.advance(i + 1 + delimiter.len());
                return Ok(literal_suffix(rest));
            }
            b'\r' => match bytes.next() {
                Some((_, b'\n')) => {}
                _ => break,
            },
            _ => {}
        }
    }
    Err(Reject)
}

// Byte string literal: `b"..."` (cooked) or `br"..."` / `br#"..."#` (raw).
fn byte_string(input: Cursor) -> Result<Cursor, Reject> {
    if let Ok(input) = input.parse("b\"") {
        cooked_byte_string(input)
    } else if let Ok(input) = input.parse("br") {
        raw_byte_string(input)
    } else {
        Err(Reject)
    }
}

// Body of a cooked byte string, starting just after `b"`. Works like
// `cooked_string` but over bytes: there is no `\u` escape, `\x` accepts any
// two hex digits, and any non-ASCII byte rejects.
fn cooked_byte_string(mut input: Cursor) -> Result<Cursor, Reject> {
    let mut bytes = input.bytes().enumerate();
    while let Some((offset, b)) = bytes.next() {
        match b {
            b'"' => {
                let input = input.advance(offset + 1);
                return Ok(literal_suffix(input));
            }
            b'\r' => match bytes.next() {
                Some((_, b'\n')) => {}
                _ => break,
            },
            b'\\' => match bytes.next() {
                Some((_, b'x')) => {
                    backslash_x_byte(&mut bytes)?;
                }
                Some((_, b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"')) => {}
                // Line continuation; the iterator is restarted on the
                // re-based `input`, as in `cooked_string`.
                Some((newline, b @ (b'\n' | b'\r'))) => {
                    input = input.advance(newline + 1);
                    trailing_backslash(&mut input, b)?;
                    bytes = input.bytes().enumerate();
                }
                _ => break,
            },
            b if b.is_ascii() => {}
            _ => break,
        }
    }
    Err(Reject)
}

// Reads the opener of a raw string: zero or more `#` followed by `"`.
// Returns the cursor just past the `"` and the run of `#` (the text that
// must follow the closing quote). Rejects on any other byte, on end of
// input, or on more than 255 `#`.
fn delimiter_of_raw_string(input: Cursor) -> PResult<&str> {
    for (i, byte) in input.bytes().enumerate() {
        match byte {
            b'"' => {
                if i > 255 {
                    // https://github.com/rust-lang/rust/pull/95251
                    return Err(Reject);
                }
                return Ok((input.advance(i + 1), &input.rest[..i]));
            }
            b'#' => {}
            _ => break,
        }
    }
    Err(Reject)
}

// Body of a raw byte string, starting just after `br`. Same as `raw_string`
// except that any non-ASCII byte rejects.
fn raw_byte_string(input: Cursor) -> Result<Cursor, Reject> {
    let (input, delimiter) = delimiter_of_raw_string(input)?;
    let mut bytes = input.bytes().enumerate();
    while let Some((i, byte)) = bytes.next() {
        match byte {
            b'"' if input.rest[i + 1..].starts_with(delimiter) => {
                let rest = input.advance(i + 1 + delimiter.len());
                return Ok(literal_suffix(rest));
            }
            b'\r' => match bytes.next() {
                Some((_, b'\n')) => {}
                _ => break,
            },
            other => {
                if !other.is_ascii() {
                    break;
                }
            }
        }
    }
    Err(Reject)
}

// C string literal: `c"..."` (cooked) or `cr"..."` / `cr#"..."#` (raw).
fn c_string(input: Cursor) -> Result<Cursor, Reject> {
    if let Ok(input) = input.parse("c\"") {
        cooked_c_string(input)
    } else if let Ok(input) = input.parse("cr") {
        raw_c_string(input)
    } else {
        Err(Reject)
    }
}

// Body of a raw C string, starting just after `cr`. Same as `raw_string`
// except that a NUL byte rejects.
fn raw_c_string(input: Cursor) -> Result<Cursor, Reject> {
    let (input, delimiter) = delimiter_of_raw_string(input)?;
    let mut bytes = input.bytes().enumerate();
    while let Some((i, byte)) = bytes.next() {
        match byte {
            b'"' if input.rest[i + 1..].starts_with(delimiter) => {
                let rest = input.advance(i + 1 + delimiter.len());
                return Ok(literal_suffix(rest));
            }
            b'\r' => match bytes.next() {
                Some((_, b'\n')) => {}
                _ => break,
            },
            b'\0' => break,
            _ => {}
        }
    }
    Err(Reject)
}

// Body of a cooked C string, starting just after `c"`. Follows
// `cooked_string` but rejects every way of writing NUL: there is no `\0`
// escape, `\x00` is refused by `backslash_x_nonzero`, `\u{0}` is refused
// here, and a literal NUL char rejects.
fn cooked_c_string(mut input: Cursor) -> Result<Cursor, Reject> {
    let mut chars = input.char_indices();

    while let Some((i, ch)) = chars.next() {
        match ch {
            '"' => {
                let input = input.advance(i + 1);
                return Ok(literal_suffix(input));
            }
            '\r' => match chars.next() {
                Some((_, '\n')) => {}
                _ => break,
            },
            '\\' => match chars.next() {
                Some((_, 'x')) => {
                    backslash_x_nonzero(&mut chars)?;
                }
                Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"')) => {}
                Some((_, 'u')) => {
                    if backslash_u(&mut chars)? == '\0' {
                        break;
                    }
                }
                // Line continuation; see `cooked_string`.
                Some((newline, ch @ ('\n' | '\r'))) => {
                    input = input.advance(newline + 1);
                    trailing_backslash(&mut input, ch as u8)?;
                    chars = input.char_indices();
                }
                _ => break,
            },
            '\0' => break,
            _ch => {}
        }
    }
    Err(Reject)
}

// Byte literal: `b'` + one byte or one escape + `'`, then an optional suffix.
fn byte(input: Cursor) -> Result<Cursor, Reject> {
    let input = input.parse("b'")?;
    let mut bytes = input.bytes().enumerate();
    let ok = match bytes.next().map(|(_, b)| b) {
        Some(b'\\') => match bytes.next().map(|(_, b)| b) {
            Some(b'x') => backslash_x_byte(&mut bytes).is_ok(),
            Some(b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"') => true,
            _ => false,
        },
        // Any single byte is accepted here; end of input is not.
        b => b.is_some(),
    };
    if !ok {
        return Err(Reject);
    }
    // `offset` is the index of the byte right after the content.
    let (offset, _) = bytes.next().ok_or(Reject)?;
    // If the content byte was the start of a multi-byte char, `offset` falls
    // inside that char; reject instead of letting `advance` panic.
    if !input.chars().as_str().is_char_boundary(offset) {
        return Err(Reject);
    }
    let input = input.advance(offset).parse("'")?;
    Ok(literal_suffix(input))
}

// Char literal: `'` + one char or one escape + `'`, then an optional suffix.
fn character(input: Cursor) -> Result<Cursor, Reject> {
    let input = input.parse("'")?;
    let mut chars = input.char_indices();
    let ok = match chars.next().map(|(_, ch)| ch) {
        Some('\\') => match chars.next().map(|(_, ch)| ch) {
            Some('x') => backslash_x_char(&mut chars).is_ok(),
            Some('u') => backslash_u(&mut chars).is_ok(),
            Some('n' | 'r' | 't' | '\\' | '0' | '\'' | '"') => true,
            _ => false,
        },
        // Any single char is accepted here; end of input is not.
        ch => ch.is_some(),
    };
    if !ok {
        return Err(Reject);
    }
    // `idx` is the byte index of the char right after the content, which
    // must be the closing quote.
    let (idx, _) = chars.next().ok_or(Reject)?;
    let input = input.advance(idx).parse("'")?;
    Ok(literal_suffix(input))
}

// Takes the next `(index, item)` from `$chars` and evaluates to the item if
// it matches `$pat`. Otherwise, or at end of input, returns `Err(Reject)`
// from the enclosing function.
macro_rules! next_ch {
    ($chars:ident @ $pat:pat) => {
        match $chars.next() {
            Some((_, ch)) => match ch {
                $pat => ch,
                _ => return Err(Reject),
            },
            None => return Err(Reject),
        }
    };
}

// Tail of a `\x` escape in a char or string: two hex digits, the first
// limited to `0..=7` so the value is at most 0x7F.
fn backslash_x_char<I>(chars: &mut I) -> Result<(), Reject>
where
    I: Iterator<Item = (usize, char)>,
{
    next_ch!(chars @ '0'..='7');
    next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
    Ok(())
}

// Tail of a `\x` escape in a byte or byte string: any two hex digits.
fn backslash_x_byte<I>(chars: &mut I) -> Result<(), Reject>
where
    I: Iterator<Item = (usize, u8)>,
{
    next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
    next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
    Ok(())
}

// Tail of a `\x` escape in a C string: any two hex digits except `00`.
fn backslash_x_nonzero<I>(chars: &mut I) -> Result<(), Reject>
where
    I: Iterator<Item = (usize, char)>,
{
    let first = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
    let second = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
    if first == '0' && second == '0' {
        Err(Reject)
    } else {
        Ok(())
    }
}

// Tail of a `\u` escape: `{`, one to six hex digits, `}`. Underscores are
// allowed after the first digit. Returns the decoded char, or rejects if the
// syntax is wrong or the value is not a valid `char`.
fn backslash_u<I>(chars: &mut I) -> Result<char, Reject>
where
    I: Iterator<Item = (usize, char)>,
{
    next_ch!(chars @ '{');
    let mut value = 0;
    let mut len = 0;
    for (_, ch) in chars {
        let digit = match ch {
            '0'..='9' => ch as u8 - b'0',
            'a'..='f' => 10 + ch as u8 - b'a',
            'A'..='F' => 10 + ch as u8 - b'A',
            '_' if len > 0 => continue,
            '}' if len > 0 => return char::from_u32(value).ok_or(Reject),
            _ => break,
        };
        // A seventh hex digit is one too many.
        if len == 6 {
            break;
        }
        value *= 0x10;
        value += u32::from(digit);
        len += 1;
    }
    Err(Reject)
}

// Handles the rest of a backslash-newline line continuation. `input` starts
// just after the newline char that followed the backslash, and `last` is
// that char as a byte. Skips spaces, tabs and newlines, leaving `input` at
// the first other byte. Rejects if a `\r` is not immediately followed by
// `\n`, or if the input ends first.
fn trailing_backslash(input: &mut Cursor, mut last: u8) -> Result<(), Reject> {
    let mut whitespace = input.bytes().enumerate();
    loop {
        // Note that this check consumes the byte following the `\r`.
        if last == b'\r' && whitespace.next().map_or(true, |(_, b)| b != b'\n') {
            return Err(Reject);
        }
        match whitespace.next() {
            Some((_, b @ (b' ' | b'\t' | b'\n' | b'\r'))) => {
                last = b;
            }
            Some((offset, _)) => {
                *input = input.advance(offset);
                return Ok(());
            }
            None => return Err(Reject),
        }
    }
}

// Float literal: the digits as recognized by `float_digits`, an optional
// identifier-shaped suffix, and then a word break.
fn float(input: Cursor) -> Result<Cursor, Reject> {
    let mut rest = float_digits(input)?;
    if let Some(ch) = rest.chars().next() {
        if is_ident_start(ch) {
            rest = ident_not_raw(rest)?.0;
        }
    }
    word_break(rest)
}

// Numeric part of a float: a leading decimal digit, then digits and
// underscores with at most one `.` and an optional `e`/`E` exponent.
// Rejects unless a `.` or an exponent was seen, and rejects when the `.` is
// followed by another `.` or by an identifier start (so `1..2` and `1.foo`
// are not floats).
fn float_digits(input: Cursor) -> Result<Cursor, Reject> {
    let mut chars = input.chars().peekable();
    match chars.next() {
        Some(ch) if '0' <= ch && ch <= '9' => {}
        _ => return Err(Reject),
    }

    // `len` counts accepted chars; every accepted char is ASCII, so it is
    // also the byte length handed to `advance`.
    let mut len = 1;
    let mut has_dot = false;
    let mut has_exp = false;
    while let Some(&ch) = chars.peek() {
        match ch {
            '0'..='9' | '_' => {
                chars.next();
                len += 1;
            }
            '.' => {
                if has_dot {
                    break;
                }
                chars.next();
                if chars
                    .peek()
                    .map_or(false, |&ch| ch == '.' || is_ident_start(ch))
                {
                    return Err(Reject);
                }
                len += 1;
                has_dot = true;
            }
            'e' | 'E' => {
                chars.next();
                len += 1;
                has_exp = true;
                break;
            }
            _ => break,
        }
    }

    if !(has_dot || has_exp) {
        return Err(Reject);
    }

    if has_exp {
        // Fallback if the exponent turns out to be malformed: with a `.`
        // already seen, the text before the `e` is still a float (`len - 1`
        // drops the `e`, which then starts the suffix); without one there is
        // no float at all.
        let token_before_exp = if has_dot {
            Ok(input.advance(len - 1))
        } else {
            Err(Reject)
        };
        let mut has_sign = false;
        let mut has_exp_value = false;
        while let Some(&ch) = chars.peek() {
            match ch {
                '+' | '-' => {
                    // A sign after the exponent digits belongs to whatever
                    // follows the literal.
                    if has_exp_value {
                        break;
                    }
                    if has_sign {
                        return token_before_exp;
                    }
                    chars.next();
                    len += 1;
                    has_sign = true;
                }
                '0'..='9' => {
                    chars.next();
                    len += 1;
                    has_exp_value = true;
                }
                '_' => {
                    chars.next();
                    len += 1;
                }
                _ => break,
            }
        }
        if !has_exp_value {
            return token_before_exp;
        }
    }

    Ok(input.advance(len))
}

// Integer literal: the digits as recognized by `digits`, an optional
// identifier-shaped suffix, and then a word break.
fn int(input: Cursor) -> Result<Cursor, Reject> {
    let mut rest = digits(input)?;
    if let Some(ch) = rest.chars().next() {
        if is_ident_start(ch) {
            rest = ident_not_raw(rest)?.0;
        }
    }
    word_break(rest)
}

// Digits of an integer literal with an optional `0x`, `0o` or `0b` prefix,
// allowing `_` separators. At least one real digit is required.
fn digits(mut input: Cursor) -> Result<Cursor, Reject> {
    let base = if input.starts_with("0x") {
        input = input.advance(2);
        16
    } else if input.starts_with("0o") {
        input = input.advance(2);
        8
    } else if input.starts_with("0b") {
        input = input.advance(2);
        2
    } else {
        10
    };

    let mut len = 0;
    let mut empty = true;
    for b in input.bytes() {
        match b {
            // A decimal digit too large for the base rejects the whole
            // literal (e.g. `0b12`).
            b'0'..=b'9' => {
                let digit = (b - b'0') as u64;
                if digit >= base {
                    return Err(Reject);
                }
            }
            // A letter too large for the base merely ends the digits; the
            // caller then treats it as the start of a suffix.
            b'a'..=b'f' => {
                let digit = 10 + (b - b'a') as u64;
                if digit >= base {
                    break;
                }
            }
            b'A'..=b'F' => {
                let digit = 10 + (b - b'A') as u64;
                if digit >= base {
                    break;
                }
            }
            // Underscores are consumed without counting as digits; with no
            // prefix, one before the first digit rejects.
            b'_' => {
                if empty && base == 10 {
                    return Err(Reject);
                }
                len += 1;
                continue;
            }
            _ => break,
        }
        len += 1;
        empty = false;
    }
    if empty {
        Err(Reject)
    } else {
        Ok(input.advance(len))
    }
}

// Lexes a single punctuation character.
//
// For `'`, an identifier must follow. The token is rejected if that
// identifier is followed by another `'`, or by `#` when the identifier was
// not written as a raw `r#` identifier. On success only the `'` is consumed
// and it is marked `Joint`. For any other character, spacing is `Joint` when
// the next character is also punctuation and `Alone` otherwise.
fn punct(input: Cursor) -> PResult<Punct> {
    let (rest, ch) = punct_char(input)?;
    if ch == '\'' {
        let (after_lifetime, _ident) = ident_any(rest)?;
        if after_lifetime.starts_with_char('\'')
            || (after_lifetime.starts_with_char('#') && !rest.starts_with("r#"))
        {
            Err(Reject)
        } else {
            Ok((rest, Punct::new('\'', Spacing::Joint)))
        }
    } else {
        let kind = match punct_char(rest) {
            Ok(_) => Spacing::Joint,
            Err(Reject) => Spacing::Alone,
        };
        Ok((rest, Punct::new(ch, kind)))
    }
}

// Consumes one character from the recognized punctuation set. Rejects at end
// of input, on any other character, and at the start of a comment.
fn punct_char(input: Cursor) -> PResult<char> {
    if input.starts_with("//") || input.starts_with("/*") {
        // Do not accept `/` of a comment as a punct.
        return Err(Reject);
    }

    let mut chars = input.chars();
    let first = match chars.next() {
        Some(ch) => ch,
        None => {
            return Err(Reject);
        }
    };
    let recognized = "~!@#$%^&*-=+|;:,<.>/?'";
    if recognized.contains(first) {
        Ok((input.advance(first.len_utf8()), first)) 
    } else {
        Err(Reject)
    }
}

// Lexes a doc comment and pushes the equivalent attribute tokens into
// `trees`: `#`, then `!` for an inner comment, then a bracketed group
// containing `doc`, `=`, and a string literal built from the comment text.
// Every token gets the span of the whole comment. Rejects if the input is
// not a doc comment or the text contains a `\r` not followed by `\n`.
fn doc_comment<'a>(input: Cursor<'a>, trees: &mut TokenStreamBuilder) -> PResult<'a, ()> {
    #[cfg(span_locations)]
    let lo = input.off;
    let (rest, (comment, inner)) = doc_comment_contents(input)?;
    let fallback_span = Span {
        #[cfg(span_locations)]
        lo,
        #[cfg(span_locations)]
        hi: rest.off,
    };
    let span = crate::Span::_new_fallback(fallback_span);

    // Bare carriage returns are refused. This runs before anything is
    // pushed, so a rejected comment leaves `trees` untouched.
    let mut scan_for_bare_cr = comment;
    while let Some(cr) = scan_for_bare_cr.find('\r') {
        let rest = &scan_for_bare_cr[cr + 1..];
        if !rest.starts_with('\n') {
            return Err(Reject);
        }
        scan_for_bare_cr = rest;
    }

    let mut pound = Punct::new('#', Spacing::Alone);
    pound.set_span(span);
    trees.push_token_from_parser(TokenTree::Punct(pound));

    if inner {
        let mut bang = Punct::new('!', Spacing::Alone);
        bang.set_span(span);
        trees.push_token_from_parser(TokenTree::Punct(bang));
    }

    let doc_ident = crate::Ident::_new_fallback(Ident::new_unchecked("doc", fallback_span));
    let mut equal = Punct::new('=', Spacing::Alone);
    equal.set_span(span);
    let mut literal = crate::Literal::_new_fallback(Literal::string(comment));
    literal.set_span(span);
    let mut bracketed = TokenStreamBuilder::with_capacity(3);
    bracketed.push_token_from_parser(TokenTree::Ident(doc_ident));
    bracketed.push_token_from_parser(TokenTree::Punct(equal));
    bracketed.push_token_from_parser(TokenTree::Literal(literal));
    let group = Group::new(Delimiter::Bracket, bracketed.build());
    let mut group = crate::Group::_new_fallback(group);
    group.set_span(span);
    trees.push_token_from_parser(TokenTree::Group(group));

    Ok((rest, ()))
}

// Recognizes the four doc-comment forms and returns the comment text with
// its delimiters stripped, plus a flag that is `true` for inner comments
// (`//!`, `/*! */`) and `false` for outer ones (`///`, `/** */`). `////…`
// and `/***…` are not doc comments and reject.
fn doc_comment_contents(input: Cursor) -> PResult<(&str, bool)> {
    if input.starts_with("//!") {
        let input = input.advance(3);
        let (input, s) = take_until_newline_or_eof(input);
        Ok((input, (s, true)))
    } else if input.starts_with("/*!") {
        let (input, s) = block_comment(input)?;
        // Drop the leading `/*!` and the trailing `*/`.
        Ok((input, (&s[3..s.len() - 2], true)))
    } else if input.starts_with("///") {
        let input = input.advance(3);
        if input.starts_with_char('/') {
            return Err(Reject);
        }
        let (input, s) = take_until_newline_or_eof(input);
        Ok((input, (s, false)))
    } else if input.starts_with("/**") && !input.rest[3..].starts_with('*') {
        let (input, s) = block_comment(input)?;
        // Drop the leading `/**` and the trailing `*/`.
        // NOTE(review): a bare `/**/` would make this range `3..2` and
        // panic. The caller chain runs `skip_whitespace` first, which
        // consumes `/**/`, so it does not reach here via `token_stream`.
        Ok((input, (&s[3..s.len() - 2], false)))
    } else {
        Err(Reject)
    }
}

// Splits off the rest of the current line. The returned text excludes the
// line terminator. The returned cursor is left on the `\n` itself (for
// `\r\n`, the `\r` is consumed but the `\n` is not), or at end of input when
// there is no newline. A `\r` not followed by `\n` does not end the line.
fn take_until_newline_or_eof(input: Cursor) -> (Cursor, &str) {
    let chars = input.char_indices();

    for (i, ch) in chars {
        if ch == '\n' {
            return (input.advance(i), &input.rest[..i]);
        } else if ch == '\r' && input.rest[i + 1..].starts_with('\n') {
            return (input.advance(i + 1), &input.rest[..i]);
        }
    }

    (input.advance(input.len()), input.rest)
}