1 // SPDX-License-Identifier: Apache-2.0 OR MIT 2 3 use crate::fallback::{ 4 self, is_ident_continue, is_ident_start, Group, Ident, LexError, Literal, Span, TokenStream, 5 TokenStreamBuilder, 6 }; 7 use crate::{Delimiter, Punct, Spacing, TokenTree}; 8 use core::char; 9 use core::str::{Bytes, CharIndices, Chars}; 10 11 #[derive(Copy, Clone, Eq, PartialEq)] 12 pub(crate) struct Cursor<'a> { 13 pub(crate) rest: &'a str, 14 #[cfg(span_locations)] 15 pub(crate) off: u32, 16 } 17 18 impl<'a> Cursor<'a> { 19 pub(crate) fn advance(&self, bytes: usize) -> Cursor<'a> { 20 let (_front, rest) = self.rest.split_at(bytes); 21 Cursor { 22 rest, 23 #[cfg(span_locations)] 24 off: self.off + _front.chars().count() as u32, 25 } 26 } 27 28 pub(crate) fn starts_with(&self, s: &str) -> bool { 29 self.rest.starts_with(s) 30 } 31 32 pub(crate) fn starts_with_char(&self, ch: char) -> bool { 33 self.rest.starts_with(ch) 34 } 35 36 pub(crate) fn starts_with_fn<Pattern>(&self, f: Pattern) -> bool 37 where 38 Pattern: FnMut(char) -> bool, 39 { 40 self.rest.starts_with(f) 41 } 42 43 pub(crate) fn is_empty(&self) -> bool { 44 self.rest.is_empty() 45 } 46 47 fn len(&self) -> usize { 48 self.rest.len() 49 } 50 51 fn as_bytes(&self) -> &'a [u8] { 52 self.rest.as_bytes() 53 } 54 55 fn bytes(&self) -> Bytes<'a> { 56 self.rest.bytes() 57 } 58 59 fn chars(&self) -> Chars<'a> { 60 self.rest.chars() 61 } 62 63 fn char_indices(&self) -> CharIndices<'a> { 64 self.rest.char_indices() 65 } 66 67 fn parse(&self, tag: &str) -> Result<Cursor<'a>, Reject> { 68 if self.starts_with(tag) { 69 Ok(self.advance(tag.len())) 70 } else { 71 Err(Reject) 72 } 73 } 74 } 75 76 pub(crate) struct Reject; 77 type PResult<'a, O> = Result<(Cursor<'a>, O), Reject>; 78 79 fn skip_whitespace(input: Cursor) -> Cursor { 80 let mut s = input; 81 82 while !s.is_empty() { 83 let byte = s.as_bytes()[0]; 84 if byte == b'/' { 85 if s.starts_with("//") 86 && (!s.starts_with("///") || s.starts_with("////")) 87 && !s.starts_with("//!") 88 { 89 let (cursor, _) = take_until_newline_or_eof(s); 90 s = cursor; 91 continue; 92 } else if s.starts_with("/**/") { 93 s = s.advance(4); 94 continue; 95 } else if s.starts_with("/*") 96 && (!s.starts_with("/**") || s.starts_with("/***")) 97 && !s.starts_with("/*!") 98 { 99 match block_comment(s) { 100 Ok((rest, _)) => { 101 s = rest; 102 continue; 103 } 104 Err(Reject) => return s, 105 } 106 } 107 } 108 match byte { 109 b' ' | 0x09..=0x0d => { 110 s = s.advance(1); 111 continue; 112 } 113 b if b.is_ascii() => {} 114 _ => { 115 let ch = s.chars().next().unwrap(); 116 if is_whitespace(ch) { 117 s = s.advance(ch.len_utf8()); 118 continue; 119 } 120 } 121 } 122 return s; 123 } 124 s 125 } 126 127 fn block_comment(input: Cursor) -> PResult<&str> { 128 if !input.starts_with("/*") { 129 return Err(Reject); 130 } 131 132 let mut depth = 0usize; 133 let bytes = input.as_bytes(); 134 let mut i = 0usize; 135 let upper = bytes.len() - 1; 136 137 while i < upper { 138 if bytes[i] == b'/' && bytes[i + 1] == b'*' { 139 depth += 1; 140 i += 1; // eat '*' 141 } else if bytes[i] == b'*' && bytes[i + 1] == b'/' { 142 depth -= 1; 143 if depth == 0 { 144 return Ok((input.advance(i + 2), &input.rest[..i + 2])); 145 } 146 i += 1; // eat '/' 147 } 148 i += 1; 149 } 150 151 Err(Reject) 152 } 153 154 fn is_whitespace(ch: char) -> bool { 155 // Rust treats left-to-right mark and right-to-left mark as whitespace 156 ch.is_whitespace() || ch == '\u{200e}' || ch == '\u{200f}' 157 } 158 159 fn word_break(input: Cursor) -> Result<Cursor, Reject> { 160 match input.chars().next() { 161 Some(ch) if is_ident_continue(ch) => Err(Reject), 162 Some(_) | None => Ok(input), 163 } 164 } 165 166 // Rustc's representation of a macro expansion error in expression position or 167 // type position. 168 const ERROR: &str = "(/*ERROR*/)"; 169 170 pub(crate) fn token_stream(mut input: Cursor) -> Result<TokenStream, LexError> { 171 let mut trees = TokenStreamBuilder::new(); 172 let mut stack = Vec::new(); 173 174 loop { 175 input = skip_whitespace(input); 176 177 if let Ok((rest, ())) = doc_comment(input, &mut trees) { 178 input = rest; 179 continue; 180 } 181 182 #[cfg(span_locations)] 183 let lo = input.off; 184 185 let first = match input.bytes().next() { 186 Some(first) => first, 187 None => match stack.last() { 188 None => return Ok(trees.build()), 189 #[cfg(span_locations)] 190 Some((lo, _frame)) => { 191 return Err(LexError { 192 span: Span { lo: *lo, hi: *lo }, 193 }) 194 } 195 #[cfg(not(span_locations))] 196 Some(_frame) => return Err(LexError { span: Span {} }), 197 }, 198 }; 199 200 if let Some(open_delimiter) = match first { 201 b'(' if !input.starts_with(ERROR) => Some(Delimiter::Parenthesis), 202 b'[' => Some(Delimiter::Bracket), 203 b'{' => Some(Delimiter::Brace), 204 _ => None, 205 } { 206 input = input.advance(1); 207 let frame = (open_delimiter, trees); 208 #[cfg(span_locations)] 209 let frame = (lo, frame); 210 stack.push(frame); 211 trees = TokenStreamBuilder::new(); 212 } else if let Some(close_delimiter) = match first { 213 b')' => Some(Delimiter::Parenthesis), 214 b']' => Some(Delimiter::Bracket), 215 b'}' => Some(Delimiter::Brace), 216 _ => None, 217 } { 218 let frame = match stack.pop() { 219 Some(frame) => frame, 220 None => return Err(lex_error(input)), 221 }; 222 #[cfg(span_locations)] 223 let (lo, frame) = frame; 224 let (open_delimiter, outer) = frame; 225 if open_delimiter != close_delimiter { 226 return Err(lex_error(input)); 227 } 228 input = input.advance(1); 229 let mut g = Group::new(open_delimiter, trees.build()); 230 g.set_span(Span { 231 #[cfg(span_locations)] 232 lo, 233 #[cfg(span_locations)] 234 hi: input.off, 235 }); 236 trees = outer; 237 trees.push_token_from_parser(TokenTree::Group(crate::Group::_new_fallback(g))); 238 } else { 239 let (rest, mut tt) = match leaf_token(input) { 240 Ok((rest, tt)) => (rest, tt), 241 Err(Reject) => return Err(lex_error(input)), 242 }; 243 tt.set_span(crate::Span::_new_fallback(Span { 244 #[cfg(span_locations)] 245 lo, 246 #[cfg(span_locations)] 247 hi: rest.off, 248 })); 249 trees.push_token_from_parser(tt); 250 input = rest; 251 } 252 } 253 } 254 255 fn lex_error(cursor: Cursor) -> LexError { 256 #[cfg(not(span_locations))] 257 let _ = cursor; 258 LexError { 259 span: Span { 260 #[cfg(span_locations)] 261 lo: cursor.off, 262 #[cfg(span_locations)] 263 hi: cursor.off, 264 }, 265 } 266 } 267 268 fn leaf_token(input: Cursor) -> PResult<TokenTree> { 269 if let Ok((input, l)) = literal(input) { 270 // must be parsed before ident 271 Ok((input, TokenTree::Literal(crate::Literal::_new_fallback(l)))) 272 } else if let Ok((input, p)) = punct(input) { 273 Ok((input, TokenTree::Punct(p))) 274 } else if let Ok((input, i)) = ident(input) { 275 Ok((input, TokenTree::Ident(i))) 276 } else if input.starts_with(ERROR) { 277 let rest = input.advance(ERROR.len()); 278 let repr = crate::Literal::_new_fallback(Literal::_new(ERROR.to_owned())); 279 Ok((rest, TokenTree::Literal(repr))) 280 } else { 281 Err(Reject) 282 } 283 } 284 285 fn ident(input: Cursor) -> PResult<crate::Ident> { 286 if [ 287 "r\"", "r#\"", "r##", "b\"", "b\'", "br\"", "br#", "c\"", "cr\"", "cr#", 288 ] 289 .iter() 290 .any(|prefix| input.starts_with(prefix)) 291 { 292 Err(Reject) 293 } else { 294 ident_any(input) 295 } 296 } 297 298 fn ident_any(input: Cursor) -> PResult<crate::Ident> { 299 let raw = input.starts_with("r#"); 300 let rest = input.advance((raw as usize) << 1); 301 302 let (rest, sym) = ident_not_raw(rest)?; 303 304 if !raw { 305 let ident = 306 crate::Ident::_new_fallback(Ident::new_unchecked(sym, fallback::Span::call_site())); 307 return Ok((rest, ident)); 308 } 309 310 match sym { 311 "_" | "super" | "self" | "Self" | "crate" => return Err(Reject), 312 _ => {} 313 } 314 315 let ident = 316 crate::Ident::_new_fallback(Ident::new_raw_unchecked(sym, fallback::Span::call_site())); 317 Ok((rest, ident)) 318 } 319 320 fn ident_not_raw(input: Cursor) -> PResult<&str> { 321 let mut chars = input.char_indices(); 322 323 match chars.next() { 324 Some((_, ch)) if is_ident_start(ch) => {} 325 _ => return Err(Reject), 326 } 327 328 let mut end = input.len(); 329 for (i, ch) in chars { 330 if !is_ident_continue(ch) { 331 end = i; 332 break; 333 } 334 } 335 336 Ok((input.advance(end), &input.rest[..end])) 337 } 338 339 pub(crate) fn literal(input: Cursor) -> PResult<Literal> { 340 let rest = literal_nocapture(input)?; 341 let end = input.len() - rest.len(); 342 Ok((rest, Literal::_new(input.rest[..end].to_string()))) 343 } 344 345 fn literal_nocapture(input: Cursor) -> Result<Cursor, Reject> { 346 if let Ok(ok) = string(input) { 347 Ok(ok) 348 } else if let Ok(ok) = byte_string(input) { 349 Ok(ok) 350 } else if let Ok(ok) = c_string(input) { 351 Ok(ok) 352 } else if let Ok(ok) = byte(input) { 353 Ok(ok) 354 } else if let Ok(ok) = character(input) { 355 Ok(ok) 356 } else if let Ok(ok) = float(input) { 357 Ok(ok) 358 } else if let Ok(ok) = int(input) { 359 Ok(ok) 360 } else { 361 Err(Reject) 362 } 363 } 364 365 fn literal_suffix(input: Cursor) -> Cursor { 366 match ident_not_raw(input) { 367 Ok((input, _)) => input, 368 Err(Reject) => input, 369 } 370 } 371 372 fn string(input: Cursor) -> Result<Cursor, Reject> { 373 if let Ok(input) = input.parse("\"") { 374 cooked_string(input) 375 } else if let Ok(input) = input.parse("r") { 376 raw_string(input) 377 } else { 378 Err(Reject) 379 } 380 } 381 382 fn cooked_string(mut input: Cursor) -> Result<Cursor, Reject> { 383 let mut chars = input.char_indices(); 384 385 while let Some((i, ch)) = chars.next() { 386 match ch { 387 '"' => { 388 let input = input.advance(i + 1); 389 return Ok(literal_suffix(input)); 390 } 391 '\r' => match chars.next() { 392 Some((_, '\n')) => {} 393 _ => break, 394 }, 395 '\\' => match chars.next() { 396 Some((_, 'x')) => { 397 backslash_x_char(&mut chars)?; 398 } 399 Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"' | '0')) => {} 400 Some((_, 'u')) => { 401 backslash_u(&mut chars)?; 402 } 403 Some((newline, ch @ ('\n' | '\r'))) => { 404 input = input.advance(newline + 1); 405 trailing_backslash(&mut input, ch as u8)?; 406 chars = input.char_indices(); 407 } 408 _ => break, 409 }, 410 _ch => {} 411 } 412 } 413 Err(Reject) 414 } 415 416 fn raw_string(input: Cursor) -> Result<Cursor, Reject> { 417 let (input, delimiter) = delimiter_of_raw_string(input)?; 418 let mut bytes = input.bytes().enumerate(); 419 while let Some((i, byte)) = bytes.next() { 420 match byte { 421 b'"' if input.rest[i + 1..].starts_with(delimiter) => { 422 let rest = input.advance(i + 1 + delimiter.len()); 423 return Ok(literal_suffix(rest)); 424 } 425 b'\r' => match bytes.next() { 426 Some((_, b'\n')) => {} 427 _ => break, 428 }, 429 _ => {} 430 } 431 } 432 Err(Reject) 433 } 434 435 fn byte_string(input: Cursor) -> Result<Cursor, Reject> { 436 if let Ok(input) = input.parse("b\"") { 437 cooked_byte_string(input) 438 } else if let Ok(input) = input.parse("br") { 439 raw_byte_string(input) 440 } else { 441 Err(Reject) 442 } 443 } 444 445 fn cooked_byte_string(mut input: Cursor) -> Result<Cursor, Reject> { 446 let mut bytes = input.bytes().enumerate(); 447 while let Some((offset, b)) = bytes.next() { 448 match b { 449 b'"' => { 450 let input = input.advance(offset + 1); 451 return Ok(literal_suffix(input)); 452 } 453 b'\r' => match bytes.next() { 454 Some((_, b'\n')) => {} 455 _ => break, 456 }, 457 b'\\' => match bytes.next() { 458 Some((_, b'x')) => { 459 backslash_x_byte(&mut bytes)?; 460 } 461 Some((_, b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"')) => {} 462 Some((newline, b @ (b'\n' | b'\r'))) => { 463 input = input.advance(newline + 1); 464 trailing_backslash(&mut input, b)?; 465 bytes = input.bytes().enumerate(); 466 } 467 _ => break, 468 }, 469 b if b.is_ascii() => {} 470 _ => break, 471 } 472 } 473 Err(Reject) 474 } 475 476 fn delimiter_of_raw_string(input: Cursor) -> PResult<&str> { 477 for (i, byte) in input.bytes().enumerate() { 478 match byte { 479 b'"' => { 480 if i > 255 { 481 // https://github.com/rust-lang/rust/pull/95251 482 return Err(Reject); 483 } 484 return Ok((input.advance(i + 1), &input.rest[..i])); 485 } 486 b'#' => {} 487 _ => break, 488 } 489 } 490 Err(Reject) 491 } 492 493 fn raw_byte_string(input: Cursor) -> Result<Cursor, Reject> { 494 let (input, delimiter) = delimiter_of_raw_string(input)?; 495 let mut bytes = input.bytes().enumerate(); 496 while let Some((i, byte)) = bytes.next() { 497 match byte { 498 b'"' if input.rest[i + 1..].starts_with(delimiter) => { 499 let rest = input.advance(i + 1 + delimiter.len()); 500 return Ok(literal_suffix(rest)); 501 } 502 b'\r' => match bytes.next() { 503 Some((_, b'\n')) => {} 504 _ => break, 505 }, 506 other => { 507 if !other.is_ascii() { 508 break; 509 } 510 } 511 } 512 } 513 Err(Reject) 514 } 515 516 fn c_string(input: Cursor) -> Result<Cursor, Reject> { 517 if let Ok(input) = input.parse("c\"") { 518 cooked_c_string(input) 519 } else if let Ok(input) = input.parse("cr") { 520 raw_c_string(input) 521 } else { 522 Err(Reject) 523 } 524 } 525 526 fn raw_c_string(input: Cursor) -> Result<Cursor, Reject> { 527 let (input, delimiter) = delimiter_of_raw_string(input)?; 528 let mut bytes = input.bytes().enumerate(); 529 while let Some((i, byte)) = bytes.next() { 530 match byte { 531 b'"' if input.rest[i + 1..].starts_with(delimiter) => { 532 let rest = input.advance(i + 1 + delimiter.len()); 533 return Ok(literal_suffix(rest)); 534 } 535 b'\r' => match bytes.next() { 536 Some((_, b'\n')) => {} 537 _ => break, 538 }, 539 b'\0' => break, 540 _ => {} 541 } 542 } 543 Err(Reject) 544 } 545 546 fn cooked_c_string(mut input: Cursor) -> Result<Cursor, Reject> { 547 let mut chars = input.char_indices(); 548 549 while let Some((i, ch)) = chars.next() { 550 match ch { 551 '"' => { 552 let input = input.advance(i + 1); 553 return Ok(literal_suffix(input)); 554 } 555 '\r' => match chars.next() { 556 Some((_, '\n')) => {} 557 _ => break, 558 }, 559 '\\' => match chars.next() { 560 Some((_, 'x')) => { 561 backslash_x_nonzero(&mut chars)?; 562 } 563 Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"')) => {} 564 Some((_, 'u')) => { 565 if backslash_u(&mut chars)? == '\0' { 566 break; 567 } 568 } 569 Some((newline, ch @ ('\n' | '\r'))) => { 570 input = input.advance(newline + 1); 571 trailing_backslash(&mut input, ch as u8)?; 572 chars = input.char_indices(); 573 } 574 _ => break, 575 }, 576 '\0' => break, 577 _ch => {} 578 } 579 } 580 Err(Reject) 581 } 582 583 fn byte(input: Cursor) -> Result<Cursor, Reject> { 584 let input = input.parse("b'")?; 585 let mut bytes = input.bytes().enumerate(); 586 let ok = match bytes.next().map(|(_, b)| b) { 587 Some(b'\\') => match bytes.next().map(|(_, b)| b) { 588 Some(b'x') => backslash_x_byte(&mut bytes).is_ok(), 589 Some(b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"') => true, 590 _ => false, 591 }, 592 b => b.is_some(), 593 }; 594 if !ok { 595 return Err(Reject); 596 } 597 let (offset, _) = bytes.next().ok_or(Reject)?; 598 if !input.chars().as_str().is_char_boundary(offset) { 599 return Err(Reject); 600 } 601 let input = input.advance(offset).parse("'")?; 602 Ok(literal_suffix(input)) 603 } 604 605 fn character(input: Cursor) -> Result<Cursor, Reject> { 606 let input = input.parse("'")?; 607 let mut chars = input.char_indices(); 608 let ok = match chars.next().map(|(_, ch)| ch) { 609 Some('\\') => match chars.next().map(|(_, ch)| ch) { 610 Some('x') => backslash_x_char(&mut chars).is_ok(), 611 Some('u') => backslash_u(&mut chars).is_ok(), 612 Some('n' | 'r' | 't' | '\\' | '0' | '\'' | '"') => true, 613 _ => false, 614 }, 615 ch => ch.is_some(), 616 }; 617 if !ok { 618 return Err(Reject); 619 } 620 let (idx, _) = chars.next().ok_or(Reject)?; 621 let input = input.advance(idx).parse("'")?; 622 Ok(literal_suffix(input)) 623 } 624 625 macro_rules! next_ch { 626 ($chars:ident @ $pat:pat) => { 627 match $chars.next() { 628 Some((_, ch)) => match ch { 629 $pat => ch, 630 _ => return Err(Reject), 631 }, 632 None => return Err(Reject), 633 } 634 }; 635 } 636 637 fn backslash_x_char<I>(chars: &mut I) -> Result<(), Reject> 638 where 639 I: Iterator<Item = (usize, char)>, 640 { 641 next_ch!(chars @ '0'..='7'); 642 next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F'); 643 Ok(()) 644 } 645 646 fn backslash_x_byte<I>(chars: &mut I) -> Result<(), Reject> 647 where 648 I: Iterator<Item = (usize, u8)>, 649 { 650 next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F'); 651 next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F'); 652 Ok(()) 653 } 654 655 fn backslash_x_nonzero<I>(chars: &mut I) -> Result<(), Reject> 656 where 657 I: Iterator<Item = (usize, char)>, 658 { 659 let first = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F'); 660 let second = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F'); 661 if first == '0' && second == '0' { 662 Err(Reject) 663 } else { 664 Ok(()) 665 } 666 } 667 668 fn backslash_u<I>(chars: &mut I) -> Result<char, Reject> 669 where 670 I: Iterator<Item = (usize, char)>, 671 { 672 next_ch!(chars @ '{'); 673 let mut value = 0; 674 let mut len = 0; 675 for (_, ch) in chars { 676 let digit = match ch { 677 '0'..='9' => ch as u8 - b'0', 678 'a'..='f' => 10 + ch as u8 - b'a', 679 'A'..='F' => 10 + ch as u8 - b'A', 680 '_' if len > 0 => continue, 681 '}' if len > 0 => return char::from_u32(value).ok_or(Reject), 682 _ => break, 683 }; 684 if len == 6 { 685 break; 686 } 687 value *= 0x10; 688 value += u32::from(digit); 689 len += 1; 690 } 691 Err(Reject) 692 } 693 694 fn trailing_backslash(input: &mut Cursor, mut last: u8) -> Result<(), Reject> { 695 let mut whitespace = input.bytes().enumerate(); 696 loop { 697 if last == b'\r' && whitespace.next().map_or(true, |(_, b)| b != b'\n') { 698 return Err(Reject); 699 } 700 match whitespace.next() { 701 Some((_, b @ (b' ' | b'\t' | b'\n' | b'\r'))) => { 702 last = b; 703 } 704 Some((offset, _)) => { 705 *input = input.advance(offset); 706 return Ok(()); 707 } 708 None => return Err(Reject), 709 } 710 } 711 } 712 713 fn float(input: Cursor) -> Result<Cursor, Reject> { 714 let mut rest = float_digits(input)?; 715 if let Some(ch) = rest.chars().next() { 716 if is_ident_start(ch) { 717 rest = ident_not_raw(rest)?.0; 718 } 719 } 720 word_break(rest) 721 } 722 723 fn float_digits(input: Cursor) -> Result<Cursor, Reject> { 724 let mut chars = input.chars().peekable(); 725 match chars.next() { 726 Some(ch) if '0' <= ch && ch <= '9' => {} 727 _ => return Err(Reject), 728 } 729 730 let mut len = 1; 731 let mut has_dot = false; 732 let mut has_exp = false; 733 while let Some(&ch) = chars.peek() { 734 match ch { 735 '0'..='9' | '_' => { 736 chars.next(); 737 len += 1; 738 } 739 '.' => { 740 if has_dot { 741 break; 742 } 743 chars.next(); 744 if chars 745 .peek() 746 .map_or(false, |&ch| ch == '.' || is_ident_start(ch)) 747 { 748 return Err(Reject); 749 } 750 len += 1; 751 has_dot = true; 752 } 753 'e' | 'E' => { 754 chars.next(); 755 len += 1; 756 has_exp = true; 757 break; 758 } 759 _ => break, 760 } 761 } 762 763 if !(has_dot || has_exp) { 764 return Err(Reject); 765 } 766 767 if has_exp { 768 let token_before_exp = if has_dot { 769 Ok(input.advance(len - 1)) 770 } else { 771 Err(Reject) 772 }; 773 let mut has_sign = false; 774 let mut has_exp_value = false; 775 while let Some(&ch) = chars.peek() { 776 match ch { 777 '+' | '-' => { 778 if has_exp_value { 779 break; 780 } 781 if has_sign { 782 return token_before_exp; 783 } 784 chars.next(); 785 len += 1; 786 has_sign = true; 787 } 788 '0'..='9' => { 789 chars.next(); 790 len += 1; 791 has_exp_value = true; 792 } 793 '_' => { 794 chars.next(); 795 len += 1; 796 } 797 _ => break, 798 } 799 } 800 if !has_exp_value { 801 return token_before_exp; 802 } 803 } 804 805 Ok(input.advance(len)) 806 } 807 808 fn int(input: Cursor) -> Result<Cursor, Reject> { 809 let mut rest = digits(input)?; 810 if let Some(ch) = rest.chars().next() { 811 if is_ident_start(ch) { 812 rest = ident_not_raw(rest)?.0; 813 } 814 } 815 word_break(rest) 816 } 817 818 fn digits(mut input: Cursor) -> Result<Cursor, Reject> { 819 let base = if input.starts_with("0x") { 820 input = input.advance(2); 821 16 822 } else if input.starts_with("0o") { 823 input = input.advance(2); 824 8 825 } else if input.starts_with("0b") { 826 input = input.advance(2); 827 2 828 } else { 829 10 830 }; 831 832 let mut len = 0; 833 let mut empty = true; 834 for b in input.bytes() { 835 match b { 836 b'0'..=b'9' => { 837 let digit = (b - b'0') as u64; 838 if digit >= base { 839 return Err(Reject); 840 } 841 } 842 b'a'..=b'f' => { 843 let digit = 10 + (b - b'a') as u64; 844 if digit >= base { 845 break; 846 } 847 } 848 b'A'..=b'F' => { 849 let digit = 10 + (b - b'A') as u64; 850 if digit >= base { 851 break; 852 } 853 } 854 b'_' => { 855 if empty && base == 10 { 856 return Err(Reject); 857 } 858 len += 1; 859 continue; 860 } 861 _ => break, 862 } 863 len += 1; 864 empty = false; 865 } 866 if empty { 867 Err(Reject) 868 } else { 869 Ok(input.advance(len)) 870 } 871 } 872 873 fn punct(input: Cursor) -> PResult<Punct> { 874 let (rest, ch) = punct_char(input)?; 875 if ch == '\'' { 876 let (after_lifetime, _ident) = ident_any(rest)?; 877 if after_lifetime.starts_with_char('\'') 878 || (after_lifetime.starts_with_char('#') && !rest.starts_with("r#")) 879 { 880 Err(Reject) 881 } else { 882 Ok((rest, Punct::new('\'', Spacing::Joint))) 883 } 884 } else { 885 let kind = match punct_char(rest) { 886 Ok(_) => Spacing::Joint, 887 Err(Reject) => Spacing::Alone, 888 }; 889 Ok((rest, Punct::new(ch, kind))) 890 } 891 } 892 893 fn punct_char(input: Cursor) -> PResult<char> { 894 if input.starts_with("//") || input.starts_with("/*") { 895 // Do not accept `/` of a comment as a punct. 896 return Err(Reject); 897 } 898 899 let mut chars = input.chars(); 900 let first = match chars.next() { 901 Some(ch) => ch, 902 None => { 903 return Err(Reject); 904 } 905 }; 906 let recognized = "~!@#$%^&*-=+|;:,<.>/?'"; 907 if recognized.contains(first) { 908 Ok((input.advance(first.len_utf8()), first)) 909 } else { 910 Err(Reject) 911 } 912 } 913 914 fn doc_comment<'a>(input: Cursor<'a>, trees: &mut TokenStreamBuilder) -> PResult<'a, ()> { 915 #[cfg(span_locations)] 916 let lo = input.off; 917 let (rest, (comment, inner)) = doc_comment_contents(input)?; 918 let fallback_span = Span { 919 #[cfg(span_locations)] 920 lo, 921 #[cfg(span_locations)] 922 hi: rest.off, 923 }; 924 let span = crate::Span::_new_fallback(fallback_span); 925 926 let mut scan_for_bare_cr = comment; 927 while let Some(cr) = scan_for_bare_cr.find('\r') { 928 let rest = &scan_for_bare_cr[cr + 1..]; 929 if !rest.starts_with('\n') { 930 return Err(Reject); 931 } 932 scan_for_bare_cr = rest; 933 } 934 935 let mut pound = Punct::new('#', Spacing::Alone); 936 pound.set_span(span); 937 trees.push_token_from_parser(TokenTree::Punct(pound)); 938 939 if inner { 940 let mut bang = Punct::new('!', Spacing::Alone); 941 bang.set_span(span); 942 trees.push_token_from_parser(TokenTree::Punct(bang)); 943 } 944 945 let doc_ident = crate::Ident::_new_fallback(Ident::new_unchecked("doc", fallback_span)); 946 let mut equal = Punct::new('=', Spacing::Alone); 947 equal.set_span(span); 948 let mut literal = crate::Literal::_new_fallback(Literal::string(comment)); 949 literal.set_span(span); 950 let mut bracketed = TokenStreamBuilder::with_capacity(3); 951 bracketed.push_token_from_parser(TokenTree::Ident(doc_ident)); 952 bracketed.push_token_from_parser(TokenTree::Punct(equal)); 953 bracketed.push_token_from_parser(TokenTree::Literal(literal)); 954 let group = Group::new(Delimiter::Bracket, bracketed.build()); 955 let mut group = crate::Group::_new_fallback(group); 956 group.set_span(span); 957 trees.push_token_from_parser(TokenTree::Group(group)); 958 959 Ok((rest, ())) 960 } 961 962 fn doc_comment_contents(input: Cursor) -> PResult<(&str, bool)> { 963 if input.starts_with("//!") { 964 let input = input.advance(3); 965 let (input, s) = take_until_newline_or_eof(input); 966 Ok((input, (s, true))) 967 } else if input.starts_with("/*!") { 968 let (input, s) = block_comment(input)?; 969 Ok((input, (&s[3..s.len() - 2], true))) 970 } else if input.starts_with("///") { 971 let input = input.advance(3); 972 if input.starts_with_char('/') { 973 return Err(Reject); 974 } 975 let (input, s) = take_until_newline_or_eof(input); 976 Ok((input, (s, false))) 977 } else if input.starts_with("/**") && !input.rest[3..].starts_with('*') { 978 let (input, s) = block_comment(input)?; 979 Ok((input, (&s[3..s.len() - 2], false))) 980 } else { 981 Err(Reject) 982 } 983 } 984 985 fn take_until_newline_or_eof(input: Cursor) -> (Cursor, &str) { 986 let chars = input.char_indices(); 987 988 for (i, ch) in chars { 989 if ch == '\n' { 990 return (input.advance(i), &input.rest[..i]); 991 } else if ch == '\r' && input.rest[i + 1..].starts_with('\n') { 992 return (input.advance(i + 1), &input.rest[..i]); 993 } 994 } 995 996 (input.advance(input.len()), input.rest) 997 } 998