xref: /linux/rust/proc-macro2/parse.rs (revision 784faa8eca8270671e0ed6d9d21f04bbb80fc5f7)
1 // SPDX-License-Identifier: Apache-2.0 OR MIT
2 
3 use crate::fallback::{
4     self, is_ident_continue, is_ident_start, Group, Ident, LexError, Literal, Span, TokenStream,
5     TokenStreamBuilder,
6 };
7 use crate::{Delimiter, Punct, Spacing, TokenTree};
8 use core::char;
9 use core::str::{Bytes, CharIndices, Chars};
10 
/// A lightweight, copyable view of the input that has not been lexed yet.
#[derive(Copy, Clone, Eq, PartialEq)]
pub(crate) struct Cursor<'a> {
    /// The text that still has to be consumed.
    pub(crate) rest: &'a str,
    /// Offset of `rest`, counted in `char`s; `advance` bumps it by the number
    /// of characters it skips.
    #[cfg(span_locations)]
    pub(crate) off: u32,
}

impl<'a> Cursor<'a> {
    /// Returns a cursor positioned `bytes` bytes further into the input.
    ///
    /// Panics (via `str::split_at`) if `bytes` is past the end of `rest` or
    /// not on a character boundary.
    pub(crate) fn advance(&self, bytes: usize) -> Cursor<'a> {
        let (_consumed, remaining) = self.rest.split_at(bytes);
        Cursor {
            rest: remaining,
            #[cfg(span_locations)]
            off: self.off + _consumed.chars().count() as u32,
        }
    }

    /// Whether the remaining input begins with the string `s`.
    pub(crate) fn starts_with(&self, s: &str) -> bool {
        self.rest.starts_with(s)
    }

    /// Whether the remaining input begins with the character `ch`.
    pub(crate) fn starts_with_char(&self, ch: char) -> bool {
        self.rest.starts_with(ch)
    }

    /// Whether the first remaining character satisfies the predicate `f`.
    pub(crate) fn starts_with_fn<Pattern>(&self, f: Pattern) -> bool
    where
        Pattern: FnMut(char) -> bool,
    {
        self.rest.starts_with(f)
    }

    /// Whether all input has been consumed.
    pub(crate) fn is_empty(&self) -> bool {
        self.rest.is_empty()
    }

    /// Number of bytes left in the input.
    fn len(&self) -> usize {
        self.rest.len()
    }

    /// The remaining input as raw bytes.
    fn as_bytes(&self) -> &'a [u8] {
        self.rest.as_bytes()
    }

    /// Iterator over the remaining bytes.
    fn bytes(&self) -> Bytes<'a> {
        self.rest.bytes()
    }

    /// Iterator over the remaining characters.
    fn chars(&self) -> Chars<'a> {
        self.rest.chars()
    }

    /// Iterator over the remaining characters paired with their byte offsets.
    fn char_indices(&self) -> CharIndices<'a> {
        self.rest.char_indices()
    }

    /// Consumes `tag` if the input starts with it, otherwise rejects.
    fn parse(&self, tag: &str) -> Result<Cursor<'a>, Reject> {
        if !self.starts_with(tag) {
            return Err(Reject);
        }
        Ok(self.advance(tag.len()))
    }
}

/// Marker error: the input did not match what the parser function expected.
pub(crate) struct Reject;

/// Result of a parser function: the cursor after the match plus the parsed
/// value, or `Reject`.
type PResult<'a, O> = Result<(Cursor<'a>, O), Reject>;
78 
skip_whitespace(input: Cursor) -> Cursor79 fn skip_whitespace(input: Cursor) -> Cursor {
80     let mut s = input;
81 
82     while !s.is_empty() {
83         let byte = s.as_bytes()[0];
84         if byte == b'/' {
85             if s.starts_with("//")
86                 && (!s.starts_with("///") || s.starts_with("////"))
87                 && !s.starts_with("//!")
88             {
89                 let (cursor, _) = take_until_newline_or_eof(s);
90                 s = cursor;
91                 continue;
92             } else if s.starts_with("/**/") {
93                 s = s.advance(4);
94                 continue;
95             } else if s.starts_with("/*")
96                 && (!s.starts_with("/**") || s.starts_with("/***"))
97                 && !s.starts_with("/*!")
98             {
99                 match block_comment(s) {
100                     Ok((rest, _)) => {
101                         s = rest;
102                         continue;
103                     }
104                     Err(Reject) => return s,
105                 }
106             }
107         }
108         match byte {
109             b' ' | 0x09..=0x0d => {
110                 s = s.advance(1);
111                 continue;
112             }
113             b if b.is_ascii() => {}
114             _ => {
115                 let ch = s.chars().next().unwrap();
116                 if is_whitespace(ch) {
117                     s = s.advance(ch.len_utf8());
118                     continue;
119                 }
120             }
121         }
122         return s;
123     }
124     s
125 }
126 
block_comment(input: Cursor) -> PResult<&str>127 fn block_comment(input: Cursor) -> PResult<&str> {
128     if !input.starts_with("/*") {
129         return Err(Reject);
130     }
131 
132     let mut depth = 0usize;
133     let bytes = input.as_bytes();
134     let mut i = 0usize;
135     let upper = bytes.len() - 1;
136 
137     while i < upper {
138         if bytes[i] == b'/' && bytes[i + 1] == b'*' {
139             depth += 1;
140             i += 1; // eat '*'
141         } else if bytes[i] == b'*' && bytes[i + 1] == b'/' {
142             depth -= 1;
143             if depth == 0 {
144                 return Ok((input.advance(i + 2), &input.rest[..i + 2]));
145             }
146             i += 1; // eat '/'
147         }
148         i += 1;
149     }
150 
151     Err(Reject)
152 }
153 
/// Whether `ch` counts as whitespace for the lexer.
///
/// Besides Unicode whitespace this includes the left-to-right mark (U+200E)
/// and right-to-left mark (U+200F), which Rust also treats as whitespace.
fn is_whitespace(ch: char) -> bool {
    matches!(ch, '\u{200e}' | '\u{200f}') || ch.is_whitespace()
}
158 
word_break(input: Cursor) -> Result<Cursor, Reject>159 fn word_break(input: Cursor) -> Result<Cursor, Reject> {
160     match input.chars().next() {
161         Some(ch) if is_ident_continue(ch) => Err(Reject),
162         Some(_) | None => Ok(input),
163     }
164 }
165 
/// The text rustc prints for a macro expansion error in expression position
/// or type position.
const ERROR: &str = "(/*ERROR*/)";
169 
token_stream(mut input: Cursor) -> Result<TokenStream, LexError>170 pub(crate) fn token_stream(mut input: Cursor) -> Result<TokenStream, LexError> {
171     let mut trees = TokenStreamBuilder::new();
172     let mut stack = Vec::new();
173 
174     loop {
175         input = skip_whitespace(input);
176 
177         if let Ok((rest, ())) = doc_comment(input, &mut trees) {
178             input = rest;
179             continue;
180         }
181 
182         #[cfg(span_locations)]
183         let lo = input.off;
184 
185         let first = match input.bytes().next() {
186             Some(first) => first,
187             None => match stack.last() {
188                 None => return Ok(trees.build()),
189                 #[cfg(span_locations)]
190                 Some((lo, _frame)) => {
191                     return Err(LexError {
192                         span: Span { lo: *lo, hi: *lo },
193                     })
194                 }
195                 #[cfg(not(span_locations))]
196                 Some(_frame) => return Err(LexError { span: Span {} }),
197             },
198         };
199 
200         if let Some(open_delimiter) = match first {
201             b'(' if !input.starts_with(ERROR) => Some(Delimiter::Parenthesis),
202             b'[' => Some(Delimiter::Bracket),
203             b'{' => Some(Delimiter::Brace),
204             _ => None,
205         } {
206             input = input.advance(1);
207             let frame = (open_delimiter, trees);
208             #[cfg(span_locations)]
209             let frame = (lo, frame);
210             stack.push(frame);
211             trees = TokenStreamBuilder::new();
212         } else if let Some(close_delimiter) = match first {
213             b')' => Some(Delimiter::Parenthesis),
214             b']' => Some(Delimiter::Bracket),
215             b'}' => Some(Delimiter::Brace),
216             _ => None,
217         } {
218             let frame = match stack.pop() {
219                 Some(frame) => frame,
220                 None => return Err(lex_error(input)),
221             };
222             #[cfg(span_locations)]
223             let (lo, frame) = frame;
224             let (open_delimiter, outer) = frame;
225             if open_delimiter != close_delimiter {
226                 return Err(lex_error(input));
227             }
228             input = input.advance(1);
229             let mut g = Group::new(open_delimiter, trees.build());
230             g.set_span(Span {
231                 #[cfg(span_locations)]
232                 lo,
233                 #[cfg(span_locations)]
234                 hi: input.off,
235             });
236             trees = outer;
237             trees.push_token_from_parser(TokenTree::Group(crate::Group::_new_fallback(g)));
238         } else {
239             let (rest, mut tt) = match leaf_token(input) {
240                 Ok((rest, tt)) => (rest, tt),
241                 Err(Reject) => return Err(lex_error(input)),
242             };
243             tt.set_span(crate::Span::_new_fallback(Span {
244                 #[cfg(span_locations)]
245                 lo,
246                 #[cfg(span_locations)]
247                 hi: rest.off,
248             }));
249             trees.push_token_from_parser(tt);
250             input = rest;
251         }
252     }
253 }
254 
lex_error(cursor: Cursor) -> LexError255 fn lex_error(cursor: Cursor) -> LexError {
256     #[cfg(not(span_locations))]
257     let _ = cursor;
258     LexError {
259         span: Span {
260             #[cfg(span_locations)]
261             lo: cursor.off,
262             #[cfg(span_locations)]
263             hi: cursor.off,
264         },
265     }
266 }
267 
leaf_token(input: Cursor) -> PResult<TokenTree>268 fn leaf_token(input: Cursor) -> PResult<TokenTree> {
269     if let Ok((input, l)) = literal(input) {
270         // must be parsed before ident
271         Ok((input, TokenTree::Literal(crate::Literal::_new_fallback(l))))
272     } else if let Ok((input, p)) = punct(input) {
273         Ok((input, TokenTree::Punct(p)))
274     } else if let Ok((input, i)) = ident(input) {
275         Ok((input, TokenTree::Ident(i)))
276     } else if input.starts_with(ERROR) {
277         let rest = input.advance(ERROR.len());
278         let repr = crate::Literal::_new_fallback(Literal::_new(ERROR.to_owned()));
279         Ok((rest, TokenTree::Literal(repr)))
280     } else {
281         Err(Reject)
282     }
283 }
284 
ident(input: Cursor) -> PResult<crate::Ident>285 fn ident(input: Cursor) -> PResult<crate::Ident> {
286     if [
287         "r\"", "r#\"", "r##", "b\"", "b\'", "br\"", "br#", "c\"", "cr\"", "cr#",
288     ]
289     .iter()
290     .any(|prefix| input.starts_with(prefix))
291     {
292         Err(Reject)
293     } else {
294         ident_any(input)
295     }
296 }
297 
ident_any(input: Cursor) -> PResult<crate::Ident>298 fn ident_any(input: Cursor) -> PResult<crate::Ident> {
299     let raw = input.starts_with("r#");
300     let rest = input.advance((raw as usize) << 1);
301 
302     let (rest, sym) = ident_not_raw(rest)?;
303 
304     if !raw {
305         let ident =
306             crate::Ident::_new_fallback(Ident::new_unchecked(sym, fallback::Span::call_site()));
307         return Ok((rest, ident));
308     }
309 
310     match sym {
311         "_" | "super" | "self" | "Self" | "crate" => return Err(Reject),
312         _ => {}
313     }
314 
315     let ident =
316         crate::Ident::_new_fallback(Ident::new_raw_unchecked(sym, fallback::Span::call_site()));
317     Ok((rest, ident))
318 }
319 
ident_not_raw(input: Cursor) -> PResult<&str>320 fn ident_not_raw(input: Cursor) -> PResult<&str> {
321     let mut chars = input.char_indices();
322 
323     match chars.next() {
324         Some((_, ch)) if is_ident_start(ch) => {}
325         _ => return Err(Reject),
326     }
327 
328     let mut end = input.len();
329     for (i, ch) in chars {
330         if !is_ident_continue(ch) {
331             end = i;
332             break;
333         }
334     }
335 
336     Ok((input.advance(end), &input.rest[..end]))
337 }
338 
literal(input: Cursor) -> PResult<Literal>339 pub(crate) fn literal(input: Cursor) -> PResult<Literal> {
340     let rest = literal_nocapture(input)?;
341     let end = input.len() - rest.len();
342     Ok((rest, Literal::_new(input.rest[..end].to_string())))
343 }
344 
literal_nocapture(input: Cursor) -> Result<Cursor, Reject>345 fn literal_nocapture(input: Cursor) -> Result<Cursor, Reject> {
346     if let Ok(ok) = string(input) {
347         Ok(ok)
348     } else if let Ok(ok) = byte_string(input) {
349         Ok(ok)
350     } else if let Ok(ok) = c_string(input) {
351         Ok(ok)
352     } else if let Ok(ok) = byte(input) {
353         Ok(ok)
354     } else if let Ok(ok) = character(input) {
355         Ok(ok)
356     } else if let Ok(ok) = float(input) {
357         Ok(ok)
358     } else if let Ok(ok) = int(input) {
359         Ok(ok)
360     } else {
361         Err(Reject)
362     }
363 }
364 
literal_suffix(input: Cursor) -> Cursor365 fn literal_suffix(input: Cursor) -> Cursor {
366     match ident_not_raw(input) {
367         Ok((input, _)) => input,
368         Err(Reject) => input,
369     }
370 }
371 
string(input: Cursor) -> Result<Cursor, Reject>372 fn string(input: Cursor) -> Result<Cursor, Reject> {
373     if let Ok(input) = input.parse("\"") {
374         cooked_string(input)
375     } else if let Ok(input) = input.parse("r") {
376         raw_string(input)
377     } else {
378         Err(Reject)
379     }
380 }
381 
cooked_string(mut input: Cursor) -> Result<Cursor, Reject>382 fn cooked_string(mut input: Cursor) -> Result<Cursor, Reject> {
383     let mut chars = input.char_indices();
384 
385     while let Some((i, ch)) = chars.next() {
386         match ch {
387             '"' => {
388                 let input = input.advance(i + 1);
389                 return Ok(literal_suffix(input));
390             }
391             '\r' => match chars.next() {
392                 Some((_, '\n')) => {}
393                 _ => break,
394             },
395             '\\' => match chars.next() {
396                 Some((_, 'x')) => {
397                     backslash_x_char(&mut chars)?;
398                 }
399                 Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"' | '0')) => {}
400                 Some((_, 'u')) => {
401                     backslash_u(&mut chars)?;
402                 }
403                 Some((newline, ch @ ('\n' | '\r'))) => {
404                     input = input.advance(newline + 1);
405                     trailing_backslash(&mut input, ch as u8)?;
406                     chars = input.char_indices();
407                 }
408                 _ => break,
409             },
410             _ch => {}
411         }
412     }
413     Err(Reject)
414 }
415 
raw_string(input: Cursor) -> Result<Cursor, Reject>416 fn raw_string(input: Cursor) -> Result<Cursor, Reject> {
417     let (input, delimiter) = delimiter_of_raw_string(input)?;
418     let mut bytes = input.bytes().enumerate();
419     while let Some((i, byte)) = bytes.next() {
420         match byte {
421             b'"' if input.rest[i + 1..].starts_with(delimiter) => {
422                 let rest = input.advance(i + 1 + delimiter.len());
423                 return Ok(literal_suffix(rest));
424             }
425             b'\r' => match bytes.next() {
426                 Some((_, b'\n')) => {}
427                 _ => break,
428             },
429             _ => {}
430         }
431     }
432     Err(Reject)
433 }
434 
byte_string(input: Cursor) -> Result<Cursor, Reject>435 fn byte_string(input: Cursor) -> Result<Cursor, Reject> {
436     if let Ok(input) = input.parse("b\"") {
437         cooked_byte_string(input)
438     } else if let Ok(input) = input.parse("br") {
439         raw_byte_string(input)
440     } else {
441         Err(Reject)
442     }
443 }
444 
cooked_byte_string(mut input: Cursor) -> Result<Cursor, Reject>445 fn cooked_byte_string(mut input: Cursor) -> Result<Cursor, Reject> {
446     let mut bytes = input.bytes().enumerate();
447     while let Some((offset, b)) = bytes.next() {
448         match b {
449             b'"' => {
450                 let input = input.advance(offset + 1);
451                 return Ok(literal_suffix(input));
452             }
453             b'\r' => match bytes.next() {
454                 Some((_, b'\n')) => {}
455                 _ => break,
456             },
457             b'\\' => match bytes.next() {
458                 Some((_, b'x')) => {
459                     backslash_x_byte(&mut bytes)?;
460                 }
461                 Some((_, b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"')) => {}
462                 Some((newline, b @ (b'\n' | b'\r'))) => {
463                     input = input.advance(newline + 1);
464                     trailing_backslash(&mut input, b)?;
465                     bytes = input.bytes().enumerate();
466                 }
467                 _ => break,
468             },
469             b if b.is_ascii() => {}
470             _ => break,
471         }
472     }
473     Err(Reject)
474 }
475 
delimiter_of_raw_string(input: Cursor) -> PResult<&str>476 fn delimiter_of_raw_string(input: Cursor) -> PResult<&str> {
477     for (i, byte) in input.bytes().enumerate() {
478         match byte {
479             b'"' => {
480                 if i > 255 {
481                     // https://github.com/rust-lang/rust/pull/95251
482                     return Err(Reject);
483                 }
484                 return Ok((input.advance(i + 1), &input.rest[..i]));
485             }
486             b'#' => {}
487             _ => break,
488         }
489     }
490     Err(Reject)
491 }
492 
raw_byte_string(input: Cursor) -> Result<Cursor, Reject>493 fn raw_byte_string(input: Cursor) -> Result<Cursor, Reject> {
494     let (input, delimiter) = delimiter_of_raw_string(input)?;
495     let mut bytes = input.bytes().enumerate();
496     while let Some((i, byte)) = bytes.next() {
497         match byte {
498             b'"' if input.rest[i + 1..].starts_with(delimiter) => {
499                 let rest = input.advance(i + 1 + delimiter.len());
500                 return Ok(literal_suffix(rest));
501             }
502             b'\r' => match bytes.next() {
503                 Some((_, b'\n')) => {}
504                 _ => break,
505             },
506             other => {
507                 if !other.is_ascii() {
508                     break;
509                 }
510             }
511         }
512     }
513     Err(Reject)
514 }
515 
c_string(input: Cursor) -> Result<Cursor, Reject>516 fn c_string(input: Cursor) -> Result<Cursor, Reject> {
517     if let Ok(input) = input.parse("c\"") {
518         cooked_c_string(input)
519     } else if let Ok(input) = input.parse("cr") {
520         raw_c_string(input)
521     } else {
522         Err(Reject)
523     }
524 }
525 
raw_c_string(input: Cursor) -> Result<Cursor, Reject>526 fn raw_c_string(input: Cursor) -> Result<Cursor, Reject> {
527     let (input, delimiter) = delimiter_of_raw_string(input)?;
528     let mut bytes = input.bytes().enumerate();
529     while let Some((i, byte)) = bytes.next() {
530         match byte {
531             b'"' if input.rest[i + 1..].starts_with(delimiter) => {
532                 let rest = input.advance(i + 1 + delimiter.len());
533                 return Ok(literal_suffix(rest));
534             }
535             b'\r' => match bytes.next() {
536                 Some((_, b'\n')) => {}
537                 _ => break,
538             },
539             b'\0' => break,
540             _ => {}
541         }
542     }
543     Err(Reject)
544 }
545 
cooked_c_string(mut input: Cursor) -> Result<Cursor, Reject>546 fn cooked_c_string(mut input: Cursor) -> Result<Cursor, Reject> {
547     let mut chars = input.char_indices();
548 
549     while let Some((i, ch)) = chars.next() {
550         match ch {
551             '"' => {
552                 let input = input.advance(i + 1);
553                 return Ok(literal_suffix(input));
554             }
555             '\r' => match chars.next() {
556                 Some((_, '\n')) => {}
557                 _ => break,
558             },
559             '\\' => match chars.next() {
560                 Some((_, 'x')) => {
561                     backslash_x_nonzero(&mut chars)?;
562                 }
563                 Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"')) => {}
564                 Some((_, 'u')) => {
565                     if backslash_u(&mut chars)? == '\0' {
566                         break;
567                     }
568                 }
569                 Some((newline, ch @ ('\n' | '\r'))) => {
570                     input = input.advance(newline + 1);
571                     trailing_backslash(&mut input, ch as u8)?;
572                     chars = input.char_indices();
573                 }
574                 _ => break,
575             },
576             '\0' => break,
577             _ch => {}
578         }
579     }
580     Err(Reject)
581 }
582 
byte(input: Cursor) -> Result<Cursor, Reject>583 fn byte(input: Cursor) -> Result<Cursor, Reject> {
584     let input = input.parse("b'")?;
585     let mut bytes = input.bytes().enumerate();
586     let ok = match bytes.next().map(|(_, b)| b) {
587         Some(b'\\') => match bytes.next().map(|(_, b)| b) {
588             Some(b'x') => backslash_x_byte(&mut bytes).is_ok(),
589             Some(b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"') => true,
590             _ => false,
591         },
592         b => b.is_some(),
593     };
594     if !ok {
595         return Err(Reject);
596     }
597     let (offset, _) = bytes.next().ok_or(Reject)?;
598     if !input.chars().as_str().is_char_boundary(offset) {
599         return Err(Reject);
600     }
601     let input = input.advance(offset).parse("'")?;
602     Ok(literal_suffix(input))
603 }
604 
character(input: Cursor) -> Result<Cursor, Reject>605 fn character(input: Cursor) -> Result<Cursor, Reject> {
606     let input = input.parse("'")?;
607     let mut chars = input.char_indices();
608     let ok = match chars.next().map(|(_, ch)| ch) {
609         Some('\\') => match chars.next().map(|(_, ch)| ch) {
610             Some('x') => backslash_x_char(&mut chars).is_ok(),
611             Some('u') => backslash_u(&mut chars).is_ok(),
612             Some('n' | 'r' | 't' | '\\' | '0' | '\'' | '"') => true,
613             _ => false,
614         },
615         ch => ch.is_some(),
616     };
617     if !ok {
618         return Err(Reject);
619     }
620     let (idx, _) = chars.next().ok_or(Reject)?;
621     let input = input.advance(idx).parse("'")?;
622     Ok(literal_suffix(input))
623 }
624 
/// Pulls the next `(index, item)` pair from `$chars` and evaluates to the
/// item if it matches `$pat`; otherwise returns `Err(Reject)` from the
/// enclosing function (also when the iterator is exhausted).
macro_rules! next_ch {
    ($chars:ident @ $pat:pat) => {
        match $chars.next() {
            Some((_, ch)) if matches!(ch, $pat) => ch,
            _ => return Err(Reject),
        }
    };
}
636 
backslash_x_char<I>(chars: &mut I) -> Result<(), Reject> where I: Iterator<Item = (usize, char)>,637 fn backslash_x_char<I>(chars: &mut I) -> Result<(), Reject>
638 where
639     I: Iterator<Item = (usize, char)>,
640 {
641     next_ch!(chars @ '0'..='7');
642     next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
643     Ok(())
644 }
645 
backslash_x_byte<I>(chars: &mut I) -> Result<(), Reject> where I: Iterator<Item = (usize, u8)>,646 fn backslash_x_byte<I>(chars: &mut I) -> Result<(), Reject>
647 where
648     I: Iterator<Item = (usize, u8)>,
649 {
650     next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
651     next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
652     Ok(())
653 }
654 
backslash_x_nonzero<I>(chars: &mut I) -> Result<(), Reject> where I: Iterator<Item = (usize, char)>,655 fn backslash_x_nonzero<I>(chars: &mut I) -> Result<(), Reject>
656 where
657     I: Iterator<Item = (usize, char)>,
658 {
659     let first = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
660     let second = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
661     if first == '0' && second == '0' {
662         Err(Reject)
663     } else {
664         Ok(())
665     }
666 }
667 
backslash_u<I>(chars: &mut I) -> Result<char, Reject> where I: Iterator<Item = (usize, char)>,668 fn backslash_u<I>(chars: &mut I) -> Result<char, Reject>
669 where
670     I: Iterator<Item = (usize, char)>,
671 {
672     next_ch!(chars @ '{');
673     let mut value = 0;
674     let mut len = 0;
675     for (_, ch) in chars {
676         let digit = match ch {
677             '0'..='9' => ch as u8 - b'0',
678             'a'..='f' => 10 + ch as u8 - b'a',
679             'A'..='F' => 10 + ch as u8 - b'A',
680             '_' if len > 0 => continue,
681             '}' if len > 0 => return char::from_u32(value).ok_or(Reject),
682             _ => break,
683         };
684         if len == 6 {
685             break;
686         }
687         value *= 0x10;
688         value += u32::from(digit);
689         len += 1;
690     }
691     Err(Reject)
692 }
693 
trailing_backslash(input: &mut Cursor, mut last: u8) -> Result<(), Reject>694 fn trailing_backslash(input: &mut Cursor, mut last: u8) -> Result<(), Reject> {
695     let mut whitespace = input.bytes().enumerate();
696     loop {
697         if last == b'\r' && whitespace.next().map_or(true, |(_, b)| b != b'\n') {
698             return Err(Reject);
699         }
700         match whitespace.next() {
701             Some((_, b @ (b' ' | b'\t' | b'\n' | b'\r'))) => {
702                 last = b;
703             }
704             Some((offset, _)) => {
705                 *input = input.advance(offset);
706                 return Ok(());
707             }
708             None => return Err(Reject),
709         }
710     }
711 }
712 
float(input: Cursor) -> Result<Cursor, Reject>713 fn float(input: Cursor) -> Result<Cursor, Reject> {
714     let mut rest = float_digits(input)?;
715     if let Some(ch) = rest.chars().next() {
716         if is_ident_start(ch) {
717             rest = ident_not_raw(rest)?.0;
718         }
719     }
720     word_break(rest)
721 }
722 
float_digits(input: Cursor) -> Result<Cursor, Reject>723 fn float_digits(input: Cursor) -> Result<Cursor, Reject> {
724     let mut chars = input.chars().peekable();
725     match chars.next() {
726         Some(ch) if '0' <= ch && ch <= '9' => {}
727         _ => return Err(Reject),
728     }
729 
730     let mut len = 1;
731     let mut has_dot = false;
732     let mut has_exp = false;
733     while let Some(&ch) = chars.peek() {
734         match ch {
735             '0'..='9' | '_' => {
736                 chars.next();
737                 len += 1;
738             }
739             '.' => {
740                 if has_dot {
741                     break;
742                 }
743                 chars.next();
744                 if chars
745                     .peek()
746                     .map_or(false, |&ch| ch == '.' || is_ident_start(ch))
747                 {
748                     return Err(Reject);
749                 }
750                 len += 1;
751                 has_dot = true;
752             }
753             'e' | 'E' => {
754                 chars.next();
755                 len += 1;
756                 has_exp = true;
757                 break;
758             }
759             _ => break,
760         }
761     }
762 
763     if !(has_dot || has_exp) {
764         return Err(Reject);
765     }
766 
767     if has_exp {
768         let token_before_exp = if has_dot {
769             Ok(input.advance(len - 1))
770         } else {
771             Err(Reject)
772         };
773         let mut has_sign = false;
774         let mut has_exp_value = false;
775         while let Some(&ch) = chars.peek() {
776             match ch {
777                 '+' | '-' => {
778                     if has_exp_value {
779                         break;
780                     }
781                     if has_sign {
782                         return token_before_exp;
783                     }
784                     chars.next();
785                     len += 1;
786                     has_sign = true;
787                 }
788                 '0'..='9' => {
789                     chars.next();
790                     len += 1;
791                     has_exp_value = true;
792                 }
793                 '_' => {
794                     chars.next();
795                     len += 1;
796                 }
797                 _ => break,
798             }
799         }
800         if !has_exp_value {
801             return token_before_exp;
802         }
803     }
804 
805     Ok(input.advance(len))
806 }
807 
int(input: Cursor) -> Result<Cursor, Reject>808 fn int(input: Cursor) -> Result<Cursor, Reject> {
809     let mut rest = digits(input)?;
810     if let Some(ch) = rest.chars().next() {
811         if is_ident_start(ch) {
812             rest = ident_not_raw(rest)?.0;
813         }
814     }
815     word_break(rest)
816 }
817 
digits(mut input: Cursor) -> Result<Cursor, Reject>818 fn digits(mut input: Cursor) -> Result<Cursor, Reject> {
819     let base = if input.starts_with("0x") {
820         input = input.advance(2);
821         16
822     } else if input.starts_with("0o") {
823         input = input.advance(2);
824         8
825     } else if input.starts_with("0b") {
826         input = input.advance(2);
827         2
828     } else {
829         10
830     };
831 
832     let mut len = 0;
833     let mut empty = true;
834     for b in input.bytes() {
835         match b {
836             b'0'..=b'9' => {
837                 let digit = (b - b'0') as u64;
838                 if digit >= base {
839                     return Err(Reject);
840                 }
841             }
842             b'a'..=b'f' => {
843                 let digit = 10 + (b - b'a') as u64;
844                 if digit >= base {
845                     break;
846                 }
847             }
848             b'A'..=b'F' => {
849                 let digit = 10 + (b - b'A') as u64;
850                 if digit >= base {
851                     break;
852                 }
853             }
854             b'_' => {
855                 if empty && base == 10 {
856                     return Err(Reject);
857                 }
858                 len += 1;
859                 continue;
860             }
861             _ => break,
862         }
863         len += 1;
864         empty = false;
865     }
866     if empty {
867         Err(Reject)
868     } else {
869         Ok(input.advance(len))
870     }
871 }
872 
punct(input: Cursor) -> PResult<Punct>873 fn punct(input: Cursor) -> PResult<Punct> {
874     let (rest, ch) = punct_char(input)?;
875     if ch == '\'' {
876         let (after_lifetime, _ident) = ident_any(rest)?;
877         if after_lifetime.starts_with_char('\'')
878             || (after_lifetime.starts_with_char('#') && !rest.starts_with("r#"))
879         {
880             Err(Reject)
881         } else {
882             Ok((rest, Punct::new('\'', Spacing::Joint)))
883         }
884     } else {
885         let kind = match punct_char(rest) {
886             Ok(_) => Spacing::Joint,
887             Err(Reject) => Spacing::Alone,
888         };
889         Ok((rest, Punct::new(ch, kind)))
890     }
891 }
892 
punct_char(input: Cursor) -> PResult<char>893 fn punct_char(input: Cursor) -> PResult<char> {
894     if input.starts_with("//") || input.starts_with("/*") {
895         // Do not accept `/` of a comment as a punct.
896         return Err(Reject);
897     }
898 
899     let mut chars = input.chars();
900     let first = match chars.next() {
901         Some(ch) => ch,
902         None => {
903             return Err(Reject);
904         }
905     };
906     let recognized = "~!@#$%^&*-=+|;:,<.>/?'";
907     if recognized.contains(first) {
908         Ok((input.advance(first.len_utf8()), first))
909     } else {
910         Err(Reject)
911     }
912 }
913 
doc_comment<'a>(input: Cursor<'a>, trees: &mut TokenStreamBuilder) -> PResult<'a, ()>914 fn doc_comment<'a>(input: Cursor<'a>, trees: &mut TokenStreamBuilder) -> PResult<'a, ()> {
915     #[cfg(span_locations)]
916     let lo = input.off;
917     let (rest, (comment, inner)) = doc_comment_contents(input)?;
918     let fallback_span = Span {
919         #[cfg(span_locations)]
920         lo,
921         #[cfg(span_locations)]
922         hi: rest.off,
923     };
924     let span = crate::Span::_new_fallback(fallback_span);
925 
926     let mut scan_for_bare_cr = comment;
927     while let Some(cr) = scan_for_bare_cr.find('\r') {
928         let rest = &scan_for_bare_cr[cr + 1..];
929         if !rest.starts_with('\n') {
930             return Err(Reject);
931         }
932         scan_for_bare_cr = rest;
933     }
934 
935     let mut pound = Punct::new('#', Spacing::Alone);
936     pound.set_span(span);
937     trees.push_token_from_parser(TokenTree::Punct(pound));
938 
939     if inner {
940         let mut bang = Punct::new('!', Spacing::Alone);
941         bang.set_span(span);
942         trees.push_token_from_parser(TokenTree::Punct(bang));
943     }
944 
945     let doc_ident = crate::Ident::_new_fallback(Ident::new_unchecked("doc", fallback_span));
946     let mut equal = Punct::new('=', Spacing::Alone);
947     equal.set_span(span);
948     let mut literal = crate::Literal::_new_fallback(Literal::string(comment));
949     literal.set_span(span);
950     let mut bracketed = TokenStreamBuilder::with_capacity(3);
951     bracketed.push_token_from_parser(TokenTree::Ident(doc_ident));
952     bracketed.push_token_from_parser(TokenTree::Punct(equal));
953     bracketed.push_token_from_parser(TokenTree::Literal(literal));
954     let group = Group::new(Delimiter::Bracket, bracketed.build());
955     let mut group = crate::Group::_new_fallback(group);
956     group.set_span(span);
957     trees.push_token_from_parser(TokenTree::Group(group));
958 
959     Ok((rest, ()))
960 }
961 
doc_comment_contents(input: Cursor) -> PResult<(&str, bool)>962 fn doc_comment_contents(input: Cursor) -> PResult<(&str, bool)> {
963     if input.starts_with("//!") {
964         let input = input.advance(3);
965         let (input, s) = take_until_newline_or_eof(input);
966         Ok((input, (s, true)))
967     } else if input.starts_with("/*!") {
968         let (input, s) = block_comment(input)?;
969         Ok((input, (&s[3..s.len() - 2], true)))
970     } else if input.starts_with("///") {
971         let input = input.advance(3);
972         if input.starts_with_char('/') {
973             return Err(Reject);
974         }
975         let (input, s) = take_until_newline_or_eof(input);
976         Ok((input, (s, false)))
977     } else if input.starts_with("/**") && !input.rest[3..].starts_with('*') {
978         let (input, s) = block_comment(input)?;
979         Ok((input, (&s[3..s.len() - 2], false)))
980     } else {
981         Err(Reject)
982     }
983 }
984 
take_until_newline_or_eof(input: Cursor) -> (Cursor, &str)985 fn take_until_newline_or_eof(input: Cursor) -> (Cursor, &str) {
986     let chars = input.char_indices();
987 
988     for (i, ch) in chars {
989         if ch == '\n' {
990             return (input.advance(i), &input.rest[..i]);
991         } else if ch == '\r' && input.rest[i + 1..].starts_with('\n') {
992             return (input.advance(i + 1), &input.rest[..i]);
993         }
994     }
995 
996     (input.advance(input.len()), input.rest)
997 }
998