1 // SPDX-License-Identifier: Apache-2.0 OR MIT
2
3 use crate::fallback::{
4 self, is_ident_continue, is_ident_start, Group, Ident, LexError, Literal, Span, TokenStream,
5 TokenStreamBuilder,
6 };
7 use crate::{Delimiter, Punct, Spacing, TokenTree};
8 use core::char;
9 use core::str::{Bytes, CharIndices, Chars};
10
11 #[derive(Copy, Clone, Eq, PartialEq)]
12 pub(crate) struct Cursor<'a> {
13 pub(crate) rest: &'a str,
14 #[cfg(span_locations)]
15 pub(crate) off: u32,
16 }
17
18 impl<'a> Cursor<'a> {
advance(&self, bytes: usize) -> Cursor<'a>19 pub(crate) fn advance(&self, bytes: usize) -> Cursor<'a> {
20 let (_front, rest) = self.rest.split_at(bytes);
21 Cursor {
22 rest,
23 #[cfg(span_locations)]
24 off: self.off + _front.chars().count() as u32,
25 }
26 }
27
starts_with(&self, s: &str) -> bool28 pub(crate) fn starts_with(&self, s: &str) -> bool {
29 self.rest.starts_with(s)
30 }
31
starts_with_char(&self, ch: char) -> bool32 pub(crate) fn starts_with_char(&self, ch: char) -> bool {
33 self.rest.starts_with(ch)
34 }
35
starts_with_fn<Pattern>(&self, f: Pattern) -> bool where Pattern: FnMut(char) -> bool,36 pub(crate) fn starts_with_fn<Pattern>(&self, f: Pattern) -> bool
37 where
38 Pattern: FnMut(char) -> bool,
39 {
40 self.rest.starts_with(f)
41 }
42
is_empty(&self) -> bool43 pub(crate) fn is_empty(&self) -> bool {
44 self.rest.is_empty()
45 }
46
len(&self) -> usize47 fn len(&self) -> usize {
48 self.rest.len()
49 }
50
as_bytes(&self) -> &'a [u8]51 fn as_bytes(&self) -> &'a [u8] {
52 self.rest.as_bytes()
53 }
54
bytes(&self) -> Bytes<'a>55 fn bytes(&self) -> Bytes<'a> {
56 self.rest.bytes()
57 }
58
chars(&self) -> Chars<'a>59 fn chars(&self) -> Chars<'a> {
60 self.rest.chars()
61 }
62
char_indices(&self) -> CharIndices<'a>63 fn char_indices(&self) -> CharIndices<'a> {
64 self.rest.char_indices()
65 }
66
parse(&self, tag: &str) -> Result<Cursor<'a>, Reject>67 fn parse(&self, tag: &str) -> Result<Cursor<'a>, Reject> {
68 if self.starts_with(tag) {
69 Ok(self.advance(tag.len()))
70 } else {
71 Err(Reject)
72 }
73 }
74 }
75
/// Unit error returned by every parsing routine in this file when the input
/// does not match what that routine recognizes. It carries no details.
pub(crate) struct Reject;
/// Result of a parsing step: on success, the cursor positioned after the
/// consumed input together with the parsed output `O`.
type PResult<'a, O> = Result<(Cursor<'a>, O), Reject>;
78
skip_whitespace(input: Cursor) -> Cursor79 fn skip_whitespace(input: Cursor) -> Cursor {
80 let mut s = input;
81
82 while !s.is_empty() {
83 let byte = s.as_bytes()[0];
84 if byte == b'/' {
85 if s.starts_with("//")
86 && (!s.starts_with("///") || s.starts_with("////"))
87 && !s.starts_with("//!")
88 {
89 let (cursor, _) = take_until_newline_or_eof(s);
90 s = cursor;
91 continue;
92 } else if s.starts_with("/**/") {
93 s = s.advance(4);
94 continue;
95 } else if s.starts_with("/*")
96 && (!s.starts_with("/**") || s.starts_with("/***"))
97 && !s.starts_with("/*!")
98 {
99 match block_comment(s) {
100 Ok((rest, _)) => {
101 s = rest;
102 continue;
103 }
104 Err(Reject) => return s,
105 }
106 }
107 }
108 match byte {
109 b' ' | 0x09..=0x0d => {
110 s = s.advance(1);
111 continue;
112 }
113 b if b.is_ascii() => {}
114 _ => {
115 let ch = s.chars().next().unwrap();
116 if is_whitespace(ch) {
117 s = s.advance(ch.len_utf8());
118 continue;
119 }
120 }
121 }
122 return s;
123 }
124 s
125 }
126
block_comment(input: Cursor) -> PResult<&str>127 fn block_comment(input: Cursor) -> PResult<&str> {
128 if !input.starts_with("/*") {
129 return Err(Reject);
130 }
131
132 let mut depth = 0usize;
133 let bytes = input.as_bytes();
134 let mut i = 0usize;
135 let upper = bytes.len() - 1;
136
137 while i < upper {
138 if bytes[i] == b'/' && bytes[i + 1] == b'*' {
139 depth += 1;
140 i += 1; // eat '*'
141 } else if bytes[i] == b'*' && bytes[i + 1] == b'/' {
142 depth -= 1;
143 if depth == 0 {
144 return Ok((input.advance(i + 2), &input.rest[..i + 2]));
145 }
146 i += 1; // eat '/'
147 }
148 i += 1;
149 }
150
151 Err(Reject)
152 }
153
/// Whether `ch` counts as whitespace for the lexer.
fn is_whitespace(ch: char) -> bool {
    // Rust treats left-to-right mark and right-to-left mark as whitespace
    matches!(ch, '\u{200e}' | '\u{200f}') || ch.is_whitespace()
}
158
word_break(input: Cursor) -> Result<Cursor, Reject>159 fn word_break(input: Cursor) -> Result<Cursor, Reject> {
160 match input.chars().next() {
161 Some(ch) if is_ident_continue(ch) => Err(Reject),
162 Some(_) | None => Ok(input),
163 }
164 }
165
// Rustc's representation of a macro expansion error in expression position or
// type position. The lexer in this file treats this exact text as a single
// literal token rather than as a parenthesized group.
const ERROR: &str = "(/*ERROR*/)";
169
/// Lexes the whole of `input` into a `TokenStream`.
///
/// Delimited groups are handled iteratively rather than recursively: `stack`
/// holds one frame per currently open delimiter, each carrying the delimiter
/// kind and the builder of the enclosing stream (plus the offset of the
/// opening delimiter when `span_locations` is enabled).
///
/// Returns a `LexError` for an unmatched or mismatched closing delimiter, for
/// a delimiter that is still open at end of input, or for text that is not a
/// valid token.
pub(crate) fn token_stream(mut input: Cursor) -> Result<TokenStream, LexError> {
    // Builder for the stream currently being filled (innermost open group).
    let mut trees = TokenStreamBuilder::new();
    let mut stack = Vec::new();

    loop {
        input = skip_whitespace(input);

        // Doc comments push their own tokens straight into `trees`.
        if let Ok((rest, ())) = doc_comment(input, &mut trees) {
            input = rest;
            continue;
        }

        #[cfg(span_locations)]
        let lo = input.off;

        let first = match input.bytes().next() {
            Some(first) => first,
            // End of input: fine at top level, an error inside an open group.
            None => match stack.last() {
                None => return Ok(trees.build()),
                #[cfg(span_locations)]
                Some((lo, _frame)) => {
                    // Report the position of the unclosed opening delimiter.
                    return Err(LexError {
                        span: Span { lo: *lo, hi: *lo },
                    })
                }
                #[cfg(not(span_locations))]
                Some(_frame) => return Err(LexError { span: Span {} }),
            },
        };

        if let Some(open_delimiter) = match first {
            // `(/*ERROR*/)` is lexed as a leaf token, not as a group.
            b'(' if !input.starts_with(ERROR) => Some(Delimiter::Parenthesis),
            b'[' => Some(Delimiter::Bracket),
            b'{' => Some(Delimiter::Brace),
            _ => None,
        } {
            input = input.advance(1);
            // Park the enclosing builder and start a fresh one for the group.
            let frame = (open_delimiter, trees);
            #[cfg(span_locations)]
            let frame = (lo, frame);
            stack.push(frame);
            trees = TokenStreamBuilder::new();
        } else if let Some(close_delimiter) = match first {
            b')' => Some(Delimiter::Parenthesis),
            b']' => Some(Delimiter::Bracket),
            b'}' => Some(Delimiter::Brace),
            _ => None,
        } {
            let frame = match stack.pop() {
                Some(frame) => frame,
                // Closing delimiter with nothing open.
                None => return Err(lex_error(input)),
            };
            #[cfg(span_locations)]
            let (lo, frame) = frame;
            let (open_delimiter, outer) = frame;
            if open_delimiter != close_delimiter {
                return Err(lex_error(input));
            }
            input = input.advance(1);
            let mut g = Group::new(open_delimiter, trees.build());
            // The group spans from its opening delimiter to just past the
            // closing one.
            g.set_span(Span {
                #[cfg(span_locations)]
                lo,
                #[cfg(span_locations)]
                hi: input.off,
            });
            // Resume filling the enclosing stream and append the group to it.
            trees = outer;
            trees.push_token_from_parser(TokenTree::Group(crate::Group::_new_fallback(g)));
        } else {
            let (rest, mut tt) = match leaf_token(input) {
                Ok((rest, tt)) => (rest, tt),
                Err(Reject) => return Err(lex_error(input)),
            };
            tt.set_span(crate::Span::_new_fallback(Span {
                #[cfg(span_locations)]
                lo,
                #[cfg(span_locations)]
                hi: rest.off,
            }));
            trees.push_token_from_parser(tt);
            input = rest;
        }
    }
}
254
lex_error(cursor: Cursor) -> LexError255 fn lex_error(cursor: Cursor) -> LexError {
256 #[cfg(not(span_locations))]
257 let _ = cursor;
258 LexError {
259 span: Span {
260 #[cfg(span_locations)]
261 lo: cursor.off,
262 #[cfg(span_locations)]
263 hi: cursor.off,
264 },
265 }
266 }
267
leaf_token(input: Cursor) -> PResult<TokenTree>268 fn leaf_token(input: Cursor) -> PResult<TokenTree> {
269 if let Ok((input, l)) = literal(input) {
270 // must be parsed before ident
271 Ok((input, TokenTree::Literal(crate::Literal::_new_fallback(l))))
272 } else if let Ok((input, p)) = punct(input) {
273 Ok((input, TokenTree::Punct(p)))
274 } else if let Ok((input, i)) = ident(input) {
275 Ok((input, TokenTree::Ident(i)))
276 } else if input.starts_with(ERROR) {
277 let rest = input.advance(ERROR.len());
278 let repr = crate::Literal::_new_fallback(Literal::_new(ERROR.to_owned()));
279 Ok((rest, TokenTree::Literal(repr)))
280 } else {
281 Err(Reject)
282 }
283 }
284
ident(input: Cursor) -> PResult<crate::Ident>285 fn ident(input: Cursor) -> PResult<crate::Ident> {
286 if [
287 "r\"", "r#\"", "r##", "b\"", "b\'", "br\"", "br#", "c\"", "cr\"", "cr#",
288 ]
289 .iter()
290 .any(|prefix| input.starts_with(prefix))
291 {
292 Err(Reject)
293 } else {
294 ident_any(input)
295 }
296 }
297
ident_any(input: Cursor) -> PResult<crate::Ident>298 fn ident_any(input: Cursor) -> PResult<crate::Ident> {
299 let raw = input.starts_with("r#");
300 let rest = input.advance((raw as usize) << 1);
301
302 let (rest, sym) = ident_not_raw(rest)?;
303
304 if !raw {
305 let ident =
306 crate::Ident::_new_fallback(Ident::new_unchecked(sym, fallback::Span::call_site()));
307 return Ok((rest, ident));
308 }
309
310 match sym {
311 "_" | "super" | "self" | "Self" | "crate" => return Err(Reject),
312 _ => {}
313 }
314
315 let ident =
316 crate::Ident::_new_fallback(Ident::new_raw_unchecked(sym, fallback::Span::call_site()));
317 Ok((rest, ident))
318 }
319
ident_not_raw(input: Cursor) -> PResult<&str>320 fn ident_not_raw(input: Cursor) -> PResult<&str> {
321 let mut chars = input.char_indices();
322
323 match chars.next() {
324 Some((_, ch)) if is_ident_start(ch) => {}
325 _ => return Err(Reject),
326 }
327
328 let mut end = input.len();
329 for (i, ch) in chars {
330 if !is_ident_continue(ch) {
331 end = i;
332 break;
333 }
334 }
335
336 Ok((input.advance(end), &input.rest[..end]))
337 }
338
literal(input: Cursor) -> PResult<Literal>339 pub(crate) fn literal(input: Cursor) -> PResult<Literal> {
340 let rest = literal_nocapture(input)?;
341 let end = input.len() - rest.len();
342 Ok((rest, Literal::_new(input.rest[..end].to_string())))
343 }
344
literal_nocapture(input: Cursor) -> Result<Cursor, Reject>345 fn literal_nocapture(input: Cursor) -> Result<Cursor, Reject> {
346 if let Ok(ok) = string(input) {
347 Ok(ok)
348 } else if let Ok(ok) = byte_string(input) {
349 Ok(ok)
350 } else if let Ok(ok) = c_string(input) {
351 Ok(ok)
352 } else if let Ok(ok) = byte(input) {
353 Ok(ok)
354 } else if let Ok(ok) = character(input) {
355 Ok(ok)
356 } else if let Ok(ok) = float(input) {
357 Ok(ok)
358 } else if let Ok(ok) = int(input) {
359 Ok(ok)
360 } else {
361 Err(Reject)
362 }
363 }
364
literal_suffix(input: Cursor) -> Cursor365 fn literal_suffix(input: Cursor) -> Cursor {
366 match ident_not_raw(input) {
367 Ok((input, _)) => input,
368 Err(Reject) => input,
369 }
370 }
371
string(input: Cursor) -> Result<Cursor, Reject>372 fn string(input: Cursor) -> Result<Cursor, Reject> {
373 if let Ok(input) = input.parse("\"") {
374 cooked_string(input)
375 } else if let Ok(input) = input.parse("r") {
376 raw_string(input)
377 } else {
378 Err(Reject)
379 }
380 }
381
/// Scans the body of a cooked (non-raw) string literal.
///
/// `input` must be positioned just after the opening `"`. On success the
/// returned cursor is after the closing `"` and any literal suffix. Rejects
/// on a bare carriage return, an invalid escape, or a missing closing quote.
fn cooked_string(mut input: Cursor) -> Result<Cursor, Reject> {
    let mut chars = input.char_indices();

    while let Some((i, ch)) = chars.next() {
        match ch {
            '"' => {
                let input = input.advance(i + 1);
                return Ok(literal_suffix(input));
            }
            // A carriage return is only accepted as part of `\r\n`.
            '\r' => match chars.next() {
                Some((_, '\n')) => {}
                _ => break,
            },
            '\\' => match chars.next() {
                Some((_, 'x')) => {
                    backslash_x_char(&mut chars)?;
                }
                Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"' | '0')) => {}
                Some((_, 'u')) => {
                    backslash_u(&mut chars)?;
                }
                // Backslash followed by a line break: line continuation.
                Some((newline, ch @ ('\n' | '\r'))) => {
                    // Move past the line-break character, let
                    // `trailing_backslash` skip the following whitespace,
                    // then restart iteration so that later indices are
                    // relative to the new `input`.
                    input = input.advance(newline + 1);
                    trailing_backslash(&mut input, ch as u8)?;
                    chars = input.char_indices();
                }
                _ => break,
            },
            _ch => {}
        }
    }
    // Either an invalid sequence was hit or the input ended before the
    // closing quote.
    Err(Reject)
}
415
raw_string(input: Cursor) -> Result<Cursor, Reject>416 fn raw_string(input: Cursor) -> Result<Cursor, Reject> {
417 let (input, delimiter) = delimiter_of_raw_string(input)?;
418 let mut bytes = input.bytes().enumerate();
419 while let Some((i, byte)) = bytes.next() {
420 match byte {
421 b'"' if input.rest[i + 1..].starts_with(delimiter) => {
422 let rest = input.advance(i + 1 + delimiter.len());
423 return Ok(literal_suffix(rest));
424 }
425 b'\r' => match bytes.next() {
426 Some((_, b'\n')) => {}
427 _ => break,
428 },
429 _ => {}
430 }
431 }
432 Err(Reject)
433 }
434
byte_string(input: Cursor) -> Result<Cursor, Reject>435 fn byte_string(input: Cursor) -> Result<Cursor, Reject> {
436 if let Ok(input) = input.parse("b\"") {
437 cooked_byte_string(input)
438 } else if let Ok(input) = input.parse("br") {
439 raw_byte_string(input)
440 } else {
441 Err(Reject)
442 }
443 }
444
/// Scans the body of a cooked (non-raw) byte string literal.
///
/// `input` must be positioned just after the opening `b"`. On success the
/// returned cursor is after the closing `"` and any literal suffix. Rejects
/// on a non-ASCII byte, a bare carriage return, an invalid escape, or a
/// missing closing quote.
fn cooked_byte_string(mut input: Cursor) -> Result<Cursor, Reject> {
    let mut bytes = input.bytes().enumerate();
    while let Some((offset, b)) = bytes.next() {
        match b {
            b'"' => {
                let input = input.advance(offset + 1);
                return Ok(literal_suffix(input));
            }
            // A carriage return is only accepted as part of `\r\n`.
            b'\r' => match bytes.next() {
                Some((_, b'\n')) => {}
                _ => break,
            },
            b'\\' => match bytes.next() {
                Some((_, b'x')) => {
                    backslash_x_byte(&mut bytes)?;
                }
                Some((_, b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"')) => {}
                // Backslash followed by a line break: line continuation.
                Some((newline, b @ (b'\n' | b'\r'))) => {
                    // Move past the line-break byte, let `trailing_backslash`
                    // skip the following whitespace, then restart iteration
                    // so that later offsets are relative to the new `input`.
                    input = input.advance(newline + 1);
                    trailing_backslash(&mut input, b)?;
                    bytes = input.bytes().enumerate();
                }
                _ => break,
            },
            b if b.is_ascii() => {}
            // Non-ASCII bytes are not accepted inside a byte string.
            _ => break,
        }
    }
    Err(Reject)
}
475
delimiter_of_raw_string(input: Cursor) -> PResult<&str>476 fn delimiter_of_raw_string(input: Cursor) -> PResult<&str> {
477 for (i, byte) in input.bytes().enumerate() {
478 match byte {
479 b'"' => {
480 if i > 255 {
481 // https://github.com/rust-lang/rust/pull/95251
482 return Err(Reject);
483 }
484 return Ok((input.advance(i + 1), &input.rest[..i]));
485 }
486 b'#' => {}
487 _ => break,
488 }
489 }
490 Err(Reject)
491 }
492
raw_byte_string(input: Cursor) -> Result<Cursor, Reject>493 fn raw_byte_string(input: Cursor) -> Result<Cursor, Reject> {
494 let (input, delimiter) = delimiter_of_raw_string(input)?;
495 let mut bytes = input.bytes().enumerate();
496 while let Some((i, byte)) = bytes.next() {
497 match byte {
498 b'"' if input.rest[i + 1..].starts_with(delimiter) => {
499 let rest = input.advance(i + 1 + delimiter.len());
500 return Ok(literal_suffix(rest));
501 }
502 b'\r' => match bytes.next() {
503 Some((_, b'\n')) => {}
504 _ => break,
505 },
506 other => {
507 if !other.is_ascii() {
508 break;
509 }
510 }
511 }
512 }
513 Err(Reject)
514 }
515
c_string(input: Cursor) -> Result<Cursor, Reject>516 fn c_string(input: Cursor) -> Result<Cursor, Reject> {
517 if let Ok(input) = input.parse("c\"") {
518 cooked_c_string(input)
519 } else if let Ok(input) = input.parse("cr") {
520 raw_c_string(input)
521 } else {
522 Err(Reject)
523 }
524 }
525
raw_c_string(input: Cursor) -> Result<Cursor, Reject>526 fn raw_c_string(input: Cursor) -> Result<Cursor, Reject> {
527 let (input, delimiter) = delimiter_of_raw_string(input)?;
528 let mut bytes = input.bytes().enumerate();
529 while let Some((i, byte)) = bytes.next() {
530 match byte {
531 b'"' if input.rest[i + 1..].starts_with(delimiter) => {
532 let rest = input.advance(i + 1 + delimiter.len());
533 return Ok(literal_suffix(rest));
534 }
535 b'\r' => match bytes.next() {
536 Some((_, b'\n')) => {}
537 _ => break,
538 },
539 b'\0' => break,
540 _ => {}
541 }
542 }
543 Err(Reject)
544 }
545
/// Scans the body of a cooked (non-raw) C string literal.
///
/// `input` must be positioned just after the opening `c"`. On success the
/// returned cursor is after the closing `"` and any literal suffix. Compared
/// with `cooked_string`, a NUL is rejected in every spelling handled here: a
/// literal `\0` character, the `\0` escape (absent from the accepted escape
/// list), `\x00`, and a `\u{...}` escape that evaluates to NUL.
fn cooked_c_string(mut input: Cursor) -> Result<Cursor, Reject> {
    let mut chars = input.char_indices();

    while let Some((i, ch)) = chars.next() {
        match ch {
            '"' => {
                let input = input.advance(i + 1);
                return Ok(literal_suffix(input));
            }
            // A carriage return is only accepted as part of `\r\n`.
            '\r' => match chars.next() {
                Some((_, '\n')) => {}
                _ => break,
            },
            '\\' => match chars.next() {
                Some((_, 'x')) => {
                    backslash_x_nonzero(&mut chars)?;
                }
                Some((_, 'n' | 'r' | 't' | '\\' | '\'' | '"')) => {}
                Some((_, 'u')) => {
                    if backslash_u(&mut chars)? == '\0' {
                        break;
                    }
                }
                // Backslash followed by a line break: line continuation.
                Some((newline, ch @ ('\n' | '\r'))) => {
                    // Move past the line-break character, let
                    // `trailing_backslash` skip the following whitespace,
                    // then restart iteration so that later indices are
                    // relative to the new `input`.
                    input = input.advance(newline + 1);
                    trailing_backslash(&mut input, ch as u8)?;
                    chars = input.char_indices();
                }
                _ => break,
            },
            '\0' => break,
            _ch => {}
        }
    }
    Err(Reject)
}
582
byte(input: Cursor) -> Result<Cursor, Reject>583 fn byte(input: Cursor) -> Result<Cursor, Reject> {
584 let input = input.parse("b'")?;
585 let mut bytes = input.bytes().enumerate();
586 let ok = match bytes.next().map(|(_, b)| b) {
587 Some(b'\\') => match bytes.next().map(|(_, b)| b) {
588 Some(b'x') => backslash_x_byte(&mut bytes).is_ok(),
589 Some(b'n' | b'r' | b't' | b'\\' | b'0' | b'\'' | b'"') => true,
590 _ => false,
591 },
592 b => b.is_some(),
593 };
594 if !ok {
595 return Err(Reject);
596 }
597 let (offset, _) = bytes.next().ok_or(Reject)?;
598 if !input.chars().as_str().is_char_boundary(offset) {
599 return Err(Reject);
600 }
601 let input = input.advance(offset).parse("'")?;
602 Ok(literal_suffix(input))
603 }
604
character(input: Cursor) -> Result<Cursor, Reject>605 fn character(input: Cursor) -> Result<Cursor, Reject> {
606 let input = input.parse("'")?;
607 let mut chars = input.char_indices();
608 let ok = match chars.next().map(|(_, ch)| ch) {
609 Some('\\') => match chars.next().map(|(_, ch)| ch) {
610 Some('x') => backslash_x_char(&mut chars).is_ok(),
611 Some('u') => backslash_u(&mut chars).is_ok(),
612 Some('n' | 'r' | 't' | '\\' | '0' | '\'' | '"') => true,
613 _ => false,
614 },
615 ch => ch.is_some(),
616 };
617 if !ok {
618 return Err(Reject);
619 }
620 let (idx, _) = chars.next().ok_or(Reject)?;
621 let input = input.advance(idx).parse("'")?;
622 Ok(literal_suffix(input))
623 }
624
// Pulls the next `(index, item)` pair from the iterator `$chars` and
// evaluates to the item when it matches `$pat`. If the item does not match,
// or the iterator is exhausted, the *enclosing function* returns
// `Err(Reject)`.
macro_rules! next_ch {
    ($chars:ident @ $pat:pat) => {
        match $chars.next() {
            Some((_, ch)) => match ch {
                $pat => ch,
                _ => return Err(Reject),
            },
            None => return Err(Reject),
        }
    };
}
636
backslash_x_char<I>(chars: &mut I) -> Result<(), Reject> where I: Iterator<Item = (usize, char)>,637 fn backslash_x_char<I>(chars: &mut I) -> Result<(), Reject>
638 where
639 I: Iterator<Item = (usize, char)>,
640 {
641 next_ch!(chars @ '0'..='7');
642 next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
643 Ok(())
644 }
645
backslash_x_byte<I>(chars: &mut I) -> Result<(), Reject> where I: Iterator<Item = (usize, u8)>,646 fn backslash_x_byte<I>(chars: &mut I) -> Result<(), Reject>
647 where
648 I: Iterator<Item = (usize, u8)>,
649 {
650 next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
651 next_ch!(chars @ b'0'..=b'9' | b'a'..=b'f' | b'A'..=b'F');
652 Ok(())
653 }
654
backslash_x_nonzero<I>(chars: &mut I) -> Result<(), Reject> where I: Iterator<Item = (usize, char)>,655 fn backslash_x_nonzero<I>(chars: &mut I) -> Result<(), Reject>
656 where
657 I: Iterator<Item = (usize, char)>,
658 {
659 let first = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
660 let second = next_ch!(chars @ '0'..='9' | 'a'..='f' | 'A'..='F');
661 if first == '0' && second == '0' {
662 Err(Reject)
663 } else {
664 Ok(())
665 }
666 }
667
/// Parses the `{...}` part of a `\u{...}` escape and returns the character it
/// denotes.
///
/// Accepts one to six hex digits; underscores may appear after the first
/// digit and are ignored. Rejects when the braces are missing or empty, when
/// there are more than six digits, on any other character, or when the value
/// is not a valid `char` according to `char::from_u32`.
fn backslash_u<I>(chars: &mut I) -> Result<char, Reject>
where
    I: Iterator<Item = (usize, char)>,
{
    next_ch!(chars @ '{');
    let mut value = 0;
    // Number of hex digits seen so far (underscores are not counted).
    let mut len = 0;
    for (_, ch) in chars {
        let digit = match ch {
            '0'..='9' => ch as u8 - b'0',
            'a'..='f' => 10 + ch as u8 - b'a',
            'A'..='F' => 10 + ch as u8 - b'A',
            // `_` and `}` are only meaningful once a digit has been read.
            '_' if len > 0 => continue,
            '}' if len > 0 => return char::from_u32(value).ok_or(Reject),
            _ => break,
        };
        // Reached only for a digit: a seventh digit is one too many.
        if len == 6 {
            break;
        }
        value *= 0x10;
        value += u32::from(digit);
        len += 1;
    }
    Err(Reject)
}
693
/// Skips the whitespace that follows a backslash-newline line continuation
/// inside a string literal.
///
/// On entry `input` is positioned just after the line-break byte that
/// followed the backslash, and `last` is that byte (`\n` or `\r`). On success
/// `input` is advanced to the first byte that is not a space, tab, `\n` or
/// `\r`. Rejects when a `\r` is not immediately followed by `\n`, or when the
/// input ends before any such byte is found.
fn trailing_backslash(input: &mut Cursor, mut last: u8) -> Result<(), Reject> {
    let mut whitespace = input.bytes().enumerate();
    loop {
        // A carriage return must be followed by a line feed. Note that this
        // check consumes the byte after the `\r` from the iterator.
        if last == b'\r' && whitespace.next().map_or(true, |(_, b)| b != b'\n') {
            return Err(Reject);
        }
        match whitespace.next() {
            Some((_, b @ (b' ' | b'\t' | b'\n' | b'\r'))) => {
                last = b;
            }
            // First non-whitespace byte: the string content resumes here.
            Some((offset, _)) => {
                *input = input.advance(offset);
                return Ok(());
            }
            None => return Err(Reject),
        }
    }
}
712
float(input: Cursor) -> Result<Cursor, Reject>713 fn float(input: Cursor) -> Result<Cursor, Reject> {
714 let mut rest = float_digits(input)?;
715 if let Some(ch) = rest.chars().next() {
716 if is_ident_start(ch) {
717 rest = ident_not_raw(rest)?.0;
718 }
719 }
720 word_break(rest)
721 }
722
/// Recognizes the numeric part of a floating point literal (no suffix) and
/// returns the cursor after it.
///
/// The text must start with a decimal digit and contain a `.` and/or an
/// exponent marker (`e`/`E`); otherwise it is rejected so that it can be
/// lexed as an integer instead. Every character counted in `len` is ASCII,
/// so `len` is also the byte length passed to `advance`.
fn float_digits(input: Cursor) -> Result<Cursor, Reject> {
    let mut chars = input.chars().peekable();
    match chars.next() {
        Some(ch) if '0' <= ch && ch <= '9' => {}
        _ => return Err(Reject),
    }

    // Number of characters accepted so far (the leading digit included).
    let mut len = 1;
    let mut has_dot = false;
    let mut has_exp = false;
    // Mantissa: digits and underscores, at most one `.`, ended by `e`/`E`.
    while let Some(&ch) = chars.peek() {
        match ch {
            '0'..='9' | '_' => {
                chars.next();
                len += 1;
            }
            '.' => {
                // A second `.` is not part of this literal.
                if has_dot {
                    break;
                }
                chars.next();
                // `1..` and `1.foo` are not floats: reject when the dot is
                // followed by another dot or by the start of an identifier.
                if chars
                    .peek()
                    .map_or(false, |&ch| ch == '.' || is_ident_start(ch))
                {
                    return Err(Reject);
                }
                len += 1;
                has_dot = true;
            }
            'e' | 'E' => {
                chars.next();
                len += 1;
                has_exp = true;
                break;
            }
            _ => break,
        }
    }

    if !(has_dot || has_exp) {
        return Err(Reject);
    }

    if has_exp {
        // Fallback for an exponent marker that turns out to have no digits:
        // with a `.` already seen, the literal ends just before the `e`/`E`
        // (`len - 1`); without one there is no float at all.
        let token_before_exp = if has_dot {
            Ok(input.advance(len - 1))
        } else {
            Err(Reject)
        };
        let mut has_sign = false;
        let mut has_exp_value = false;
        while let Some(&ch) = chars.peek() {
            match ch {
                '+' | '-' => {
                    // A sign after exponent digits belongs to the next token.
                    if has_exp_value {
                        break;
                    }
                    if has_sign {
                        return token_before_exp;
                    }
                    chars.next();
                    len += 1;
                    has_sign = true;
                }
                '0'..='9' => {
                    chars.next();
                    len += 1;
                    has_exp_value = true;
                }
                '_' => {
                    chars.next();
                    len += 1;
                }
                _ => break,
            }
        }
        if !has_exp_value {
            return token_before_exp;
        }
    }

    Ok(input.advance(len))
}
807
int(input: Cursor) -> Result<Cursor, Reject>808 fn int(input: Cursor) -> Result<Cursor, Reject> {
809 let mut rest = digits(input)?;
810 if let Some(ch) = rest.chars().next() {
811 if is_ident_start(ch) {
812 rest = ident_not_raw(rest)?.0;
813 }
814 }
815 word_break(rest)
816 }
817
digits(mut input: Cursor) -> Result<Cursor, Reject>818 fn digits(mut input: Cursor) -> Result<Cursor, Reject> {
819 let base = if input.starts_with("0x") {
820 input = input.advance(2);
821 16
822 } else if input.starts_with("0o") {
823 input = input.advance(2);
824 8
825 } else if input.starts_with("0b") {
826 input = input.advance(2);
827 2
828 } else {
829 10
830 };
831
832 let mut len = 0;
833 let mut empty = true;
834 for b in input.bytes() {
835 match b {
836 b'0'..=b'9' => {
837 let digit = (b - b'0') as u64;
838 if digit >= base {
839 return Err(Reject);
840 }
841 }
842 b'a'..=b'f' => {
843 let digit = 10 + (b - b'a') as u64;
844 if digit >= base {
845 break;
846 }
847 }
848 b'A'..=b'F' => {
849 let digit = 10 + (b - b'A') as u64;
850 if digit >= base {
851 break;
852 }
853 }
854 b'_' => {
855 if empty && base == 10 {
856 return Err(Reject);
857 }
858 len += 1;
859 continue;
860 }
861 _ => break,
862 }
863 len += 1;
864 empty = false;
865 }
866 if empty {
867 Err(Reject)
868 } else {
869 Ok(input.advance(len))
870 }
871 }
872
punct(input: Cursor) -> PResult<Punct>873 fn punct(input: Cursor) -> PResult<Punct> {
874 let (rest, ch) = punct_char(input)?;
875 if ch == '\'' {
876 let (after_lifetime, _ident) = ident_any(rest)?;
877 if after_lifetime.starts_with_char('\'')
878 || (after_lifetime.starts_with_char('#') && !rest.starts_with("r#"))
879 {
880 Err(Reject)
881 } else {
882 Ok((rest, Punct::new('\'', Spacing::Joint)))
883 }
884 } else {
885 let kind = match punct_char(rest) {
886 Ok(_) => Spacing::Joint,
887 Err(Reject) => Spacing::Alone,
888 };
889 Ok((rest, Punct::new(ch, kind)))
890 }
891 }
892
punct_char(input: Cursor) -> PResult<char>893 fn punct_char(input: Cursor) -> PResult<char> {
894 if input.starts_with("//") || input.starts_with("/*") {
895 // Do not accept `/` of a comment as a punct.
896 return Err(Reject);
897 }
898
899 let mut chars = input.chars();
900 let first = match chars.next() {
901 Some(ch) => ch,
902 None => {
903 return Err(Reject);
904 }
905 };
906 let recognized = "~!@#$%^&*-=+|;:,<.>/?'";
907 if recognized.contains(first) {
908 Ok((input.advance(first.len_utf8()), first))
909 } else {
910 Err(Reject)
911 }
912 }
913
/// Parses a doc comment at the front of `input` and pushes the equivalent
/// attribute tokens onto `trees`: `#`, an optional `!` for inner doc
/// comments, and a bracketed group holding `doc = "<comment text>"`.
///
/// Rejects when `input` does not start with a doc comment, or when the
/// comment text contains a carriage return that is not followed by a line
/// feed. Nothing is pushed onto `trees` in either case.
fn doc_comment<'a>(input: Cursor<'a>, trees: &mut TokenStreamBuilder) -> PResult<'a, ()> {
    #[cfg(span_locations)]
    let lo = input.off;
    let (rest, (comment, inner)) = doc_comment_contents(input)?;
    // Every generated token gets the span of the whole comment.
    let fallback_span = Span {
        #[cfg(span_locations)]
        lo,
        #[cfg(span_locations)]
        hi: rest.off,
    };
    let span = crate::Span::_new_fallback(fallback_span);

    // Reject any `\r` that is not immediately followed by `\n`.
    let mut scan_for_bare_cr = comment;
    while let Some(cr) = scan_for_bare_cr.find('\r') {
        let rest = &scan_for_bare_cr[cr + 1..];
        if !rest.starts_with('\n') {
            return Err(Reject);
        }
        scan_for_bare_cr = rest;
    }

    let mut pound = Punct::new('#', Spacing::Alone);
    pound.set_span(span);
    trees.push_token_from_parser(TokenTree::Punct(pound));

    if inner {
        let mut bang = Punct::new('!', Spacing::Alone);
        bang.set_span(span);
        trees.push_token_from_parser(TokenTree::Punct(bang));
    }

    // Build the `[doc = "..."]` group.
    let doc_ident = crate::Ident::_new_fallback(Ident::new_unchecked("doc", fallback_span));
    let mut equal = Punct::new('=', Spacing::Alone);
    equal.set_span(span);
    let mut literal = crate::Literal::_new_fallback(Literal::string(comment));
    literal.set_span(span);
    let mut bracketed = TokenStreamBuilder::with_capacity(3);
    bracketed.push_token_from_parser(TokenTree::Ident(doc_ident));
    bracketed.push_token_from_parser(TokenTree::Punct(equal));
    bracketed.push_token_from_parser(TokenTree::Literal(literal));
    let group = Group::new(Delimiter::Bracket, bracketed.build());
    let mut group = crate::Group::_new_fallback(group);
    group.set_span(span);
    trees.push_token_from_parser(TokenTree::Group(group));

    Ok((rest, ()))
}
961
doc_comment_contents(input: Cursor) -> PResult<(&str, bool)>962 fn doc_comment_contents(input: Cursor) -> PResult<(&str, bool)> {
963 if input.starts_with("//!") {
964 let input = input.advance(3);
965 let (input, s) = take_until_newline_or_eof(input);
966 Ok((input, (s, true)))
967 } else if input.starts_with("/*!") {
968 let (input, s) = block_comment(input)?;
969 Ok((input, (&s[3..s.len() - 2], true)))
970 } else if input.starts_with("///") {
971 let input = input.advance(3);
972 if input.starts_with_char('/') {
973 return Err(Reject);
974 }
975 let (input, s) = take_until_newline_or_eof(input);
976 Ok((input, (s, false)))
977 } else if input.starts_with("/**") && !input.rest[3..].starts_with('*') {
978 let (input, s) = block_comment(input)?;
979 Ok((input, (&s[3..s.len() - 2], false)))
980 } else {
981 Err(Reject)
982 }
983 }
984
take_until_newline_or_eof(input: Cursor) -> (Cursor, &str)985 fn take_until_newline_or_eof(input: Cursor) -> (Cursor, &str) {
986 let chars = input.char_indices();
987
988 for (i, ch) in chars {
989 if ch == '\n' {
990 return (input.advance(i), &input.rest[..i]);
991 } else if ch == '\r' && input.rest[i + 1..].starts_with('\n') {
992 return (input.advance(i + 1), &input.rest[..i]);
993 }
994 }
995
996 (input.advance(input.len()), input.rest)
997 }
998