xref: /linux/rust/syn/discouraged.rs (revision 54e3eae855629702c566bd2e130d9f40e7f35bde)
1 // SPDX-License-Identifier: Apache-2.0 OR MIT
2 
3 //! Extensions to the parsing API with niche applicability.
4 
5 use crate::buffer::Cursor;
6 use crate::error::Result;
7 use crate::parse::{inner_unexpected, ParseBuffer, Unexpected};
8 use proc_macro2::extra::DelimSpan;
9 use proc_macro2::Delimiter;
10 use std::cell::Cell;
11 use std::mem;
12 use std::rc::Rc;
13 
/// Speculative-parsing extensions for the `ParseStream` API.
pub trait Speculative {
    /// Move this parse stream forward to wherever a forked parse stream
    /// currently is.
    ///
    /// This is the inverse of [`ParseStream::fork`]. The usual pattern is to
    /// fork a parse stream, parse speculatively on the fork, and then "commit"
    /// what the fork consumed by advancing the original stream to the fork's
    /// position.
    ///
    /// Avoid this whenever you can, because it limits the ability to generate
    /// useful errors. It is nevertheless often the only way to parse syntax of
    /// the form `A* B*` for arbitrary syntax `A` and `B`. When the fork fails
    /// to parse an `A`, there is no way to tell whether the user meant to
    /// write an `A` and made a syntax error, or whether the `A`s are simply
    /// over and the `B`s are about to begin. Use with care.
    ///
    /// Note also that when `A` is a subset of `B`, `A* B*` can be handled by
    /// parsing `B*` and then splitting the leading members of `A` off of the
    /// repetition, which sidesteps the downsides of speculative parsing
    /// altogether.
    ///
    /// [`ParseStream::fork`]: ParseBuffer::fork
    ///
    /// # Example
    ///
    /// There has been chatter about dropping the requirement for the colons in
    /// turbofish syntax such as `path::to::<T>`, by accepting `path::to<T>` in
    /// expression position. Specifically, according to [RFC 2544],
    /// [`PathSegment`] parsing should always try to consume a following `<`
    /// token as the start of generic arguments, and reset to the `<` if that
    /// fails (e.g. the token is acting as a less-than operator).
    ///
    /// That is precisely the "fork, try, commit" behavior which
    /// [`ParseStream::fork`] discourages. With `advance_to`, the speculatively
    /// parsed content at least does not have to be parsed a second time.
    ///
    /// In syn, this change in behavior only requires replacing the `Parse`
    /// implementation for `PathSegment`:
    ///
    /// ```
    /// # use syn::ext::IdentExt;
    /// use syn::parse::discouraged::Speculative;
    /// # use syn::parse::{Parse, ParseStream};
    /// # use syn::{Ident, PathArguments, Result, Token};
    ///
    /// pub struct PathSegment {
    ///     pub ident: Ident,
    ///     pub arguments: PathArguments,
    /// }
    /// #
    /// # impl<T> From<T> for PathSegment
    /// # where
    /// #     T: Into<Ident>,
    /// # {
    /// #     fn from(ident: T) -> Self {
    /// #         PathSegment {
    /// #             ident: ident.into(),
    /// #             arguments: PathArguments::None,
    /// #         }
    /// #     }
    /// # }
    ///
    /// impl Parse for PathSegment {
    ///     fn parse(input: ParseStream) -> Result<Self> {
    ///         if input.peek(Token![super])
    ///             || input.peek(Token![self])
    ///             || input.peek(Token![Self])
    ///             || input.peek(Token![crate])
    ///         {
    ///             let ident = input.call(Ident::parse_any)?;
    ///             return Ok(PathSegment::from(ident));
    ///         }
    ///
    ///         let ident = input.parse()?;
    ///         if input.peek(Token![::]) && input.peek3(Token![<]) {
    ///             return Ok(PathSegment {
    ///                 ident,
    ///                 arguments: PathArguments::AngleBracketed(input.parse()?),
    ///             });
    ///         }
    ///         if input.peek(Token![<]) && !input.peek(Token![<=]) {
    ///             let fork = input.fork();
    ///             if let Ok(arguments) = fork.parse() {
    ///                 input.advance_to(&fork);
    ///                 return Ok(PathSegment {
    ///                     ident,
    ///                     arguments: PathArguments::AngleBracketed(arguments),
    ///                 });
    ///             }
    ///         }
    ///         Ok(PathSegment::from(ident))
    ///     }
    /// }
    ///
    /// # syn::parse_str::<PathSegment>("a<b,c>").unwrap();
    /// ```
    ///
    /// # Drawbacks
    ///
    /// Error presentation is where this style of speculative parsing hurts
    /// most. Even when the lookahead is the "correct" parse, the error that
    /// ends up being shown is the one from the "fallback" parse. Sticking with
    /// the turbofish example above, consider this unfinished "turbofish":
    ///
    /// ```text
    /// let _ = f<&'a fn(), for<'a> serde::>();
    /// ```
    ///
    /// Parsed as generic arguments, it allows for the error message
    ///
    /// ```text
    /// error: expected identifier
    ///  --> src.rs:L:C
    ///   |
    /// L | let _ = f<&'a fn(), for<'a> serde::>();
    ///   |                                    ^
    /// ```
    ///
    /// whereas the speculative parser above, once the generic arguments fail
    /// to parse, falls back to treating the `<` as a less-than and tries to
    /// read the `&'a` as the start of a labelled loop, which yields the much
    /// less helpful error
    ///
    /// ```text
    /// error: expected `:`
    ///  --> src.rs:L:C
    ///   |
    /// L | let _ = f<&'a fn(), for<'a> serde::>();
    ///   |               ^^
    /// ```
    ///
    /// Various heuristics can soften this (two examples: show both forks'
    /// parse errors, or show the one that consumed more tokens). However, when
    /// the grammar is under your control, displaying reasonable errors is much
    /// simpler if you stick to something that can be parsed LL(3), without the
    /// LL(*) speculative parsing that this method makes possible.
    ///
    /// [RFC 2544]: https://github.com/rust-lang/rfcs/pull/2544
    /// [`PathSegment`]: crate::PathSegment
    ///
    /// # Performance
    ///
    /// The work done by this method is cheap and fixed in size; it does not
    /// grow with the distance between the positions of the two streams.
    ///
    /// # Panics
    ///
    /// The stream passed to `advance_to` must have been obtained by forking
    /// `self`. Trying to advance to any other stream causes a panic.
    fn advance_to(&self, fork: &Self);
}
168 
169 impl<'a> Speculative for ParseBuffer<'a> {
170     fn advance_to(&self, fork: &Self) {
171         if !crate::buffer::same_scope(self.cursor(), fork.cursor()) {
172             panic!("fork was not derived from the advancing parse stream");
173         }
174 
175         let (self_unexp, self_sp) = inner_unexpected(self);
176         let (fork_unexp, fork_sp) = inner_unexpected(fork);
177         if !Rc::ptr_eq(&self_unexp, &fork_unexp) {
178             match (fork_sp, self_sp) {
179                 // Unexpected set on the fork, but not on `self`, copy it over.
180                 (Some((span, delimiter)), None) => {
181                     self_unexp.set(Unexpected::Some(span, delimiter));
182                 }
183                 // Unexpected unset. Use chain to propagate errors from fork.
184                 (None, None) => {
185                     fork_unexp.set(Unexpected::Chain(self_unexp));
186 
187                     // Ensure toplevel 'unexpected' tokens from the fork don't
188                     // propagate up the chain by replacing the root `unexpected`
189                     // pointer, only 'unexpected' tokens from existing group
190                     // parsers should propagate.
191                     fork.unexpected
192                         .set(Some(Rc::new(Cell::new(Unexpected::None))));
193                 }
194                 // Unexpected has been set on `self`. No changes needed.
195                 (_, Some(_)) => {}
196             }
197         }
198 
199         // See comment on `cell` in the struct definition.
200         self.cell
201             .set(unsafe { mem::transmute::<Cursor, Cursor<'static>>(fork.cursor()) });
202     }
203 }
204 
205 /// Extensions to the `ParseStream` API to support manipulating invisible
206 /// delimiters the same as if they were visible.
207 pub trait AnyDelimiter {
208     /// Returns the delimiter, the span of the delimiter token, and the nested
209     /// contents for further parsing.
210     fn parse_any_delimiter(&self) -> Result<(Delimiter, DelimSpan, ParseBuffer)>;
211 }
212 
213 impl<'a> AnyDelimiter for ParseBuffer<'a> {
214     fn parse_any_delimiter(&self) -> Result<(Delimiter, DelimSpan, ParseBuffer)> {
215         self.step(|cursor| {
216             if let Some((content, delimiter, span, rest)) = cursor.any_group() {
217                 let scope = span.close();
218                 let nested = crate::parse::advance_step_cursor(cursor, content);
219                 let unexpected = crate::parse::get_unexpected(self);
220                 let content = crate::parse::new_parse_buffer(scope, nested, unexpected);
221                 Ok(((delimiter, span, content), rest))
222             } else {
223                 Err(cursor.error("expected any delimiter"))
224             }
225         })
226     }
227 }
228