1 // SPDX-License-Identifier: Apache-2.0 OR MIT 2 3 //! Extensions to the parsing API with niche applicability. 4 5 use crate::buffer::Cursor; 6 use crate::error::Result; 7 use crate::parse::{inner_unexpected, ParseBuffer, Unexpected}; 8 use proc_macro2::extra::DelimSpan; 9 use proc_macro2::Delimiter; 10 use std::cell::Cell; 11 use std::mem; 12 use std::rc::Rc; 13 14 /// Extensions to the `ParseStream` API to support speculative parsing. 15 pub trait Speculative { 16 /// Advance this parse stream to the position of a forked parse stream. 17 /// 18 /// This is the opposite operation to [`ParseStream::fork`]. You can fork a 19 /// parse stream, perform some speculative parsing, then join the original 20 /// stream to the fork to "commit" the parsing from the fork to the main 21 /// stream. 22 /// 23 /// If you can avoid doing this, you should, as it limits the ability to 24 /// generate useful errors. That said, it is often the only way to parse 25 /// syntax of the form `A* B*` for arbitrary syntax `A` and `B`. The problem 26 /// is that when the fork fails to parse an `A`, it's impossible to tell 27 /// whether that was because of a syntax error and the user meant to provide 28 /// an `A`, or that the `A`s are finished and it's time to start parsing 29 /// `B`s. Use with care. 30 /// 31 /// Also note that if `A` is a subset of `B`, `A* B*` can be parsed by 32 /// parsing `B*` and removing the leading members of `A` from the 33 /// repetition, bypassing the need to involve the downsides associated with 34 /// speculative parsing. 35 /// 36 /// [`ParseStream::fork`]: ParseBuffer::fork 37 /// 38 /// # Example 39 /// 40 /// There has been chatter about the possibility of making the colons in the 41 /// turbofish syntax like `path::to::<T>` no longer required by accepting 42 /// `path::to<T>` in expression position. Specifically, according to [RFC 43 /// 2544], [`PathSegment`] parsing should always try to consume a following 44 /// `<` token as the start of generic arguments, and reset to the `<` if 45 /// that fails (e.g. the token is acting as a less-than operator). 46 /// 47 /// This is the exact kind of parsing behavior which requires the "fork, 48 /// try, commit" behavior that [`ParseStream::fork`] discourages. With 49 /// `advance_to`, we can avoid having to parse the speculatively parsed 50 /// content a second time. 51 /// 52 /// This change in behavior can be implemented in syn by replacing just the 53 /// `Parse` implementation for `PathSegment`: 54 /// 55 /// ``` 56 /// # use syn::ext::IdentExt; 57 /// use syn::parse::discouraged::Speculative; 58 /// # use syn::parse::{Parse, ParseStream}; 59 /// # use syn::{Ident, PathArguments, Result, Token}; 60 /// 61 /// pub struct PathSegment { 62 /// pub ident: Ident, 63 /// pub arguments: PathArguments, 64 /// } 65 /// # 66 /// # impl<T> From<T> for PathSegment 67 /// # where 68 /// # T: Into<Ident>, 69 /// # { 70 /// # fn from(ident: T) -> Self { 71 /// # PathSegment { 72 /// # ident: ident.into(), 73 /// # arguments: PathArguments::None, 74 /// # } 75 /// # } 76 /// # } 77 /// 78 /// impl Parse for PathSegment { 79 /// fn parse(input: ParseStream) -> Result<Self> { 80 /// if input.peek(Token![super]) 81 /// || input.peek(Token![self]) 82 /// || input.peek(Token![Self]) 83 /// || input.peek(Token![crate]) 84 /// { 85 /// let ident = input.call(Ident::parse_any)?; 86 /// return Ok(PathSegment::from(ident)); 87 /// } 88 /// 89 /// let ident = input.parse()?; 90 /// if input.peek(Token![::]) && input.peek3(Token![<]) { 91 /// return Ok(PathSegment { 92 /// ident, 93 /// arguments: PathArguments::AngleBracketed(input.parse()?), 94 /// }); 95 /// } 96 /// if input.peek(Token![<]) && !input.peek(Token![<=]) { 97 /// let fork = input.fork(); 98 /// if let Ok(arguments) = fork.parse() { 99 /// input.advance_to(&fork); 100 /// return Ok(PathSegment { 101 /// ident, 102 /// arguments: PathArguments::AngleBracketed(arguments), 103 /// }); 104 /// } 105 /// } 106 /// Ok(PathSegment::from(ident)) 107 /// } 108 /// } 109 /// 110 /// # syn::parse_str::<PathSegment>("a<b,c>").unwrap(); 111 /// ``` 112 /// 113 /// # Drawbacks 114 /// 115 /// The main drawback of this style of speculative parsing is in error 116 /// presentation. Even if the lookahead is the "correct" parse, the error 117 /// that is shown is that of the "fallback" parse. To use the same example 118 /// as the turbofish above, take the following unfinished "turbofish": 119 /// 120 /// ```text 121 /// let _ = f<&'a fn(), for<'a> serde::>(); 122 /// ``` 123 /// 124 /// If this is parsed as generic arguments, we can provide the error message 125 /// 126 /// ```text 127 /// error: expected identifier 128 /// --> src.rs:L:C 129 /// | 130 /// L | let _ = f<&'a fn(), for<'a> serde::>(); 131 /// | ^ 132 /// ``` 133 /// 134 /// but if parsed using the above speculative parsing, it falls back to 135 /// assuming that the `<` is a less-than when it fails to parse the generic 136 /// arguments, and tries to interpret the `&'a` as the start of a labelled 137 /// loop, resulting in the much less helpful error 138 /// 139 /// ```text 140 /// error: expected `:` 141 /// --> src.rs:L:C 142 /// | 143 /// L | let _ = f<&'a fn(), for<'a> serde::>(); 144 /// | ^^ 145 /// ``` 146 /// 147 /// This can be mitigated with various heuristics (two examples: show both 148 /// forks' parse errors, or show the one that consumed more tokens), but 149 /// when you can control the grammar, sticking to something that can be 150 /// parsed LL(3) and without the LL(*) speculative parsing this makes 151 /// possible, displaying reasonable errors becomes much more simple. 152 /// 153 /// [RFC 2544]: https://github.com/rust-lang/rfcs/pull/2544 154 /// [`PathSegment`]: crate::PathSegment 155 /// 156 /// # Performance 157 /// 158 /// This method performs a cheap fixed amount of work that does not depend 159 /// on how far apart the two streams are positioned. 160 /// 161 /// # Panics 162 /// 163 /// The forked stream in the argument of `advance_to` must have been 164 /// obtained by forking `self`. Attempting to advance to any other stream 165 /// will cause a panic. advance_to(&self, fork: &Self)166 fn advance_to(&self, fork: &Self); 167 } 168 169 impl<'a> Speculative for ParseBuffer<'a> { advance_to(&self, fork: &Self)170 fn advance_to(&self, fork: &Self) { 171 if !crate::buffer::same_scope(self.cursor(), fork.cursor()) { 172 panic!("fork was not derived from the advancing parse stream"); 173 } 174 175 let (self_unexp, self_sp) = inner_unexpected(self); 176 let (fork_unexp, fork_sp) = inner_unexpected(fork); 177 if !Rc::ptr_eq(&self_unexp, &fork_unexp) { 178 match (fork_sp, self_sp) { 179 // Unexpected set on the fork, but not on `self`, copy it over. 180 (Some((span, delimiter)), None) => { 181 self_unexp.set(Unexpected::Some(span, delimiter)); 182 } 183 // Unexpected unset. Use chain to propagate errors from fork. 184 (None, None) => { 185 fork_unexp.set(Unexpected::Chain(self_unexp)); 186 187 // Ensure toplevel 'unexpected' tokens from the fork don't 188 // propagate up the chain by replacing the root `unexpected` 189 // pointer, only 'unexpected' tokens from existing group 190 // parsers should propagate. 191 fork.unexpected 192 .set(Some(Rc::new(Cell::new(Unexpected::None)))); 193 } 194 // Unexpected has been set on `self`. No changes needed. 195 (_, Some(_)) => {} 196 } 197 } 198 199 // See comment on `cell` in the struct definition. 200 self.cell 201 .set(unsafe { mem::transmute::<Cursor, Cursor<'static>>(fork.cursor()) }); 202 } 203 } 204 205 /// Extensions to the `ParseStream` API to support manipulating invisible 206 /// delimiters the same as if they were visible. 207 pub trait AnyDelimiter { 208 /// Returns the delimiter, the span of the delimiter token, and the nested 209 /// contents for further parsing. parse_any_delimiter(&self) -> Result<(Delimiter, DelimSpan, ParseBuffer)>210 fn parse_any_delimiter(&self) -> Result<(Delimiter, DelimSpan, ParseBuffer)>; 211 } 212 213 impl<'a> AnyDelimiter for ParseBuffer<'a> { parse_any_delimiter(&self) -> Result<(Delimiter, DelimSpan, ParseBuffer)>214 fn parse_any_delimiter(&self) -> Result<(Delimiter, DelimSpan, ParseBuffer)> { 215 self.step(|cursor| { 216 if let Some((content, delimiter, span, rest)) = cursor.any_group() { 217 let scope = span.close(); 218 let nested = crate::parse::advance_step_cursor(cursor, content); 219 let unexpected = crate::parse::get_unexpected(self); 220 let content = crate::parse::new_parse_buffer(scope, nested, unexpected); 221 Ok(((delimiter, span, content), rest)) 222 } else { 223 Err(cursor.error("expected any delimiter")) 224 } 225 }) 226 } 227 } 228