xref: /linux/rust/syn/buffer.rs (revision 808c999fc9e7c366fd47da564e69d579c1dc8279)
1 //! A stably addressed token buffer supporting efficient traversal based on a
2 //! cheaply copyable cursor.
3 
4 // This module is heavily commented as it contains most of the unsafe code in
5 // Syn, and caution should be used when editing it. The public-facing interface
6 // is 100% safe but the implementation is fragile internally.
7 
8 use crate::Lifetime;
9 use proc_macro2::extra::DelimSpan;
10 use proc_macro2::{Delimiter, Group, Ident, Literal, Punct, Spacing, Span, TokenStream, TokenTree};
11 use std::cmp::Ordering;
12 use std::marker::PhantomData;
13 use std::ptr;
14 
15 /// Internal type which is used instead of `TokenTree` to represent a token tree
16 /// within a `TokenBuffer`.
17 enum Entry {
18     // Mimicking types from proc-macro.
19     // Group entries contain the offset to the matching End entry.
20     Group(Group, usize),
21     Ident(Ident),
22     Punct(Punct),
23     Literal(Literal),
24     // End entries contain the offset (negative) to the start of the buffer, and
25     // offset (negative) to the matching Group entry.
26     End(isize, isize),
27 }
28 
29 /// A buffer that can be efficiently traversed multiple times, unlike
30 /// `TokenStream` which requires a deep copy in order to traverse more than
31 /// once.
32 pub struct TokenBuffer {
33     // NOTE: Do not implement clone on this - while the current design could be
34     // cloned, other designs which could be desirable may not be cloneable.
35     entries: Box<[Entry]>,
36 }
37 
38 impl TokenBuffer {
39     fn recursive_new(entries: &mut Vec<Entry>, stream: TokenStream) {
40         for tt in stream {
41             match tt {
42                 TokenTree::Ident(ident) => entries.push(Entry::Ident(ident)),
43                 TokenTree::Punct(punct) => entries.push(Entry::Punct(punct)),
44                 TokenTree::Literal(literal) => entries.push(Entry::Literal(literal)),
45                 TokenTree::Group(group) => {
46                     let group_start_index = entries.len();
47                     entries.push(Entry::End(0, 0)); // we replace this below
48                     Self::recursive_new(entries, group.stream());
49                     let group_end_index = entries.len();
50                     let group_offset = group_end_index - group_start_index;
51                     entries.push(Entry::End(
52                         -(group_end_index as isize),
53                         -(group_offset as isize),
54                     ));
55                     entries[group_start_index] = Entry::Group(group, group_offset);
56                 }
57             }
58         }
59     }
60 
61     /// Creates a `TokenBuffer` containing all the tokens from the input
62     /// `proc_macro::TokenStream`.
63     #[cfg(feature = "proc-macro")]
64     #[cfg_attr(docsrs, doc(cfg(feature = "proc-macro")))]
65     pub fn new(stream: proc_macro::TokenStream) -> Self {
66         Self::new2(stream.into())
67     }
68 
69     /// Creates a `TokenBuffer` containing all the tokens from the input
70     /// `proc_macro2::TokenStream`.
71     pub fn new2(stream: TokenStream) -> Self {
72         let mut entries = Vec::new();
73         Self::recursive_new(&mut entries, stream);
74         entries.push(Entry::End(-(entries.len() as isize), 0));
75         Self {
76             entries: entries.into_boxed_slice(),
77         }
78     }
79 
80     /// Creates a cursor referencing the first token in the buffer and able to
81     /// traverse until the end of the buffer.
82     pub fn begin(&self) -> Cursor {
83         let ptr = self.entries.as_ptr();
84         unsafe { Cursor::create(ptr, ptr.add(self.entries.len() - 1)) }
85     }
86 }
87 
88 /// A cheaply copyable cursor into a `TokenBuffer`.
89 ///
90 /// This cursor holds a shared reference into the immutable data which is used
91 /// internally to represent a `TokenStream`, and can be efficiently manipulated
92 /// and copied around.
93 ///
94 /// An empty `Cursor` can be created directly, or one may create a `TokenBuffer`
95 /// object and get a cursor to its first token with `begin()`.
96 pub struct Cursor<'a> {
97     // The current entry which the `Cursor` is pointing at.
98     ptr: *const Entry,
99     // This is the only `Entry::End` object which this cursor is allowed to
100     // point at. All other `End` objects are skipped over in `Cursor::create`.
101     scope: *const Entry,
102     // Cursor is covariant in 'a. This field ensures that our pointers are still
103     // valid.
104     marker: PhantomData<&'a Entry>,
105 }
106 
107 impl<'a> Cursor<'a> {
108     /// Creates a cursor referencing a static empty TokenStream.
109     pub fn empty() -> Self {
110         // It's safe in this situation for us to put an `Entry` object in global
111         // storage, despite it not actually being safe to send across threads
112         // (`Ident` is a reference into a thread-local table). This is because
113         // this entry never includes a `Ident` object.
114         //
115         // This wrapper struct allows us to break the rules and put a `Sync`
116         // object in global storage.
117         struct UnsafeSyncEntry(Entry);
118         unsafe impl Sync for UnsafeSyncEntry {}
119         static EMPTY_ENTRY: UnsafeSyncEntry = UnsafeSyncEntry(Entry::End(0, 0));
120 
121         Cursor {
122             ptr: &EMPTY_ENTRY.0,
123             scope: &EMPTY_ENTRY.0,
124             marker: PhantomData,
125         }
126     }
127 
128     /// This create method intelligently exits non-explicitly-entered
129     /// `None`-delimited scopes when the cursor reaches the end of them,
130     /// allowing for them to be treated transparently.
131     unsafe fn create(mut ptr: *const Entry, scope: *const Entry) -> Self {
132         // NOTE: If we're looking at a `End`, we want to advance the cursor
133         // past it, unless `ptr == scope`, which means that we're at the edge of
134         // our cursor's scope. We should only have `ptr != scope` at the exit
135         // from None-delimited groups entered with `ignore_none`.
136         while let Entry::End(..) = unsafe { &*ptr } {
137             if ptr::eq(ptr, scope) {
138                 break;
139             }
140             ptr = unsafe { ptr.add(1) };
141         }
142 
143         Cursor {
144             ptr,
145             scope,
146             marker: PhantomData,
147         }
148     }
149 
150     /// Get the current entry.
151     fn entry(self) -> &'a Entry {
152         unsafe { &*self.ptr }
153     }
154 
155     /// Bump the cursor to point at the next token after the current one. This
156     /// is undefined behavior if the cursor is currently looking at an
157     /// `Entry::End`.
158     ///
159     /// If the cursor is looking at an `Entry::Group`, the bumped cursor will
160     /// point at the first token in the group (with the same scope end).
161     unsafe fn bump_ignore_group(self) -> Cursor<'a> {
162         unsafe { Cursor::create(self.ptr.offset(1), self.scope) }
163     }
164 
165     /// While the cursor is looking at a `None`-delimited group, move it to look
166     /// at the first token inside instead. If the group is empty, this will move
167     /// the cursor past the `None`-delimited group.
168     ///
169     /// WARNING: This mutates its argument.
170     fn ignore_none(&mut self) {
171         while let Entry::Group(group, _) = self.entry() {
172             if group.delimiter() == Delimiter::None {
173                 unsafe { *self = self.bump_ignore_group() };
174             } else {
175                 break;
176             }
177         }
178     }
179 
180     /// Checks whether the cursor is currently pointing at the end of its valid
181     /// scope.
182     pub fn eof(self) -> bool {
183         // We're at eof if we're at the end of our scope.
184         ptr::eq(self.ptr, self.scope)
185     }
186 
187     /// If the cursor is pointing at a `Ident`, returns it along with a cursor
188     /// pointing at the next `TokenTree`.
189     pub fn ident(mut self) -> Option<(Ident, Cursor<'a>)> {
190         self.ignore_none();
191         match self.entry() {
192             Entry::Ident(ident) => Some((ident.clone(), unsafe { self.bump_ignore_group() })),
193             _ => None,
194         }
195     }
196 
197     /// If the cursor is pointing at a `Punct`, returns it along with a cursor
198     /// pointing at the next `TokenTree`.
199     pub fn punct(mut self) -> Option<(Punct, Cursor<'a>)> {
200         self.ignore_none();
201         match self.entry() {
202             Entry::Punct(punct) if punct.as_char() != '\'' => {
203                 Some((punct.clone(), unsafe { self.bump_ignore_group() }))
204             }
205             _ => None,
206         }
207     }
208 
209     /// If the cursor is pointing at a `Literal`, return it along with a cursor
210     /// pointing at the next `TokenTree`.
211     pub fn literal(mut self) -> Option<(Literal, Cursor<'a>)> {
212         self.ignore_none();
213         match self.entry() {
214             Entry::Literal(literal) => Some((literal.clone(), unsafe { self.bump_ignore_group() })),
215             _ => None,
216         }
217     }
218 
219     /// If the cursor is pointing at a `Lifetime`, returns it along with a
220     /// cursor pointing at the next `TokenTree`.
221     pub fn lifetime(mut self) -> Option<(Lifetime, Cursor<'a>)> {
222         self.ignore_none();
223         match self.entry() {
224             Entry::Punct(punct) if punct.as_char() == '\'' && punct.spacing() == Spacing::Joint => {
225                 let next = unsafe { self.bump_ignore_group() };
226                 let (ident, rest) = next.ident()?;
227                 let lifetime = Lifetime {
228                     apostrophe: punct.span(),
229                     ident,
230                 };
231                 Some((lifetime, rest))
232             }
233             _ => None,
234         }
235     }
236 
237     /// If the cursor is pointing at a `Group` with the given delimiter, returns
238     /// a cursor into that group and one pointing to the next `TokenTree`.
239     pub fn group(mut self, delim: Delimiter) -> Option<(Cursor<'a>, DelimSpan, Cursor<'a>)> {
240         // If we're not trying to enter a none-delimited group, we want to
241         // ignore them. We have to make sure to _not_ ignore them when we want
242         // to enter them, of course. For obvious reasons.
243         if delim != Delimiter::None {
244             self.ignore_none();
245         }
246 
247         if let Entry::Group(group, end_offset) = self.entry() {
248             if group.delimiter() == delim {
249                 let span = group.delim_span();
250                 let end_of_group = unsafe { self.ptr.add(*end_offset) };
251                 let inside_of_group = unsafe { Cursor::create(self.ptr.add(1), end_of_group) };
252                 let after_group = unsafe { Cursor::create(end_of_group, self.scope) };
253                 return Some((inside_of_group, span, after_group));
254             }
255         }
256 
257         None
258     }
259 
260     /// If the cursor is pointing at a `Group`, returns a cursor into the group
261     /// and one pointing to the next `TokenTree`.
262     pub fn any_group(self) -> Option<(Cursor<'a>, Delimiter, DelimSpan, Cursor<'a>)> {
263         if let Entry::Group(group, end_offset) = self.entry() {
264             let delimiter = group.delimiter();
265             let span = group.delim_span();
266             let end_of_group = unsafe { self.ptr.add(*end_offset) };
267             let inside_of_group = unsafe { Cursor::create(self.ptr.add(1), end_of_group) };
268             let after_group = unsafe { Cursor::create(end_of_group, self.scope) };
269             return Some((inside_of_group, delimiter, span, after_group));
270         }
271 
272         None
273     }
274 
275     pub(crate) fn any_group_token(self) -> Option<(Group, Cursor<'a>)> {
276         if let Entry::Group(group, end_offset) = self.entry() {
277             let end_of_group = unsafe { self.ptr.add(*end_offset) };
278             let after_group = unsafe { Cursor::create(end_of_group, self.scope) };
279             return Some((group.clone(), after_group));
280         }
281 
282         None
283     }
284 
285     /// Copies all remaining tokens visible from this cursor into a
286     /// `TokenStream`.
287     pub fn token_stream(self) -> TokenStream {
288         let mut tts = Vec::new();
289         let mut cursor = self;
290         while let Some((tt, rest)) = cursor.token_tree() {
291             tts.push(tt);
292             cursor = rest;
293         }
294         tts.into_iter().collect()
295     }
296 
297     /// If the cursor is pointing at a `TokenTree`, returns it along with a
298     /// cursor pointing at the next `TokenTree`.
299     ///
300     /// Returns `None` if the cursor has reached the end of its stream.
301     ///
302     /// This method does not treat `None`-delimited groups as transparent, and
303     /// will return a `Group(None, ..)` if the cursor is looking at one.
304     pub fn token_tree(self) -> Option<(TokenTree, Cursor<'a>)> {
305         let (tree, len) = match self.entry() {
306             Entry::Group(group, end_offset) => (group.clone().into(), *end_offset),
307             Entry::Literal(literal) => (literal.clone().into(), 1),
308             Entry::Ident(ident) => (ident.clone().into(), 1),
309             Entry::Punct(punct) => (punct.clone().into(), 1),
310             Entry::End(..) => return None,
311         };
312 
313         let rest = unsafe { Cursor::create(self.ptr.add(len), self.scope) };
314         Some((tree, rest))
315     }
316 
317     /// Returns the `Span` of the current token, or `Span::call_site()` if this
318     /// cursor points to eof.
319     pub fn span(mut self) -> Span {
320         match self.entry() {
321             Entry::Group(group, _) => group.span(),
322             Entry::Literal(literal) => literal.span(),
323             Entry::Ident(ident) => ident.span(),
324             Entry::Punct(punct) => punct.span(),
325             Entry::End(_, offset) => {
326                 self.ptr = unsafe { self.ptr.offset(*offset) };
327                 if let Entry::Group(group, _) = self.entry() {
328                     group.span_close()
329                 } else {
330                     Span::call_site()
331                 }
332             }
333         }
334     }
335 
336     /// Returns the `Span` of the token immediately prior to the position of
337     /// this cursor, or of the current token if there is no previous one.
338     #[cfg(any(feature = "full", feature = "derive"))]
339     pub(crate) fn prev_span(mut self) -> Span {
340         if start_of_buffer(self) < self.ptr {
341             self.ptr = unsafe { self.ptr.offset(-1) };
342         }
343         self.span()
344     }
345 
346     /// Skip over the next token that is not a None-delimited group, without
347     /// cloning it. Returns `None` if this cursor points to eof.
348     ///
349     /// This method treats `'lifetimes` as a single token.
350     pub(crate) fn skip(mut self) -> Option<Cursor<'a>> {
351         self.ignore_none();
352 
353         let len = match self.entry() {
354             Entry::End(..) => return None,
355 
356             // Treat lifetimes as a single tt for the purposes of 'skip'.
357             Entry::Punct(punct) if punct.as_char() == '\'' && punct.spacing() == Spacing::Joint => {
358                 match unsafe { &*self.ptr.add(1) } {
359                     Entry::Ident(_) => 2,
360                     _ => 1,
361                 }
362             }
363 
364             Entry::Group(_, end_offset) => *end_offset,
365             _ => 1,
366         };
367 
368         Some(unsafe { Cursor::create(self.ptr.add(len), self.scope) })
369     }
370 
371     pub(crate) fn scope_delimiter(self) -> Delimiter {
372         match unsafe { &*self.scope } {
373             Entry::End(_, offset) => match unsafe { &*self.scope.offset(*offset) } {
374                 Entry::Group(group, _) => group.delimiter(),
375                 _ => Delimiter::None,
376             },
377             _ => unreachable!(),
378         }
379     }
380 }
381 
382 impl<'a> Copy for Cursor<'a> {}
383 
384 impl<'a> Clone for Cursor<'a> {
385     fn clone(&self) -> Self {
386         *self
387     }
388 }
389 
390 impl<'a> Eq for Cursor<'a> {}
391 
392 impl<'a> PartialEq for Cursor<'a> {
393     fn eq(&self, other: &Self) -> bool {
394         ptr::eq(self.ptr, other.ptr)
395     }
396 }
397 
398 impl<'a> PartialOrd for Cursor<'a> {
399     fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
400         if same_buffer(*self, *other) {
401             Some(cmp_assuming_same_buffer(*self, *other))
402         } else {
403             None
404         }
405     }
406 }
407 
408 pub(crate) fn same_scope(a: Cursor, b: Cursor) -> bool {
409     ptr::eq(a.scope, b.scope)
410 }
411 
412 pub(crate) fn same_buffer(a: Cursor, b: Cursor) -> bool {
413     ptr::eq(start_of_buffer(a), start_of_buffer(b))
414 }
415 
416 fn start_of_buffer(cursor: Cursor) -> *const Entry {
417     unsafe {
418         match &*cursor.scope {
419             Entry::End(offset, _) => cursor.scope.offset(*offset),
420             _ => unreachable!(),
421         }
422     }
423 }
424 
425 pub(crate) fn cmp_assuming_same_buffer(a: Cursor, b: Cursor) -> Ordering {
426     a.ptr.cmp(&b.ptr)
427 }
428 
429 pub(crate) fn open_span_of_group(cursor: Cursor) -> Span {
430     match cursor.entry() {
431         Entry::Group(group, _) => group.span_open(),
432         _ => cursor.span(),
433     }
434 }
435