xref: /linux/rust/syn/buffer.rs (revision 784faa8eca8270671e0ed6d9d21f04bbb80fc5f7)
1 // SPDX-License-Identifier: Apache-2.0 OR MIT
2 
3 //! A stably addressed token buffer supporting efficient traversal based on a
4 //! cheaply copyable cursor.
5 
6 // This module is heavily commented as it contains most of the unsafe code in
7 // Syn, and caution should be used when editing it. The public-facing interface
8 // is 100% safe but the implementation is fragile internally.
9 
10 use crate::Lifetime;
11 use proc_macro2::extra::DelimSpan;
12 use proc_macro2::{Delimiter, Group, Ident, Literal, Punct, Spacing, Span, TokenStream, TokenTree};
13 use std::cmp::Ordering;
14 use std::marker::PhantomData;
15 use std::ptr;
16 
/// Internal type which is used instead of `TokenTree` to represent a token tree
/// within a `TokenBuffer`.
///
/// The buffer is a flat sequence of entries. A group is flattened into a
/// `Group` entry, then the entries of its contents, then a matching `End`
/// entry; one extra `End` entry terminates the whole buffer.
enum Entry {
    // Mimicking types from proc-macro.
    // Group entries contain the offset to the matching End entry.
    Group(Group, usize),
    Ident(Ident),
    Punct(Punct),
    Literal(Literal),
    // End entries contain the offset (negative) to the start of the buffer, and
    // offset (negative) to the matching Group entry. The End entry which
    // terminates the whole buffer has no matching Group and stores 0 as its
    // second offset.
    End(isize, isize),
}
30 
/// A buffer that can be efficiently traversed multiple times, unlike
/// `TokenStream` which requires a deep copy in order to traverse more than
/// once.
pub struct TokenBuffer {
    // NOTE: Do not implement clone on this - while the current design could be
    // cloned, other designs which could be desirable may not be cloneable.
    //
    // The flattened token trees. Cursors created by `begin()` hold raw
    // pointers into this boxed slice.
    entries: Box<[Entry]>,
}
39 
40 impl TokenBuffer {
recursive_new(entries: &mut Vec<Entry>, stream: TokenStream)41     fn recursive_new(entries: &mut Vec<Entry>, stream: TokenStream) {
42         for tt in stream {
43             match tt {
44                 TokenTree::Ident(ident) => entries.push(Entry::Ident(ident)),
45                 TokenTree::Punct(punct) => entries.push(Entry::Punct(punct)),
46                 TokenTree::Literal(literal) => entries.push(Entry::Literal(literal)),
47                 TokenTree::Group(group) => {
48                     let group_start_index = entries.len();
49                     entries.push(Entry::End(0, 0)); // we replace this below
50                     Self::recursive_new(entries, group.stream());
51                     let group_end_index = entries.len();
52                     let group_offset = group_end_index - group_start_index;
53                     entries.push(Entry::End(
54                         -(group_end_index as isize),
55                         -(group_offset as isize),
56                     ));
57                     entries[group_start_index] = Entry::Group(group, group_offset);
58                 }
59             }
60         }
61     }
62 
63     /// Creates a `TokenBuffer` containing all the tokens from the input
64     /// `proc_macro::TokenStream`.
65     #[cfg(feature = "proc-macro")]
66     #[cfg_attr(docsrs, doc(cfg(feature = "proc-macro")))]
new(stream: proc_macro::TokenStream) -> Self67     pub fn new(stream: proc_macro::TokenStream) -> Self {
68         Self::new2(stream.into())
69     }
70 
71     /// Creates a `TokenBuffer` containing all the tokens from the input
72     /// `proc_macro2::TokenStream`.
new2(stream: TokenStream) -> Self73     pub fn new2(stream: TokenStream) -> Self {
74         let mut entries = Vec::new();
75         Self::recursive_new(&mut entries, stream);
76         entries.push(Entry::End(-(entries.len() as isize), 0));
77         Self {
78             entries: entries.into_boxed_slice(),
79         }
80     }
81 
82     /// Creates a cursor referencing the first token in the buffer and able to
83     /// traverse until the end of the buffer.
begin(&self) -> Cursor84     pub fn begin(&self) -> Cursor {
85         let ptr = self.entries.as_ptr();
86         unsafe { Cursor::create(ptr, ptr.add(self.entries.len() - 1)) }
87     }
88 }
89 
/// A cheaply copyable cursor into a `TokenBuffer`.
///
/// This cursor holds a shared reference into the immutable data which is used
/// internally to represent a `TokenStream`, and can be efficiently manipulated
/// and copied around.
///
/// An empty `Cursor` can be created directly with `Cursor::empty()`, or one
/// may create a `TokenBuffer` object and get a cursor to its first token with
/// `begin()`.
pub struct Cursor<'a> {
    // The current entry which the `Cursor` is pointing at.
    ptr: *const Entry,
    // This is the only `Entry::End` object which this cursor is allowed to
    // point at. All other `End` objects are skipped over in `Cursor::create`.
    scope: *const Entry,
    // Cursor is covariant in 'a. This field ties the raw pointers above to the
    // lifetime of the data they point into, so that they are still valid
    // whenever the cursor is used.
    marker: PhantomData<&'a Entry>,
}
108 
109 impl<'a> Cursor<'a> {
110     /// Creates a cursor referencing a static empty TokenStream.
empty() -> Self111     pub fn empty() -> Self {
112         // It's safe in this situation for us to put an `Entry` object in global
113         // storage, despite it not actually being safe to send across threads
114         // (`Ident` is a reference into a thread-local table). This is because
115         // this entry never includes a `Ident` object.
116         //
117         // This wrapper struct allows us to break the rules and put a `Sync`
118         // object in global storage.
119         struct UnsafeSyncEntry(Entry);
120         unsafe impl Sync for UnsafeSyncEntry {}
121         static EMPTY_ENTRY: UnsafeSyncEntry = UnsafeSyncEntry(Entry::End(0, 0));
122 
123         Cursor {
124             ptr: &EMPTY_ENTRY.0,
125             scope: &EMPTY_ENTRY.0,
126             marker: PhantomData,
127         }
128     }
129 
130     /// This create method intelligently exits non-explicitly-entered
131     /// `None`-delimited scopes when the cursor reaches the end of them,
132     /// allowing for them to be treated transparently.
create(mut ptr: *const Entry, scope: *const Entry) -> Self133     unsafe fn create(mut ptr: *const Entry, scope: *const Entry) -> Self {
134         // NOTE: If we're looking at a `End`, we want to advance the cursor
135         // past it, unless `ptr == scope`, which means that we're at the edge of
136         // our cursor's scope. We should only have `ptr != scope` at the exit
137         // from None-delimited groups entered with `ignore_none`.
138         while let Entry::End(..) = unsafe { &*ptr } {
139             if ptr::eq(ptr, scope) {
140                 break;
141             }
142             ptr = unsafe { ptr.add(1) };
143         }
144 
145         Cursor {
146             ptr,
147             scope,
148             marker: PhantomData,
149         }
150     }
151 
152     /// Get the current entry.
entry(self) -> &'a Entry153     fn entry(self) -> &'a Entry {
154         unsafe { &*self.ptr }
155     }
156 
157     /// Bump the cursor to point at the next token after the current one. This
158     /// is undefined behavior if the cursor is currently looking at an
159     /// `Entry::End`.
160     ///
161     /// If the cursor is looking at an `Entry::Group`, the bumped cursor will
162     /// point at the first token in the group (with the same scope end).
bump_ignore_group(self) -> Cursor<'a>163     unsafe fn bump_ignore_group(self) -> Cursor<'a> {
164         unsafe { Cursor::create(self.ptr.offset(1), self.scope) }
165     }
166 
167     /// While the cursor is looking at a `None`-delimited group, move it to look
168     /// at the first token inside instead. If the group is empty, this will move
169     /// the cursor past the `None`-delimited group.
170     ///
171     /// WARNING: This mutates its argument.
ignore_none(&mut self)172     fn ignore_none(&mut self) {
173         while let Entry::Group(group, _) = self.entry() {
174             if group.delimiter() == Delimiter::None {
175                 unsafe { *self = self.bump_ignore_group() };
176             } else {
177                 break;
178             }
179         }
180     }
181 
182     /// Checks whether the cursor is currently pointing at the end of its valid
183     /// scope.
eof(self) -> bool184     pub fn eof(self) -> bool {
185         // We're at eof if we're at the end of our scope.
186         ptr::eq(self.ptr, self.scope)
187     }
188 
189     /// If the cursor is pointing at a `Ident`, returns it along with a cursor
190     /// pointing at the next `TokenTree`.
ident(mut self) -> Option<(Ident, Cursor<'a>)>191     pub fn ident(mut self) -> Option<(Ident, Cursor<'a>)> {
192         self.ignore_none();
193         match self.entry() {
194             Entry::Ident(ident) => Some((ident.clone(), unsafe { self.bump_ignore_group() })),
195             _ => None,
196         }
197     }
198 
199     /// If the cursor is pointing at a `Punct`, returns it along with a cursor
200     /// pointing at the next `TokenTree`.
punct(mut self) -> Option<(Punct, Cursor<'a>)>201     pub fn punct(mut self) -> Option<(Punct, Cursor<'a>)> {
202         self.ignore_none();
203         match self.entry() {
204             Entry::Punct(punct) if punct.as_char() != '\'' => {
205                 Some((punct.clone(), unsafe { self.bump_ignore_group() }))
206             }
207             _ => None,
208         }
209     }
210 
211     /// If the cursor is pointing at a `Literal`, return it along with a cursor
212     /// pointing at the next `TokenTree`.
literal(mut self) -> Option<(Literal, Cursor<'a>)>213     pub fn literal(mut self) -> Option<(Literal, Cursor<'a>)> {
214         self.ignore_none();
215         match self.entry() {
216             Entry::Literal(literal) => Some((literal.clone(), unsafe { self.bump_ignore_group() })),
217             _ => None,
218         }
219     }
220 
221     /// If the cursor is pointing at a `Lifetime`, returns it along with a
222     /// cursor pointing at the next `TokenTree`.
lifetime(mut self) -> Option<(Lifetime, Cursor<'a>)>223     pub fn lifetime(mut self) -> Option<(Lifetime, Cursor<'a>)> {
224         self.ignore_none();
225         match self.entry() {
226             Entry::Punct(punct) if punct.as_char() == '\'' && punct.spacing() == Spacing::Joint => {
227                 let next = unsafe { self.bump_ignore_group() };
228                 let (ident, rest) = next.ident()?;
229                 let lifetime = Lifetime {
230                     apostrophe: punct.span(),
231                     ident,
232                 };
233                 Some((lifetime, rest))
234             }
235             _ => None,
236         }
237     }
238 
239     /// If the cursor is pointing at a `Group` with the given delimiter, returns
240     /// a cursor into that group and one pointing to the next `TokenTree`.
group(mut self, delim: Delimiter) -> Option<(Cursor<'a>, DelimSpan, Cursor<'a>)>241     pub fn group(mut self, delim: Delimiter) -> Option<(Cursor<'a>, DelimSpan, Cursor<'a>)> {
242         // If we're not trying to enter a none-delimited group, we want to
243         // ignore them. We have to make sure to _not_ ignore them when we want
244         // to enter them, of course. For obvious reasons.
245         if delim != Delimiter::None {
246             self.ignore_none();
247         }
248 
249         if let Entry::Group(group, end_offset) = self.entry() {
250             if group.delimiter() == delim {
251                 let span = group.delim_span();
252                 let end_of_group = unsafe { self.ptr.add(*end_offset) };
253                 let inside_of_group = unsafe { Cursor::create(self.ptr.add(1), end_of_group) };
254                 let after_group = unsafe { Cursor::create(end_of_group, self.scope) };
255                 return Some((inside_of_group, span, after_group));
256             }
257         }
258 
259         None
260     }
261 
262     /// If the cursor is pointing at a `Group`, returns a cursor into the group
263     /// and one pointing to the next `TokenTree`.
any_group(self) -> Option<(Cursor<'a>, Delimiter, DelimSpan, Cursor<'a>)>264     pub fn any_group(self) -> Option<(Cursor<'a>, Delimiter, DelimSpan, Cursor<'a>)> {
265         if let Entry::Group(group, end_offset) = self.entry() {
266             let delimiter = group.delimiter();
267             let span = group.delim_span();
268             let end_of_group = unsafe { self.ptr.add(*end_offset) };
269             let inside_of_group = unsafe { Cursor::create(self.ptr.add(1), end_of_group) };
270             let after_group = unsafe { Cursor::create(end_of_group, self.scope) };
271             return Some((inside_of_group, delimiter, span, after_group));
272         }
273 
274         None
275     }
276 
any_group_token(self) -> Option<(Group, Cursor<'a>)>277     pub(crate) fn any_group_token(self) -> Option<(Group, Cursor<'a>)> {
278         if let Entry::Group(group, end_offset) = self.entry() {
279             let end_of_group = unsafe { self.ptr.add(*end_offset) };
280             let after_group = unsafe { Cursor::create(end_of_group, self.scope) };
281             return Some((group.clone(), after_group));
282         }
283 
284         None
285     }
286 
287     /// Copies all remaining tokens visible from this cursor into a
288     /// `TokenStream`.
token_stream(self) -> TokenStream289     pub fn token_stream(self) -> TokenStream {
290         let mut tts = Vec::new();
291         let mut cursor = self;
292         while let Some((tt, rest)) = cursor.token_tree() {
293             tts.push(tt);
294             cursor = rest;
295         }
296         tts.into_iter().collect()
297     }
298 
299     /// If the cursor is pointing at a `TokenTree`, returns it along with a
300     /// cursor pointing at the next `TokenTree`.
301     ///
302     /// Returns `None` if the cursor has reached the end of its stream.
303     ///
304     /// This method does not treat `None`-delimited groups as transparent, and
305     /// will return a `Group(None, ..)` if the cursor is looking at one.
token_tree(self) -> Option<(TokenTree, Cursor<'a>)>306     pub fn token_tree(self) -> Option<(TokenTree, Cursor<'a>)> {
307         let (tree, len) = match self.entry() {
308             Entry::Group(group, end_offset) => (group.clone().into(), *end_offset),
309             Entry::Literal(literal) => (literal.clone().into(), 1),
310             Entry::Ident(ident) => (ident.clone().into(), 1),
311             Entry::Punct(punct) => (punct.clone().into(), 1),
312             Entry::End(..) => return None,
313         };
314 
315         let rest = unsafe { Cursor::create(self.ptr.add(len), self.scope) };
316         Some((tree, rest))
317     }
318 
319     /// Returns the `Span` of the current token, or `Span::call_site()` if this
320     /// cursor points to eof.
span(mut self) -> Span321     pub fn span(mut self) -> Span {
322         match self.entry() {
323             Entry::Group(group, _) => group.span(),
324             Entry::Literal(literal) => literal.span(),
325             Entry::Ident(ident) => ident.span(),
326             Entry::Punct(punct) => punct.span(),
327             Entry::End(_, offset) => {
328                 self.ptr = unsafe { self.ptr.offset(*offset) };
329                 if let Entry::Group(group, _) = self.entry() {
330                     group.span_close()
331                 } else {
332                     Span::call_site()
333                 }
334             }
335         }
336     }
337 
338     /// Returns the `Span` of the token immediately prior to the position of
339     /// this cursor, or of the current token if there is no previous one.
340     #[cfg(any(feature = "full", feature = "derive"))]
prev_span(mut self) -> Span341     pub(crate) fn prev_span(mut self) -> Span {
342         if start_of_buffer(self) < self.ptr {
343             self.ptr = unsafe { self.ptr.offset(-1) };
344         }
345         self.span()
346     }
347 
348     /// Skip over the next token that is not a None-delimited group, without
349     /// cloning it. Returns `None` if this cursor points to eof.
350     ///
351     /// This method treats `'lifetimes` as a single token.
skip(mut self) -> Option<Cursor<'a>>352     pub(crate) fn skip(mut self) -> Option<Cursor<'a>> {
353         self.ignore_none();
354 
355         let len = match self.entry() {
356             Entry::End(..) => return None,
357 
358             // Treat lifetimes as a single tt for the purposes of 'skip'.
359             Entry::Punct(punct) if punct.as_char() == '\'' && punct.spacing() == Spacing::Joint => {
360                 match unsafe { &*self.ptr.add(1) } {
361                     Entry::Ident(_) => 2,
362                     _ => 1,
363                 }
364             }
365 
366             Entry::Group(_, end_offset) => *end_offset,
367             _ => 1,
368         };
369 
370         Some(unsafe { Cursor::create(self.ptr.add(len), self.scope) })
371     }
372 
scope_delimiter(self) -> Delimiter373     pub(crate) fn scope_delimiter(self) -> Delimiter {
374         match unsafe { &*self.scope } {
375             Entry::End(_, offset) => match unsafe { &*self.scope.offset(*offset) } {
376                 Entry::Group(group, _) => group.delimiter(),
377                 _ => Delimiter::None,
378             },
379             _ => unreachable!(),
380         }
381     }
382 }
383 
384 impl<'a> Copy for Cursor<'a> {}
385 
386 impl<'a> Clone for Cursor<'a> {
clone(&self) -> Self387     fn clone(&self) -> Self {
388         *self
389     }
390 }
391 
392 impl<'a> Eq for Cursor<'a> {}
393 
394 impl<'a> PartialEq for Cursor<'a> {
eq(&self, other: &Self) -> bool395     fn eq(&self, other: &Self) -> bool {
396         ptr::eq(self.ptr, other.ptr)
397     }
398 }
399 
400 impl<'a> PartialOrd for Cursor<'a> {
partial_cmp(&self, other: &Self) -> Option<Ordering>401     fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
402         if same_buffer(*self, *other) {
403             Some(cmp_assuming_same_buffer(*self, *other))
404         } else {
405             None
406         }
407     }
408 }
409 
same_scope(a: Cursor, b: Cursor) -> bool410 pub(crate) fn same_scope(a: Cursor, b: Cursor) -> bool {
411     ptr::eq(a.scope, b.scope)
412 }
413 
same_buffer(a: Cursor, b: Cursor) -> bool414 pub(crate) fn same_buffer(a: Cursor, b: Cursor) -> bool {
415     ptr::eq(start_of_buffer(a), start_of_buffer(b))
416 }
417 
start_of_buffer(cursor: Cursor) -> *const Entry418 fn start_of_buffer(cursor: Cursor) -> *const Entry {
419     unsafe {
420         match &*cursor.scope {
421             Entry::End(offset, _) => cursor.scope.offset(*offset),
422             _ => unreachable!(),
423         }
424     }
425 }
426 
cmp_assuming_same_buffer(a: Cursor, b: Cursor) -> Ordering427 pub(crate) fn cmp_assuming_same_buffer(a: Cursor, b: Cursor) -> Ordering {
428     a.ptr.cmp(&b.ptr)
429 }
430 
open_span_of_group(cursor: Cursor) -> Span431 pub(crate) fn open_span_of_group(cursor: Cursor) -> Span {
432     match cursor.entry() {
433         Entry::Group(group, _) => group.span_open(),
434         _ => cursor.span(),
435     }
436 }
437