1 // SPDX-License-Identifier: Apache-2.0 OR MIT
2
3 //! A stably addressed token buffer supporting efficient traversal based on a
4 //! cheaply copyable cursor.
5
6 // This module is heavily commented as it contains most of the unsafe code in
7 // Syn, and caution should be used when editing it. The public-facing interface
8 // is 100% safe but the implementation is fragile internally.
9
10 use crate::Lifetime;
11 use proc_macro2::extra::DelimSpan;
12 use proc_macro2::{Delimiter, Group, Ident, Literal, Punct, Spacing, Span, TokenStream, TokenTree};
13 use std::cmp::Ordering;
14 use std::marker::PhantomData;
15 use std::ptr;
16
/// Internal type which is used instead of `TokenTree` to represent a token tree
/// within a `TokenBuffer`.
///
/// Groups are stored flattened: a `Group` entry is followed by the entries for
/// the group's contents and then by a matching `End` entry. All offsets are
/// counted in entries, not bytes.
enum Entry {
    // Mimicking types from proc-macro.
    // Group entries contain the (positive) offset from this entry to the
    // matching End entry.
    Group(Group, usize),
    Ident(Ident),
    Punct(Punct),
    Literal(Literal),
    // End entries contain the offset (negative) to the start of the buffer, and
    // offset (negative) to the matching Group entry. The End entry that
    // terminates a whole buffer has no matching Group and stores 0 as its
    // second offset.
    End(isize, isize),
}
30
/// A buffer that can be efficiently traversed multiple times, unlike
/// `TokenStream` which requires a deep copy in order to traverse more than
/// once.
pub struct TokenBuffer {
    // NOTE: Do not implement clone on this - while the current design could be
    // cloned, other designs which could be desirable may not be cloneable.
    //
    // Flattened token entries. `new2` always appends a final `Entry::End`, so
    // a buffer built that way is never empty.
    entries: Box<[Entry]>,
}
39
40 impl TokenBuffer {
recursive_new(entries: &mut Vec<Entry>, stream: TokenStream)41 fn recursive_new(entries: &mut Vec<Entry>, stream: TokenStream) {
42 for tt in stream {
43 match tt {
44 TokenTree::Ident(ident) => entries.push(Entry::Ident(ident)),
45 TokenTree::Punct(punct) => entries.push(Entry::Punct(punct)),
46 TokenTree::Literal(literal) => entries.push(Entry::Literal(literal)),
47 TokenTree::Group(group) => {
48 let group_start_index = entries.len();
49 entries.push(Entry::End(0, 0)); // we replace this below
50 Self::recursive_new(entries, group.stream());
51 let group_end_index = entries.len();
52 let group_offset = group_end_index - group_start_index;
53 entries.push(Entry::End(
54 -(group_end_index as isize),
55 -(group_offset as isize),
56 ));
57 entries[group_start_index] = Entry::Group(group, group_offset);
58 }
59 }
60 }
61 }
62
/// Builds a `TokenBuffer` holding every token of the given
/// `proc_macro::TokenStream`.
///
/// The stream is converted to a `proc_macro2::TokenStream` and handed to
/// `new2`.
#[cfg(feature = "proc-macro")]
#[cfg_attr(docsrs, doc(cfg(feature = "proc-macro")))]
pub fn new(stream: proc_macro::TokenStream) -> Self {
    Self::new2(TokenStream::from(stream))
}
70
71 /// Creates a `TokenBuffer` containing all the tokens from the input
72 /// `proc_macro2::TokenStream`.
new2(stream: TokenStream) -> Self73 pub fn new2(stream: TokenStream) -> Self {
74 let mut entries = Vec::new();
75 Self::recursive_new(&mut entries, stream);
76 entries.push(Entry::End(-(entries.len() as isize), 0));
77 Self {
78 entries: entries.into_boxed_slice(),
79 }
80 }
81
82 /// Creates a cursor referencing the first token in the buffer and able to
83 /// traverse until the end of the buffer.
begin(&self) -> Cursor84 pub fn begin(&self) -> Cursor {
85 let ptr = self.entries.as_ptr();
86 unsafe { Cursor::create(ptr, ptr.add(self.entries.len() - 1)) }
87 }
88 }
89
/// A cheaply copyable cursor into a `TokenBuffer`.
///
/// This cursor holds a shared reference into the immutable data which is used
/// internally to represent a `TokenStream`, and can be efficiently manipulated
/// and copied around.
///
/// An empty `Cursor` can be created directly, or one may create a `TokenBuffer`
/// object and get a cursor to its first token with `begin()`.
pub struct Cursor<'a> {
    // The current entry which the `Cursor` is pointing at.
    ptr: *const Entry,
    // This is the only `Entry::End` object which this cursor is allowed to
    // point at. All other `End` objects are skipped over in `Cursor::create`.
    // The cursor is at eof exactly when `ptr` equals `scope`.
    scope: *const Entry,
    // Cursor is covariant in 'a. This field ensures that our pointers are still
    // valid.
    marker: PhantomData<&'a Entry>,
}
108
109 impl<'a> Cursor<'a> {
110 /// Creates a cursor referencing a static empty TokenStream.
empty() -> Self111 pub fn empty() -> Self {
112 // It's safe in this situation for us to put an `Entry` object in global
113 // storage, despite it not actually being safe to send across threads
114 // (`Ident` is a reference into a thread-local table). This is because
115 // this entry never includes a `Ident` object.
116 //
117 // This wrapper struct allows us to break the rules and put a `Sync`
118 // object in global storage.
119 struct UnsafeSyncEntry(Entry);
120 unsafe impl Sync for UnsafeSyncEntry {}
121 static EMPTY_ENTRY: UnsafeSyncEntry = UnsafeSyncEntry(Entry::End(0, 0));
122
123 Cursor {
124 ptr: &EMPTY_ENTRY.0,
125 scope: &EMPTY_ENTRY.0,
126 marker: PhantomData,
127 }
128 }
129
130 /// This create method intelligently exits non-explicitly-entered
131 /// `None`-delimited scopes when the cursor reaches the end of them,
132 /// allowing for them to be treated transparently.
create(mut ptr: *const Entry, scope: *const Entry) -> Self133 unsafe fn create(mut ptr: *const Entry, scope: *const Entry) -> Self {
134 // NOTE: If we're looking at a `End`, we want to advance the cursor
135 // past it, unless `ptr == scope`, which means that we're at the edge of
136 // our cursor's scope. We should only have `ptr != scope` at the exit
137 // from None-delimited groups entered with `ignore_none`.
138 while let Entry::End(..) = unsafe { &*ptr } {
139 if ptr::eq(ptr, scope) {
140 break;
141 }
142 ptr = unsafe { ptr.add(1) };
143 }
144
145 Cursor {
146 ptr,
147 scope,
148 marker: PhantomData,
149 }
150 }
151
/// Get the current entry.
///
/// The returned reference carries the cursor's lifetime `'a` rather than
/// borrowing from the cursor value itself.
fn entry(self) -> &'a Entry {
    // Dereferences the raw position pointer; soundness relies on `ptr` always
    // pointing at a live entry for the duration of `'a`.
    unsafe { &*self.ptr }
}
156
157 /// Bump the cursor to point at the next token after the current one. This
158 /// is undefined behavior if the cursor is currently looking at an
159 /// `Entry::End`.
160 ///
161 /// If the cursor is looking at an `Entry::Group`, the bumped cursor will
162 /// point at the first token in the group (with the same scope end).
bump_ignore_group(self) -> Cursor<'a>163 unsafe fn bump_ignore_group(self) -> Cursor<'a> {
164 unsafe { Cursor::create(self.ptr.offset(1), self.scope) }
165 }
166
167 /// While the cursor is looking at a `None`-delimited group, move it to look
168 /// at the first token inside instead. If the group is empty, this will move
169 /// the cursor past the `None`-delimited group.
170 ///
171 /// WARNING: This mutates its argument.
ignore_none(&mut self)172 fn ignore_none(&mut self) {
173 while let Entry::Group(group, _) = self.entry() {
174 if group.delimiter() == Delimiter::None {
175 unsafe { *self = self.bump_ignore_group() };
176 } else {
177 break;
178 }
179 }
180 }
181
182 /// Checks whether the cursor is currently pointing at the end of its valid
183 /// scope.
eof(self) -> bool184 pub fn eof(self) -> bool {
185 // We're at eof if we're at the end of our scope.
186 ptr::eq(self.ptr, self.scope)
187 }
188
189 /// If the cursor is pointing at a `Ident`, returns it along with a cursor
190 /// pointing at the next `TokenTree`.
ident(mut self) -> Option<(Ident, Cursor<'a>)>191 pub fn ident(mut self) -> Option<(Ident, Cursor<'a>)> {
192 self.ignore_none();
193 match self.entry() {
194 Entry::Ident(ident) => Some((ident.clone(), unsafe { self.bump_ignore_group() })),
195 _ => None,
196 }
197 }
198
199 /// If the cursor is pointing at a `Punct`, returns it along with a cursor
200 /// pointing at the next `TokenTree`.
punct(mut self) -> Option<(Punct, Cursor<'a>)>201 pub fn punct(mut self) -> Option<(Punct, Cursor<'a>)> {
202 self.ignore_none();
203 match self.entry() {
204 Entry::Punct(punct) if punct.as_char() != '\'' => {
205 Some((punct.clone(), unsafe { self.bump_ignore_group() }))
206 }
207 _ => None,
208 }
209 }
210
211 /// If the cursor is pointing at a `Literal`, return it along with a cursor
212 /// pointing at the next `TokenTree`.
literal(mut self) -> Option<(Literal, Cursor<'a>)>213 pub fn literal(mut self) -> Option<(Literal, Cursor<'a>)> {
214 self.ignore_none();
215 match self.entry() {
216 Entry::Literal(literal) => Some((literal.clone(), unsafe { self.bump_ignore_group() })),
217 _ => None,
218 }
219 }
220
221 /// If the cursor is pointing at a `Lifetime`, returns it along with a
222 /// cursor pointing at the next `TokenTree`.
lifetime(mut self) -> Option<(Lifetime, Cursor<'a>)>223 pub fn lifetime(mut self) -> Option<(Lifetime, Cursor<'a>)> {
224 self.ignore_none();
225 match self.entry() {
226 Entry::Punct(punct) if punct.as_char() == '\'' && punct.spacing() == Spacing::Joint => {
227 let next = unsafe { self.bump_ignore_group() };
228 let (ident, rest) = next.ident()?;
229 let lifetime = Lifetime {
230 apostrophe: punct.span(),
231 ident,
232 };
233 Some((lifetime, rest))
234 }
235 _ => None,
236 }
237 }
238
239 /// If the cursor is pointing at a `Group` with the given delimiter, returns
240 /// a cursor into that group and one pointing to the next `TokenTree`.
group(mut self, delim: Delimiter) -> Option<(Cursor<'a>, DelimSpan, Cursor<'a>)>241 pub fn group(mut self, delim: Delimiter) -> Option<(Cursor<'a>, DelimSpan, Cursor<'a>)> {
242 // If we're not trying to enter a none-delimited group, we want to
243 // ignore them. We have to make sure to _not_ ignore them when we want
244 // to enter them, of course. For obvious reasons.
245 if delim != Delimiter::None {
246 self.ignore_none();
247 }
248
249 if let Entry::Group(group, end_offset) = self.entry() {
250 if group.delimiter() == delim {
251 let span = group.delim_span();
252 let end_of_group = unsafe { self.ptr.add(*end_offset) };
253 let inside_of_group = unsafe { Cursor::create(self.ptr.add(1), end_of_group) };
254 let after_group = unsafe { Cursor::create(end_of_group, self.scope) };
255 return Some((inside_of_group, span, after_group));
256 }
257 }
258
259 None
260 }
261
262 /// If the cursor is pointing at a `Group`, returns a cursor into the group
263 /// and one pointing to the next `TokenTree`.
any_group(self) -> Option<(Cursor<'a>, Delimiter, DelimSpan, Cursor<'a>)>264 pub fn any_group(self) -> Option<(Cursor<'a>, Delimiter, DelimSpan, Cursor<'a>)> {
265 if let Entry::Group(group, end_offset) = self.entry() {
266 let delimiter = group.delimiter();
267 let span = group.delim_span();
268 let end_of_group = unsafe { self.ptr.add(*end_offset) };
269 let inside_of_group = unsafe { Cursor::create(self.ptr.add(1), end_of_group) };
270 let after_group = unsafe { Cursor::create(end_of_group, self.scope) };
271 return Some((inside_of_group, delimiter, span, after_group));
272 }
273
274 None
275 }
276
any_group_token(self) -> Option<(Group, Cursor<'a>)>277 pub(crate) fn any_group_token(self) -> Option<(Group, Cursor<'a>)> {
278 if let Entry::Group(group, end_offset) = self.entry() {
279 let end_of_group = unsafe { self.ptr.add(*end_offset) };
280 let after_group = unsafe { Cursor::create(end_of_group, self.scope) };
281 return Some((group.clone(), after_group));
282 }
283
284 None
285 }
286
287 /// Copies all remaining tokens visible from this cursor into a
288 /// `TokenStream`.
token_stream(self) -> TokenStream289 pub fn token_stream(self) -> TokenStream {
290 let mut tts = Vec::new();
291 let mut cursor = self;
292 while let Some((tt, rest)) = cursor.token_tree() {
293 tts.push(tt);
294 cursor = rest;
295 }
296 tts.into_iter().collect()
297 }
298
299 /// If the cursor is pointing at a `TokenTree`, returns it along with a
300 /// cursor pointing at the next `TokenTree`.
301 ///
302 /// Returns `None` if the cursor has reached the end of its stream.
303 ///
304 /// This method does not treat `None`-delimited groups as transparent, and
305 /// will return a `Group(None, ..)` if the cursor is looking at one.
token_tree(self) -> Option<(TokenTree, Cursor<'a>)>306 pub fn token_tree(self) -> Option<(TokenTree, Cursor<'a>)> {
307 let (tree, len) = match self.entry() {
308 Entry::Group(group, end_offset) => (group.clone().into(), *end_offset),
309 Entry::Literal(literal) => (literal.clone().into(), 1),
310 Entry::Ident(ident) => (ident.clone().into(), 1),
311 Entry::Punct(punct) => (punct.clone().into(), 1),
312 Entry::End(..) => return None,
313 };
314
315 let rest = unsafe { Cursor::create(self.ptr.add(len), self.scope) };
316 Some((tree, rest))
317 }
318
319 /// Returns the `Span` of the current token, or `Span::call_site()` if this
320 /// cursor points to eof.
span(mut self) -> Span321 pub fn span(mut self) -> Span {
322 match self.entry() {
323 Entry::Group(group, _) => group.span(),
324 Entry::Literal(literal) => literal.span(),
325 Entry::Ident(ident) => ident.span(),
326 Entry::Punct(punct) => punct.span(),
327 Entry::End(_, offset) => {
328 self.ptr = unsafe { self.ptr.offset(*offset) };
329 if let Entry::Group(group, _) = self.entry() {
330 group.span_close()
331 } else {
332 Span::call_site()
333 }
334 }
335 }
336 }
337
/// Returns the `Span` of the entry immediately before the cursor's
/// position, or of the current entry when the cursor is already at the
/// start of the buffer.
#[cfg(any(feature = "full", feature = "derive"))]
pub(crate) fn prev_span(mut self) -> Span {
    let at_start = self.ptr <= start_of_buffer(self);
    if !at_start {
        // Not at the first entry, so one step back stays inside the buffer.
        self.ptr = unsafe { self.ptr.sub(1) };
    }
    self.span()
}
347
348 /// Skip over the next token that is not a None-delimited group, without
349 /// cloning it. Returns `None` if this cursor points to eof.
350 ///
351 /// This method treats `'lifetimes` as a single token.
skip(mut self) -> Option<Cursor<'a>>352 pub(crate) fn skip(mut self) -> Option<Cursor<'a>> {
353 self.ignore_none();
354
355 let len = match self.entry() {
356 Entry::End(..) => return None,
357
358 // Treat lifetimes as a single tt for the purposes of 'skip'.
359 Entry::Punct(punct) if punct.as_char() == '\'' && punct.spacing() == Spacing::Joint => {
360 match unsafe { &*self.ptr.add(1) } {
361 Entry::Ident(_) => 2,
362 _ => 1,
363 }
364 }
365
366 Entry::Group(_, end_offset) => *end_offset,
367 _ => 1,
368 };
369
370 Some(unsafe { Cursor::create(self.ptr.add(len), self.scope) })
371 }
372
scope_delimiter(self) -> Delimiter373 pub(crate) fn scope_delimiter(self) -> Delimiter {
374 match unsafe { &*self.scope } {
375 Entry::End(_, offset) => match unsafe { &*self.scope.offset(*offset) } {
376 Entry::Group(group, _) => group.delimiter(),
377 _ => Delimiter::None,
378 },
379 _ => unreachable!(),
380 }
381 }
382 }
383
// A cursor is just two raw pointers and a `PhantomData` marker, so it can be
// duplicated with a plain bitwise copy.
impl<'a> Copy for Cursor<'a> {}
385
// Implemented by hand in terms of `Copy`.
impl<'a> Clone for Cursor<'a> {
    fn clone(&self) -> Self {
        // `Cursor` is `Copy`, so cloning is a plain copy of the value.
        *self
    }
}
391
// Marker impl; equality itself is defined by the `PartialEq` impl for
// `Cursor`.
impl<'a> Eq for Cursor<'a> {}
393
394 impl<'a> PartialEq for Cursor<'a> {
eq(&self, other: &Self) -> bool395 fn eq(&self, other: &Self) -> bool {
396 ptr::eq(self.ptr, other.ptr)
397 }
398 }
399
400 impl<'a> PartialOrd for Cursor<'a> {
partial_cmp(&self, other: &Self) -> Option<Ordering>401 fn partial_cmp(&self, other: &Self) -> Option<Ordering> {
402 if same_buffer(*self, *other) {
403 Some(cmp_assuming_same_buffer(*self, *other))
404 } else {
405 None
406 }
407 }
408 }
409
same_scope(a: Cursor, b: Cursor) -> bool410 pub(crate) fn same_scope(a: Cursor, b: Cursor) -> bool {
411 ptr::eq(a.scope, b.scope)
412 }
413
same_buffer(a: Cursor, b: Cursor) -> bool414 pub(crate) fn same_buffer(a: Cursor, b: Cursor) -> bool {
415 ptr::eq(start_of_buffer(a), start_of_buffer(b))
416 }
417
start_of_buffer(cursor: Cursor) -> *const Entry418 fn start_of_buffer(cursor: Cursor) -> *const Entry {
419 unsafe {
420 match &*cursor.scope {
421 Entry::End(offset, _) => cursor.scope.offset(*offset),
422 _ => unreachable!(),
423 }
424 }
425 }
426
cmp_assuming_same_buffer(a: Cursor, b: Cursor) -> Ordering427 pub(crate) fn cmp_assuming_same_buffer(a: Cursor, b: Cursor) -> Ordering {
428 a.ptr.cmp(&b.ptr)
429 }
430
open_span_of_group(cursor: Cursor) -> Span431 pub(crate) fn open_span_of_group(cursor: Cursor) -> Span {
432 match cursor.entry() {
433 Entry::Group(group, _) => group.span_open(),
434 _ => cursor.span(),
435 }
436 }
437