xref: /freebsd/contrib/llvm-project/clang/include/clang/Serialization/SourceLocationEncoding.h (revision 0fca6ea1d4eea4c934cfff25ac9ee8ad6fe95583)
1 //===--- SourceLocationEncoding.h - Small serialized locations --*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 //
9 // We wish to encode the SourceLocation from other module file not dependent
10 // on the other module file. So that the source location changes from other
11 // module file may not affect the contents of the current module file. Then the
12 // users don't need to recompile the whole project due to a new line in a module
13 // unit in the root of the dependency graph.
14 //
15 // To achieve this, we need to encode the index of the module file into the
16 // encoding of the source location. The encoding of the source location may be:
17 //
18 //      |-----------------------|-----------------------|
19 //      |          A            |         B         | C |
20 //
21 //  * A: 32 bit. The index of the module file in the module manager + 1. The +1
22 //  here is necessary since we wish 0 stands for the current module file.
23 //  * B: 31 bit. The offset of the source location to the module file containing
24 //  it.
25 //  * C: The macro bit. We rotate it to the lowest bit so that we can save some
26 //  space in case the index of the module file is 0.
27 //
28 // Specially, if the index of the module file is 0, we allow to encode a
29 // sequence of locations we store only differences between successive elements.
30 //
31 //===----------------------------------------------------------------------===//
32 
33 #include "clang/Basic/SourceLocation.h"
34 #include "llvm/Support/MathExtras.h"
35 #include <climits>
36 
37 #ifndef LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
38 #define LLVM_CLANG_SERIALIZATION_SOURCELOCATIONENCODING_H
39 
40 namespace clang {
41 class SourceLocationSequence;
42 
43 /// Serialized encoding of SourceLocations without context.
44 /// Optimized to have small unsigned values (=> small after VBR encoding).
45 ///
46 // Macro locations have the top bit set, we rotate by one so it is the low bit.
47 class SourceLocationEncoding {
48   using UIntTy = SourceLocation::UIntTy;
49   constexpr static unsigned UIntBits = CHAR_BIT * sizeof(UIntTy);
50 
encodeRaw(UIntTy Raw)51   static UIntTy encodeRaw(UIntTy Raw) {
52     return (Raw << 1) | (Raw >> (UIntBits - 1));
53   }
decodeRaw(UIntTy Raw)54   static UIntTy decodeRaw(UIntTy Raw) {
55     return (Raw >> 1) | (Raw << (UIntBits - 1));
56   }
57   friend SourceLocationSequence;
58 
59 public:
60   using RawLocEncoding = uint64_t;
61 
62   static RawLocEncoding encode(SourceLocation Loc, UIntTy BaseOffset,
63                                unsigned BaseModuleFileIndex,
64                                SourceLocationSequence * = nullptr);
65   static std::pair<SourceLocation, unsigned>
66   decode(RawLocEncoding, SourceLocationSequence * = nullptr);
67 };
68 
69 /// Serialized encoding of a sequence of SourceLocations.
70 ///
71 /// Optimized to produce small values when locations with the sequence are
72 /// similar. Each element can be delta-encoded against the last nonzero element.
73 ///
74 /// Sequences should be started by creating a SourceLocationSequence::State,
75 /// and then passed around as SourceLocationSequence*. Example:
76 ///
77 ///   // establishes a sequence
78 ///   void EmitTopLevelThing() {
79 ///     SourceLocationSequence::State Seq;
80 ///     EmitContainedThing(Seq);
81 ///     EmitRecursiveThing(Seq);
82 ///   }
83 ///
84 ///   // optionally part of a sequence
85 ///   void EmitContainedThing(SourceLocationSequence *Seq = nullptr) {
86 ///     Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq));
87 ///   }
88 ///
89 ///   // establishes a sequence if there isn't one already
90 ///   void EmitRecursiveThing(SourceLocationSequence *ParentSeq = nullptr) {
91 ///     SourceLocationSequence::State Seq(ParentSeq);
92 ///     Record.push_back(SourceLocationEncoding::encode(SomeLoc, Seq));
93 ///     EmitRecursiveThing(Seq);
94 ///   }
95 ///
96 class SourceLocationSequence {
97   using UIntTy = SourceLocation::UIntTy;
98   using EncodedTy = uint64_t;
99   constexpr static auto UIntBits = SourceLocationEncoding::UIntBits;
100   static_assert(sizeof(EncodedTy) > sizeof(UIntTy), "Need one extra bit!");
101 
102   // Prev stores the rotated last nonzero location.
103   UIntTy &Prev;
104 
105   // Zig-zag encoding turns small signed integers into small unsigned integers.
106   // 0 => 0, -1 => 1, 1 => 2, -2 => 3, ...
zigZag(UIntTy V)107   static UIntTy zigZag(UIntTy V) {
108     UIntTy Sign = (V & (1 << (UIntBits - 1))) ? UIntTy(-1) : UIntTy(0);
109     return Sign ^ (V << 1);
110   }
zagZig(UIntTy V)111   static UIntTy zagZig(UIntTy V) { return (V >> 1) ^ -(V & 1); }
112 
SourceLocationSequence(UIntTy & Prev)113   SourceLocationSequence(UIntTy &Prev) : Prev(Prev) {}
114 
encodeRaw(UIntTy Raw)115   EncodedTy encodeRaw(UIntTy Raw) {
116     if (Raw == 0)
117       return 0;
118     UIntTy Rotated = SourceLocationEncoding::encodeRaw(Raw);
119     if (Prev == 0)
120       return Prev = Rotated;
121     UIntTy Delta = Rotated - Prev;
122     Prev = Rotated;
123     // Exactly one 33 bit value is possible! (1 << 32).
124     // This is because we have two representations of zero: trivial & relative.
125     return 1 + EncodedTy{zigZag(Delta)};
126   }
decodeRaw(EncodedTy Encoded)127   UIntTy decodeRaw(EncodedTy Encoded) {
128     if (Encoded == 0)
129       return 0;
130     if (Prev == 0)
131       return SourceLocationEncoding::decodeRaw(Prev = Encoded);
132     return SourceLocationEncoding::decodeRaw(Prev += zagZig(Encoded - 1));
133   }
134 
135 public:
decode(EncodedTy Encoded)136   SourceLocation decode(EncodedTy Encoded) {
137     return SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
138   }
encode(SourceLocation Loc)139   EncodedTy encode(SourceLocation Loc) {
140     return encodeRaw(Loc.getRawEncoding());
141   }
142 
143   class State;
144 };
145 
146 /// This object establishes a SourceLocationSequence.
147 class SourceLocationSequence::State {
148   UIntTy Prev = 0;
149   SourceLocationSequence Seq;
150 
151 public:
152   // If Parent is provided and non-null, then this root becomes part of that
153   // enclosing sequence instead of establishing a new one.
154   State(SourceLocationSequence *Parent = nullptr)
155       : Seq(Parent ? Parent->Prev : Prev) {}
156 
157   // Implicit conversion for uniform use of roots vs propagated sequences.
158   operator SourceLocationSequence *() { return &Seq; }
159 };
160 
161 inline SourceLocationEncoding::RawLocEncoding
encode(SourceLocation Loc,UIntTy BaseOffset,unsigned BaseModuleFileIndex,SourceLocationSequence * Seq)162 SourceLocationEncoding::encode(SourceLocation Loc, UIntTy BaseOffset,
163                                unsigned BaseModuleFileIndex,
164                                SourceLocationSequence *Seq) {
165   // If the source location is a local source location, we can try to optimize
166   // the similar sequences to only record the differences.
167   if (!BaseOffset)
168     return Seq ? Seq->encode(Loc) : encodeRaw(Loc.getRawEncoding());
169 
170   if (Loc.isInvalid())
171     return 0;
172 
173   // Otherwise, the higher bits are used to store the module file index,
174   // so it is meaningless to optimize the source locations into small
175   // integers. Let's try to always use the raw encodings.
176   assert(Loc.getOffset() >= BaseOffset);
177   Loc = Loc.getLocWithOffset(-BaseOffset);
178   RawLocEncoding Encoded = encodeRaw(Loc.getRawEncoding());
179 
180   // 16 bits should be sufficient to store the module file index.
181   assert(BaseModuleFileIndex < (1 << 16));
182   Encoded |= (RawLocEncoding)BaseModuleFileIndex << 32;
183   return Encoded;
184 }
185 inline std::pair<SourceLocation, unsigned>
decode(RawLocEncoding Encoded,SourceLocationSequence * Seq)186 SourceLocationEncoding::decode(RawLocEncoding Encoded,
187                                SourceLocationSequence *Seq) {
188   unsigned ModuleFileIndex = Encoded >> 32;
189 
190   if (!ModuleFileIndex)
191     return {Seq ? Seq->decode(Encoded)
192                 : SourceLocation::getFromRawEncoding(decodeRaw(Encoded)),
193             ModuleFileIndex};
194 
195   Encoded &= llvm::maskTrailingOnes<RawLocEncoding>(32);
196   SourceLocation Loc = SourceLocation::getFromRawEncoding(decodeRaw(Encoded));
197 
198   return {Loc, ModuleFileIndex};
199 }
200 
201 } // namespace clang
202 #endif
203