xref: /freebsd/contrib/llvm-project/llvm/include/llvm/Support/DataExtractor.h (revision 6966ac055c3b7a39266fb982493330df7a097997)
1 //===-- DataExtractor.h -----------------------------------------*- C++ -*-===//
2 //
3 // Part of the LLVM Project, under the Apache License v2.0 with LLVM Exceptions.
4 // See https://llvm.org/LICENSE.txt for license information.
5 // SPDX-License-Identifier: Apache-2.0 WITH LLVM-exception
6 //
7 //===----------------------------------------------------------------------===//
8 
9 #ifndef LLVM_SUPPORT_DATAEXTRACTOR_H
10 #define LLVM_SUPPORT_DATAEXTRACTOR_H
11 
12 #include "llvm/ADT/StringRef.h"
13 #include "llvm/Support/DataTypes.h"
14 
15 namespace llvm {
16 
/// Helper type that holds the three raw bytes of a 24-bit entity so that
/// such entities can be extracted like the other fixed-width integers.
struct Uint24 {
  uint8_t Bytes[3];

  /// Set every one of the three bytes to \p U.
  ///
  /// Intentionally not explicit, so an 8-bit value converts implicitly.
  Uint24(uint8_t U) : Bytes{U, U, U} {}

  /// Store \p U0, \p U1 and \p U2 in Bytes[0], Bytes[1] and Bytes[2].
  Uint24(uint8_t U0, uint8_t U1, uint8_t U2) : Bytes{U0, U1, U2} {}

  /// Combine the three bytes into a 32-bit value.
  ///
  /// \param IsLittleEndian
  ///     If true, Bytes[0] is the least significant byte; otherwise
  ///     Bytes[2] is. Bytes[1] is always the middle byte.
  ///
  /// \return
  ///     The 24-bit value, held in the low bits of a uint32_t.
  uint32_t getAsUint32(bool IsLittleEndian) const {
    const unsigned LowIdx = IsLittleEndian ? 0u : 2u;
    const unsigned HighIdx = 2u - LowIdx;
    const uint32_t Low = Bytes[LowIdx];
    const uint32_t Mid = Bytes[1];
    const uint32_t High = Bytes[HighIdx];
    // The three bytes occupy disjoint bit ranges, so OR-ing them together
    // yields the same value as adding them.
    return (High << 16) | (Mid << 8) | Low;
  }
};
31 
/// Alias for Uint24, spelled in the style of the fixed-width integer type
/// names.
using uint24_t = Uint24;
/// Compile-time check that the type occupies exactly three bytes.
static_assert(sizeof(uint24_t) == 3, "sizeof(uint24_t) != 3");
34 
35 /// Needed by swapByteOrder().
36 inline uint24_t getSwappedBytes(uint24_t C) {
37   return uint24_t(C.Bytes[2], C.Bytes[1], C.Bytes[0]);
38 }
39 
40 class DataExtractor {
41   StringRef Data;
42   uint8_t IsLittleEndian;
43   uint8_t AddressSize;
44 public:
45   /// Construct with a buffer that is owned by the caller.
46   ///
47   /// This constructor allows us to use data that is owned by the
48   /// caller. The data must stay around as long as this object is
49   /// valid.
50   DataExtractor(StringRef Data, bool IsLittleEndian, uint8_t AddressSize)
51     : Data(Data), IsLittleEndian(IsLittleEndian), AddressSize(AddressSize) {}
52 
53   /// Get the data pointed to by this extractor.
54   StringRef getData() const { return Data; }
55   /// Get the endianness for this extractor.
56   bool isLittleEndian() const { return IsLittleEndian; }
57   /// Get the address size for this extractor.
58   uint8_t getAddressSize() const { return AddressSize; }
59   /// Set the address size for this extractor.
60   void setAddressSize(uint8_t Size) { AddressSize = Size; }
61 
62   /// Extract a C string from \a *offset_ptr.
63   ///
64   /// Returns a pointer to a C String from the data at the offset
65   /// pointed to by \a offset_ptr. A variable length NULL terminated C
66   /// string will be extracted and the \a offset_ptr will be
67   /// updated with the offset of the byte that follows the NULL
68   /// terminator byte.
69   ///
70   /// @param[in,out] offset_ptr
71   ///     A pointer to an offset within the data that will be advanced
72   ///     by the appropriate number of bytes if the value is extracted
73   ///     correctly. If the offset is out of bounds or there are not
74   ///     enough bytes to extract this value, the offset will be left
75   ///     unmodified.
76   ///
77   /// @return
78   ///     A pointer to the C string value in the data. If the offset
79   ///     pointed to by \a offset_ptr is out of bounds, or if the
80   ///     offset plus the length of the C string is out of bounds,
81   ///     NULL will be returned.
82   const char *getCStr(uint32_t *offset_ptr) const;
83 
84   /// Extract a C string from \a *OffsetPtr.
85   ///
86   /// Returns a StringRef for the C String from the data at the offset
87   /// pointed to by \a OffsetPtr. A variable length NULL terminated C
88   /// string will be extracted and the \a OffsetPtr will be
89   /// updated with the offset of the byte that follows the NULL
90   /// terminator byte.
91   ///
92   /// \param[in,out] OffsetPtr
93   ///     A pointer to an offset within the data that will be advanced
94   ///     by the appropriate number of bytes if the value is extracted
95   ///     correctly. If the offset is out of bounds or there are not
96   ///     enough bytes to extract this value, the offset will be left
97   ///     unmodified.
98   ///
99   /// \return
100   ///     A StringRef for the C string value in the data. If the offset
101   ///     pointed to by \a OffsetPtr is out of bounds, or if the
102   ///     offset plus the length of the C string is out of bounds,
103   ///     a default-initialized StringRef will be returned.
104   StringRef getCStrRef(uint32_t *OffsetPtr) const;
105 
106   /// Extract an unsigned integer of size \a byte_size from \a
107   /// *offset_ptr.
108   ///
109   /// Extract a single unsigned integer value and update the offset
110   /// pointed to by \a offset_ptr. The size of the extracted integer
111   /// is specified by the \a byte_size argument. \a byte_size should
112   /// have a value greater than or equal to one and less than or equal
113   /// to eight since the return value is 64 bits wide. Any
114   /// \a byte_size values less than 1 or greater than 8 will result in
115   /// nothing being extracted, and zero being returned.
116   ///
117   /// @param[in,out] offset_ptr
118   ///     A pointer to an offset within the data that will be advanced
119   ///     by the appropriate number of bytes if the value is extracted
120   ///     correctly. If the offset is out of bounds or there are not
121   ///     enough bytes to extract this value, the offset will be left
122   ///     unmodified.
123   ///
124   /// @param[in] byte_size
125   ///     The size in byte of the integer to extract.
126   ///
127   /// @return
128   ///     The unsigned integer value that was extracted, or zero on
129   ///     failure.
130   uint64_t getUnsigned(uint32_t *offset_ptr, uint32_t byte_size) const;
131 
132   /// Extract an signed integer of size \a byte_size from \a *offset_ptr.
133   ///
134   /// Extract a single signed integer value (sign extending if required)
135   /// and update the offset pointed to by \a offset_ptr. The size of
136   /// the extracted integer is specified by the \a byte_size argument.
137   /// \a byte_size should have a value greater than or equal to one
138   /// and less than or equal to eight since the return value is 64
139   /// bits wide. Any \a byte_size values less than 1 or greater than
140   /// 8 will result in nothing being extracted, and zero being returned.
141   ///
142   /// @param[in,out] offset_ptr
143   ///     A pointer to an offset within the data that will be advanced
144   ///     by the appropriate number of bytes if the value is extracted
145   ///     correctly. If the offset is out of bounds or there are not
146   ///     enough bytes to extract this value, the offset will be left
147   ///     unmodified.
148   ///
149   /// @param[in] size
150   ///     The size in bytes of the integer to extract.
151   ///
152   /// @return
153   ///     The sign extended signed integer value that was extracted,
154   ///     or zero on failure.
155   int64_t getSigned(uint32_t *offset_ptr, uint32_t size) const;
156 
157   //------------------------------------------------------------------
158   /// Extract an pointer from \a *offset_ptr.
159   ///
160   /// Extract a single pointer from the data and update the offset
161   /// pointed to by \a offset_ptr. The size of the extracted pointer
162   /// is \a getAddressSize(), so the address size has to be
163   /// set correctly prior to extracting any pointer values.
164   ///
165   /// @param[in,out] offset_ptr
166   ///     A pointer to an offset within the data that will be advanced
167   ///     by the appropriate number of bytes if the value is extracted
168   ///     correctly. If the offset is out of bounds or there are not
169   ///     enough bytes to extract this value, the offset will be left
170   ///     unmodified.
171   ///
172   /// @return
173   ///     The extracted pointer value as a 64 integer.
174   uint64_t getAddress(uint32_t *offset_ptr) const {
175     return getUnsigned(offset_ptr, AddressSize);
176   }
177 
178   /// Extract a uint8_t value from \a *offset_ptr.
179   ///
180   /// Extract a single uint8_t from the binary data at the offset
181   /// pointed to by \a offset_ptr, and advance the offset on success.
182   ///
183   /// @param[in,out] offset_ptr
184   ///     A pointer to an offset within the data that will be advanced
185   ///     by the appropriate number of bytes if the value is extracted
186   ///     correctly. If the offset is out of bounds or there are not
187   ///     enough bytes to extract this value, the offset will be left
188   ///     unmodified.
189   ///
190   /// @return
191   ///     The extracted uint8_t value.
192   uint8_t getU8(uint32_t *offset_ptr) const;
193 
194   /// Extract \a count uint8_t values from \a *offset_ptr.
195   ///
196   /// Extract \a count uint8_t values from the binary data at the
197   /// offset pointed to by \a offset_ptr, and advance the offset on
198   /// success. The extracted values are copied into \a dst.
199   ///
200   /// @param[in,out] offset_ptr
201   ///     A pointer to an offset within the data that will be advanced
202   ///     by the appropriate number of bytes if the value is extracted
203   ///     correctly. If the offset is out of bounds or there are not
204   ///     enough bytes to extract this value, the offset will be left
205   ///     unmodified.
206   ///
207   /// @param[out] dst
208   ///     A buffer to copy \a count uint8_t values into. \a dst must
209   ///     be large enough to hold all requested data.
210   ///
211   /// @param[in] count
212   ///     The number of uint8_t values to extract.
213   ///
214   /// @return
215   ///     \a dst if all values were properly extracted and copied,
216   ///     NULL otherise.
217   uint8_t *getU8(uint32_t *offset_ptr, uint8_t *dst, uint32_t count) const;
218 
219   //------------------------------------------------------------------
220   /// Extract a uint16_t value from \a *offset_ptr.
221   ///
222   /// Extract a single uint16_t from the binary data at the offset
223   /// pointed to by \a offset_ptr, and update the offset on success.
224   ///
225   /// @param[in,out] offset_ptr
226   ///     A pointer to an offset within the data that will be advanced
227   ///     by the appropriate number of bytes if the value is extracted
228   ///     correctly. If the offset is out of bounds or there are not
229   ///     enough bytes to extract this value, the offset will be left
230   ///     unmodified.
231   ///
232   /// @return
233   ///     The extracted uint16_t value.
234   //------------------------------------------------------------------
235   uint16_t getU16(uint32_t *offset_ptr) const;
236 
237   /// Extract \a count uint16_t values from \a *offset_ptr.
238   ///
239   /// Extract \a count uint16_t values from the binary data at the
240   /// offset pointed to by \a offset_ptr, and advance the offset on
241   /// success. The extracted values are copied into \a dst.
242   ///
243   /// @param[in,out] offset_ptr
244   ///     A pointer to an offset within the data that will be advanced
245   ///     by the appropriate number of bytes if the value is extracted
246   ///     correctly. If the offset is out of bounds or there are not
247   ///     enough bytes to extract this value, the offset will be left
248   ///     unmodified.
249   ///
250   /// @param[out] dst
251   ///     A buffer to copy \a count uint16_t values into. \a dst must
252   ///     be large enough to hold all requested data.
253   ///
254   /// @param[in] count
255   ///     The number of uint16_t values to extract.
256   ///
257   /// @return
258   ///     \a dst if all values were properly extracted and copied,
259   ///     NULL otherise.
260   uint16_t *getU16(uint32_t *offset_ptr, uint16_t *dst, uint32_t count) const;
261 
262   /// Extract a 24-bit unsigned value from \a *offset_ptr and return it
263   /// in a uint32_t.
264   ///
265   /// Extract 3 bytes from the binary data at the offset pointed to by
266   /// \a offset_ptr, construct a uint32_t from them and update the offset
267   /// on success.
268   ///
269   /// @param[in,out] offset_ptr
270   ///     A pointer to an offset within the data that will be advanced
271   ///     by the 3 bytes if the value is extracted correctly. If the offset
272   ///     is out of bounds or there are not enough bytes to extract this value,
273   ///     the offset will be left unmodified.
274   ///
275   /// @return
276   ///     The extracted 24-bit value represented in a uint32_t.
277   uint32_t getU24(uint32_t *offset_ptr) const;
278 
279   /// Extract a uint32_t value from \a *offset_ptr.
280   ///
281   /// Extract a single uint32_t from the binary data at the offset
282   /// pointed to by \a offset_ptr, and update the offset on success.
283   ///
284   /// @param[in,out] offset_ptr
285   ///     A pointer to an offset within the data that will be advanced
286   ///     by the appropriate number of bytes if the value is extracted
287   ///     correctly. If the offset is out of bounds or there are not
288   ///     enough bytes to extract this value, the offset will be left
289   ///     unmodified.
290   ///
291   /// @return
292   ///     The extracted uint32_t value.
293   uint32_t getU32(uint32_t *offset_ptr) const;
294 
295   /// Extract \a count uint32_t values from \a *offset_ptr.
296   ///
297   /// Extract \a count uint32_t values from the binary data at the
298   /// offset pointed to by \a offset_ptr, and advance the offset on
299   /// success. The extracted values are copied into \a dst.
300   ///
301   /// @param[in,out] offset_ptr
302   ///     A pointer to an offset within the data that will be advanced
303   ///     by the appropriate number of bytes if the value is extracted
304   ///     correctly. If the offset is out of bounds or there are not
305   ///     enough bytes to extract this value, the offset will be left
306   ///     unmodified.
307   ///
308   /// @param[out] dst
309   ///     A buffer to copy \a count uint32_t values into. \a dst must
310   ///     be large enough to hold all requested data.
311   ///
312   /// @param[in] count
313   ///     The number of uint32_t values to extract.
314   ///
315   /// @return
316   ///     \a dst if all values were properly extracted and copied,
317   ///     NULL otherise.
318   uint32_t *getU32(uint32_t *offset_ptr, uint32_t *dst, uint32_t count) const;
319 
320   /// Extract a uint64_t value from \a *offset_ptr.
321   ///
322   /// Extract a single uint64_t from the binary data at the offset
323   /// pointed to by \a offset_ptr, and update the offset on success.
324   ///
325   /// @param[in,out] offset_ptr
326   ///     A pointer to an offset within the data that will be advanced
327   ///     by the appropriate number of bytes if the value is extracted
328   ///     correctly. If the offset is out of bounds or there are not
329   ///     enough bytes to extract this value, the offset will be left
330   ///     unmodified.
331   ///
332   /// @return
333   ///     The extracted uint64_t value.
334   uint64_t getU64(uint32_t *offset_ptr) const;
335 
336   /// Extract \a count uint64_t values from \a *offset_ptr.
337   ///
338   /// Extract \a count uint64_t values from the binary data at the
339   /// offset pointed to by \a offset_ptr, and advance the offset on
340   /// success. The extracted values are copied into \a dst.
341   ///
342   /// @param[in,out] offset_ptr
343   ///     A pointer to an offset within the data that will be advanced
344   ///     by the appropriate number of bytes if the value is extracted
345   ///     correctly. If the offset is out of bounds or there are not
346   ///     enough bytes to extract this value, the offset will be left
347   ///     unmodified.
348   ///
349   /// @param[out] dst
350   ///     A buffer to copy \a count uint64_t values into. \a dst must
351   ///     be large enough to hold all requested data.
352   ///
353   /// @param[in] count
354   ///     The number of uint64_t values to extract.
355   ///
356   /// @return
357   ///     \a dst if all values were properly extracted and copied,
358   ///     NULL otherise.
359   uint64_t *getU64(uint32_t *offset_ptr, uint64_t *dst, uint32_t count) const;
360 
361   /// Extract a signed LEB128 value from \a *offset_ptr.
362   ///
363   /// Extracts an signed LEB128 number from this object's data
364   /// starting at the offset pointed to by \a offset_ptr. The offset
365   /// pointed to by \a offset_ptr will be updated with the offset of
366   /// the byte following the last extracted byte.
367   ///
368   /// @param[in,out] offset_ptr
369   ///     A pointer to an offset within the data that will be advanced
370   ///     by the appropriate number of bytes if the value is extracted
371   ///     correctly. If the offset is out of bounds or there are not
372   ///     enough bytes to extract this value, the offset will be left
373   ///     unmodified.
374   ///
375   /// @return
376   ///     The extracted signed integer value.
377   int64_t getSLEB128(uint32_t *offset_ptr) const;
378 
379   /// Extract a unsigned LEB128 value from \a *offset_ptr.
380   ///
381   /// Extracts an unsigned LEB128 number from this object's data
382   /// starting at the offset pointed to by \a offset_ptr. The offset
383   /// pointed to by \a offset_ptr will be updated with the offset of
384   /// the byte following the last extracted byte.
385   ///
386   /// @param[in,out] offset_ptr
387   ///     A pointer to an offset within the data that will be advanced
388   ///     by the appropriate number of bytes if the value is extracted
389   ///     correctly. If the offset is out of bounds or there are not
390   ///     enough bytes to extract this value, the offset will be left
391   ///     unmodified.
392   ///
393   /// @return
394   ///     The extracted unsigned integer value.
395   uint64_t getULEB128(uint32_t *offset_ptr) const;
396 
397   /// Test the validity of \a offset.
398   ///
399   /// @return
400   ///     \b true if \a offset is a valid offset into the data in this
401   ///     object, \b false otherwise.
402   bool isValidOffset(uint32_t offset) const { return Data.size() > offset; }
403 
404   /// Test the availability of \a length bytes of data from \a offset.
405   ///
406   /// @return
407   ///     \b true if \a offset is a valid offset and there are \a
408   ///     length bytes available at that offset, \b false otherwise.
409   bool isValidOffsetForDataOfSize(uint32_t offset, uint32_t length) const {
410     return offset + length >= offset && isValidOffset(offset + length - 1);
411   }
412 
413   /// Test the availability of enough bytes of data for a pointer from
414   /// \a offset. The size of a pointer is \a getAddressSize().
415   ///
416   /// @return
417   ///     \b true if \a offset is a valid offset and there are enough
418   ///     bytes for a pointer available at that offset, \b false
419   ///     otherwise.
420   bool isValidOffsetForAddress(uint32_t offset) const {
421     return isValidOffsetForDataOfSize(offset, AddressSize);
422   }
423 };
424 
425 } // namespace llvm
426 
427 #endif
428