1 /*
2 * Copyright (c) 2016 Thomas Pornin <pornin@bolet.org>
3 *
4 * Permission is hereby granted, free of charge, to any person obtaining
5 * a copy of this software and associated documentation files (the
6 * "Software"), to deal in the Software without restriction, including
7 * without limitation the rights to use, copy, modify, merge, publish,
8 * distribute, sublicense, and/or sell copies of the Software, and to
9 * permit persons to whom the Software is furnished to do so, subject to
10 * the following conditions:
11 *
12 * The above copyright notice and this permission notice shall be
13 * included in all copies or substantial portions of the Software.
14 *
15 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND,
16 * EXPRESS OR IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF
17 * MERCHANTABILITY, FITNESS FOR A PARTICULAR PURPOSE AND
18 * NONINFRINGEMENT. IN NO EVENT SHALL THE AUTHORS OR COPYRIGHT HOLDERS
19 * BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER LIABILITY, WHETHER IN AN
20 * ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, OUT OF OR IN
21 * CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
22 * SOFTWARE.
23 */
24
25 #ifndef BR_BEARSSL_PEM_H__
26 #define BR_BEARSSL_PEM_H__
27
28 #include <stddef.h>
29 #include <stdint.h>
30
31 #ifdef __cplusplus
32 extern "C" {
33 #endif
34
35 /** \file bearssl_pem.h
36 *
37 * # PEM Support
38 *
 * PEM is a traditional encoding layer used to store binary objects (in
 * particular X.509 certificates, and private keys) in text files. While
41 * the acronym comes from an old, defunct standard ("Privacy Enhanced
42 * Mail"), the format has been reused, with some variations, by many
43 * systems, and is a _de facto_ standard, even though it is not, actually,
44 * specified in all clarity anywhere.
45 *
46 * ## Format Details
47 *
48 * BearSSL contains a generic, streamed PEM decoder, which handles the
49 * following format:
50 *
51 * - The input source (a sequence of bytes) is assumed to be the
52 * encoding of a text file in an ASCII-compatible charset. This
53 * includes ISO-8859-1, Windows-1252, and UTF-8 encodings. Each
54 * line ends on a newline character (U+000A LINE FEED). The
55 * U+000D CARRIAGE RETURN characters are ignored, so the code
56 * accepts both Windows-style and Unix-style line endings.
57 *
58 * - Each object begins with a banner that occurs at the start of
59 * a line; the first banner characters are "`-----BEGIN `" (five
60 * dashes, the word "BEGIN", and a space). The banner matching is
61 * not case-sensitive.
62 *
 *   - The _object name_ consists of the characters that follow the
 *     banner start sequence, up to the end of the line, but without
 *     trailing dashes (in "normal" PEM, there are five trailing
 *     dashes, but this implementation is not picky about these dashes).
67 * The BearSSL decoder normalises the name characters to uppercase
68 * (for ASCII letters only) and accepts names up to 127 characters.
69 *
70 * - The object ends with a banner that again occurs at the start of
71 * a line, and starts with "`-----END `" (again case-insensitive).
72 *
73 * - Between that start and end banner, only Base64 data shall occur.
 *     Base64 converts each sequence of three bytes into four
 *     characters; the four characters are ASCII letters, digits, "`+`"
 *     or "`/`" signs, and one or two "`=`" signs may occur in the last
 *     quartet. Whitespace is ignored (whitespace is any ASCII character
78 * of code 32 or less, so control characters are whitespace) and
79 * lines may have arbitrary length; the only restriction is that the
80 * four characters of a quartet must appear on the same line (no
81 * line break inside a quartet).
82 *
83 * - A single file may contain more than one PEM object. Bytes that
84 * occur between objects are ignored.
85 *
86 *
87 * ## PEM Decoder API
88 *
89 * The PEM decoder offers a state-machine API. The caller allocates a
90 * decoder context, then injects source bytes. Source bytes are pushed
91 * with `br_pem_decoder_push()`. The decoder stops accepting bytes when
92 * it reaches an "event", which is either the start of an object, the
93 * end of an object, or a decoding error within an object.
94 *
95 * The `br_pem_decoder_event()` function is used to obtain the current
96 * event; it also clears it, thus allowing the decoder to accept more
 * bytes. When an object start event is raised, the decoder context
 * offers the found object name (normalised to ASCII uppercase).
99 *
100 * When an object is reached, the caller must set an appropriate callback
101 * function, which will receive (by chunks) the decoded object data.
102 *
103 * Since the decoder context makes no dynamic allocation, it requires
104 * no explicit deallocation.
105 */
106
/**
 * \brief PEM decoder context.
 *
 * Contents are opaque (they should not be accessed directly). All
 * buffers declared here (`dp_stack`, `rp_stack`, `name`, `buf`) are
 * fixed-size arrays embedded in the structure.
 */
typedef struct {
#ifndef BR_DOXYGEN_IGNORE
	/*
	 * CPU for the T0 virtual machine.
	 * NOTE(review): `dp`, `rp` and `ip` look like a data stack pointer,
	 * a return stack pointer and an instruction pointer; confirm
	 * against the decoder implementation.
	 */
	struct {
		uint32_t *dp;
		uint32_t *rp;
		const unsigned char *ip;
	} cpu;
	/*
	 * Two fixed arrays of 32 words each; presumably the storage that
	 * `cpu.dp` and `cpu.rp` point into (to be confirmed).
	 */
	uint32_t dp_stack[32];
	uint32_t rp_stack[32];
	/*
	 * Presumably an error indicator; its values are defined by the
	 * implementation, not by this header.
	 */
	int err;

	/*
	 * Presumably the input bytes currently being consumed and the
	 * number of bytes left (to be confirmed).
	 */
	const unsigned char *hbuf;
	size_t hlen;

	/*
	 * Receiver callback and its opaque context pointer, as stored by
	 * br_pem_decoder_setdest().
	 */
	void (*dest)(void *dest_ctx, const void *src, size_t len);
	void *dest_ctx;

	/* Presumably the pending event code (to be confirmed). */
	unsigned char event;
	/* Object name; this is what br_pem_decoder_name() returns. */
	char name[128];
	/*
	 * Presumably a staging buffer and its current fill index (to be
	 * confirmed).
	 */
	unsigned char buf[255];
	size_t ptr;
#endif
} br_pem_decoder_context;
136
/**
 * \brief Initialise a PEM decoder structure.
 *
 * The storage for the context is supplied by the caller. This header
 * declares no matching release function: the decoder context makes no
 * dynamic allocation and requires no explicit deallocation.
 *
 * \param ctx   decoder context to initialise.
 */
void br_pem_decoder_init(br_pem_decoder_context *ctx);
143
/**
 * \brief Push some bytes into the decoder.
 *
 * Returned value is the number of bytes actually consumed; this may be
 * less than the number of provided bytes if an event is raised. When an
 * event is raised, it must be read (with `br_pem_decoder_event()`);
 * until the event is read, this function will return 0.
 *
 * Decoded object data is not returned by this function: it is handed
 * to the receiver callback set with `br_pem_decoder_setdest()`.
 *
 * \param ctx    decoder context.
 * \param data   new data bytes.
 * \param len    number of new data bytes.
 * \return  the number of bytes actually received (may be less than `len`).
 */
size_t br_pem_decoder_push(br_pem_decoder_context *ctx,
	const void *data, size_t len);
159
160 /**
161 * \brief Set the receiver for decoded data.
162 *
163 * When an object is entered, the provided function (with opaque context
164 * pointer) will be called repeatedly with successive chunks of decoded
165 * data for that object. If `dest` is set to 0, then decoded data is
166 * simply ignored. The receiver can be set at any time, but, in practice,
167 * it should be called immediately after receiving a "start of object"
168 * event.
169 *
170 * \param ctx decoder context.
171 * \param dest callback for receiving decoded data.
172 * \param dest_ctx opaque context pointer for the `dest` callback.
173 */
174 static inline void
br_pem_decoder_setdest(br_pem_decoder_context * ctx,void (* dest)(void * dest_ctx,const void * src,size_t len),void * dest_ctx)175 br_pem_decoder_setdest(br_pem_decoder_context *ctx,
176 void (*dest)(void *dest_ctx, const void *src, size_t len),
177 void *dest_ctx)
178 {
179 ctx->dest = dest;
180 ctx->dest_ctx = dest_ctx;
181 }
182
/**
 * \brief Get the last event.
 *
 * If an event was raised, then this function returns the event value, and
 * also clears it, thereby allowing the decoder to proceed. If no event
 * was raised since the last call to `br_pem_decoder_event()`, then this
 * function returns 0.
 *
 * The event values defined in this header are `BR_PEM_BEGIN_OBJ`,
 * `BR_PEM_END_OBJ` and `BR_PEM_ERROR`; all three are non-zero.
 *
 * \param ctx   decoder context.
 * \return  the raised event, or 0.
 */
int br_pem_decoder_event(br_pem_decoder_context *ctx);
195
/**
 * \brief Event: start of object.
 *
 * This event is raised when the start of a new object has been detected.
 * The object name (normalised to uppercase) can be accessed with
 * `br_pem_decoder_name()`.
 *
 * Like the other event codes, this value is non-zero, so it cannot be
 * confused with the 0 that `br_pem_decoder_event()` returns when no
 * event was raised.
 */
#define BR_PEM_BEGIN_OBJ   1

/**
 * \brief Event: end of object.
 *
 * This event is raised when the end of the current object is reached
 * (normally, i.e. with no decoding error).
 */
#define BR_PEM_END_OBJ     2

/**
 * \brief Event: decoding error.
 *
 * This event is raised when decoding fails within an object.
 * This formally closes the current object and brings the decoder back
 * to the "out of any object" state. The offending line in the source
 * is consumed.
 */
#define BR_PEM_ERROR       3
222
223 /**
224 * \brief Get the name of the encountered object.
225 *
226 * The encountered object name is defined only when the "start of object"
227 * event is raised. That name is normalised to uppercase (for ASCII letters
228 * only) and does not include trailing dashes.
229 *
230 * \param ctx decoder context.
231 * \return the current object name.
232 */
233 static inline const char *
br_pem_decoder_name(br_pem_decoder_context * ctx)234 br_pem_decoder_name(br_pem_decoder_context *ctx)
235 {
236 return ctx->name;
237 }
238
/**
 * \brief Encode an object in PEM.
 *
 * This function encodes the provided binary object (`data`, of length `len`
 * bytes) into PEM. The `banner` text will be included in the header and
 * footer (e.g. use `"CERTIFICATE"` to get a `"BEGIN CERTIFICATE"` header).
 *
 * The length (in characters) of the PEM output is returned; that length
 * does NOT include the terminating zero, which this function nevertheless
 * adds. If using the returned value for allocation purposes, the allocated
 * buffer size MUST be at least one byte larger than the returned size.
 *
 * If `dest` is `NULL`, then the encoding does not happen; however, the
 * length of the encoded object is still computed and returned. A caller
 * can therefore call this function once with `dest` set to `NULL` to
 * obtain the length, allocate that length plus one byte, and then call
 * it again with the allocated buffer.
 *
 * The `data` pointer may be `NULL` only if `len` is zero (when encoding
 * an object of length zero, which is not very useful), or when `dest`
 * is `NULL` (in that case, source data bytes are ignored).
 *
 * Some `flags` can be specified to alter the encoding behaviour:
 *
 *   - If `BR_PEM_LINE64` is set, then line-breaking will occur after
 *     every 64 characters of output, instead of the default of 76.
 *
 *   - If `BR_PEM_CRLF` is set, then end-of-line sequence will use
 *     CR+LF instead of a single LF.
 *
 * The `data` and `dest` buffers may overlap, in which case the source
 * binary data is destroyed in the process. Note that the PEM-encoded output
 * is always larger than the source binary.
 *
 * \param dest     the destination buffer (or `NULL`).
 * \param data     the source buffer (can be `NULL` in some cases).
 * \param len      the source length (in bytes).
 * \param banner   the PEM banner expression.
 * \param flags    the behavioural flags.
 * \return  the PEM object length (in characters), EXCLUDING the final zero.
 */
size_t br_pem_encode(void *dest, const void *data, size_t len,
	const char *banner, unsigned flags);
279
/**
 * \brief PEM encoding flag: split lines at 64 characters.
 *
 * Passed in the `flags` parameter of `br_pem_encode()`. Without this
 * flag, lines are split at 76 characters.
 */
#define BR_PEM_LINE64   0x0001

/**
 * \brief PEM encoding flag: use CR+LF line endings.
 *
 * Passed in the `flags` parameter of `br_pem_encode()`. Without this
 * flag, each line ends with a single LF. This flag occupies a different
 * bit than `BR_PEM_LINE64`.
 */
#define BR_PEM_CRLF     0x0002
289
290 #ifdef __cplusplus
291 }
292 #endif
293
294 #endif
295