xref: /freebsd/crypto/openssl/crypto/evp/encode.c (revision 1523ccfd9c8c254f7928143d31c305384b05fd11)
1 /*
2  * Copyright 1995-2026 The OpenSSL Project Authors. All Rights Reserved.
3  *
4  * Licensed under the Apache License 2.0 (the "License").  You may not use
5  * this file except in compliance with the License.  You can obtain a copy
6  * in the file LICENSE in the source distribution or at
7  * https://www.openssl.org/source/license.html
8  */
9 
10 #include <stdio.h>
11 #include <limits.h>
12 #include "internal/cryptlib.h"
13 #include <openssl/evp.h>
14 #include "crypto/evp.h"
15 #include "evp_local.h"
16 
17 static unsigned char conv_ascii2bin(unsigned char a,
18     const unsigned char *table);
19 static int evp_encodeblock_int(EVP_ENCODE_CTX *ctx, unsigned char *t,
20     const unsigned char *f, int dlen);
21 static int evp_decodeblock_int(EVP_ENCODE_CTX *ctx, unsigned char *t,
22     const unsigned char *f, int n, int eof);
23 
24 #ifndef CHARSET_EBCDIC
25 #define conv_bin2ascii(a, table) ((table)[(a) & 0x3f])
26 #else
27 /*
28  * We assume that PEM encoded files are EBCDIC files (i.e., printable text
29  * files). Convert them here while decoding. When encoding, output is EBCDIC
30  * (text) format again. (No need for conversion in the conv_bin2ascii macro,
31  * as the underlying textstring data_bin2ascii[] is already EBCDIC)
32  */
33 #define conv_bin2ascii(a, table) ((table)[(a) & 0x3f])
34 #endif
35 
36 /*-
37  * 64 char lines
38  * pad input with 0
39  * left over chars are set to =
40  * 1 byte  => xx==
41  * 2 bytes => xxx=
42  * 3 bytes => xxxx
43  */
44 #define BIN_PER_LINE (64 / 4 * 3)
45 #define CHUNKS_PER_LINE (64 / 4)
46 #define CHAR_PER_LINE (64 + 1)
47 
/*
 * Encoding alphabet: entry i is the output character for the 6-bit value i.
 * 64 characters plus the string literal's terminating NUL.
 */
static const unsigned char data_bin2ascii[65] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789"
    "+/";

/* SRP uses a different base64 alphabet: digits first, then letters, "./" */
static const unsigned char srpdata_bin2ascii[65] =
    "0123456789"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "./";
52 
/*-
 * 0xF0 is an EOLN
 * 0xF1 is ignored, but the next character needs to be 0xF0 (for \r\n processing).
 * 0xF2 is EOF
 * 0xE0 is ignored at the start of a line.
 * 0xFF is an error
 */
60 
61 #define B64_EOLN 0xF0
62 #define B64_CR 0xF1
63 #define B64_EOF 0xF2
64 #define B64_WS 0xE0
65 #define B64_ERROR 0xFF
66 #define B64_NOT_BASE64(a) (((a) | 0x13) == 0xF3)
67 #define B64_BASE64(a) (!B64_NOT_BASE64(a))
68 
/*
 * Decoding table for the standard alphabet, indexed by 7-bit ASCII code.
 * Entries below 0x40 are 6-bit values; the remaining entries are markers:
 * 0xE0 whitespace (TAB, SPACE), 0xF0 end of line (LF), 0xF1 CR,
 * 0xF2 end of data ('-'), 0xFF invalid character. '=' maps to 0x00.
 */
static const unsigned char data_ascii2bin[128] = {
    /* 0x00 */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    /* 0x08 */ 0xFF, 0xE0, 0xF0, 0xFF, 0xFF, 0xF1, 0xFF, 0xFF,
    /* 0x10 */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    /* 0x18 */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    /* 0x20 */ 0xE0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    /* 0x28 */ 0xFF, 0xFF, 0xFF, 0x3E, 0xFF, 0xF2, 0xFF, 0x3F,
    /* 0x30 */ 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B,
    /* 0x38 */ 0x3C, 0x3D, 0xFF, 0xFF, 0xFF, 0x00, 0xFF, 0xFF,
    /* 0x40 */ 0xFF, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    /* 0x48 */ 0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E,
    /* 0x50 */ 0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
    /* 0x58 */ 0x17, 0x18, 0x19, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    /* 0x60 */ 0xFF, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20,
    /* 0x68 */ 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    /* 0x70 */ 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30,
    /* 0x78 */ 0x31, 0x32, 0x33, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
};
84 
/*
 * Decoding table for the SRP alphabet, indexed by 7-bit ASCII code.
 * Entries below 0x40 are 6-bit values; the remaining entries are markers:
 * 0xE0 whitespace (TAB, SPACE), 0xF0 end of line (LF), 0xF1 CR,
 * 0xF2 end of data ('-'), 0xFF invalid character. '=' maps to 0x00.
 */
static const unsigned char srpdata_ascii2bin[128] = {
    /* 0x00 */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    /* 0x08 */ 0xFF, 0xE0, 0xF0, 0xFF, 0xFF, 0xF1, 0xFF, 0xFF,
    /* 0x10 */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    /* 0x18 */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    /* 0x20 */ 0xE0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    /* 0x28 */ 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xF2, 0x3E, 0x3F,
    /* 0x30 */ 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    /* 0x38 */ 0x08, 0x09, 0xFF, 0xFF, 0xFF, 0x00, 0xFF, 0xFF,
    /* 0x40 */ 0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10,
    /* 0x48 */ 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
    /* 0x50 */ 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20,
    /* 0x58 */ 0x21, 0x22, 0x23, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    /* 0x60 */ 0xFF, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A,
    /* 0x68 */ 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32,
    /* 0x70 */ 0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A,
    /* 0x78 */ 0x3B, 0x3C, 0x3D, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF
};
100 
101 #ifndef CHARSET_EBCDIC
102 static unsigned char conv_ascii2bin(unsigned char a, const unsigned char *table)
103 {
104     if (a & 0x80)
105         return B64_ERROR;
106     return table[a];
107 }
108 #else
109 static unsigned char conv_ascii2bin(unsigned char a, const unsigned char *table)
110 {
111     a = os_toascii[a];
112     if (a & 0x80)
113         return B64_ERROR;
114     return table[a];
115 }
116 #endif
117 
118 EVP_ENCODE_CTX *EVP_ENCODE_CTX_new(void)
119 {
120     return OPENSSL_zalloc(sizeof(EVP_ENCODE_CTX));
121 }
122 
123 void EVP_ENCODE_CTX_free(EVP_ENCODE_CTX *ctx)
124 {
125     OPENSSL_free(ctx);
126 }
127 
128 int EVP_ENCODE_CTX_copy(EVP_ENCODE_CTX *dctx, const EVP_ENCODE_CTX *sctx)
129 {
130     memcpy(dctx, sctx, sizeof(EVP_ENCODE_CTX));
131 
132     return 1;
133 }
134 
135 int EVP_ENCODE_CTX_num(EVP_ENCODE_CTX *ctx)
136 {
137     return ctx->num;
138 }
139 
140 void evp_encode_ctx_set_flags(EVP_ENCODE_CTX *ctx, unsigned int flags)
141 {
142     ctx->flags = flags;
143 }
144 
145 void EVP_EncodeInit(EVP_ENCODE_CTX *ctx)
146 {
147     ctx->length = 48;
148     ctx->num = 0;
149     ctx->line_num = 0;
150     ctx->flags = 0;
151 }
152 
153 int EVP_EncodeUpdate(EVP_ENCODE_CTX *ctx, unsigned char *out, int *outl,
154     const unsigned char *in, int inl)
155 {
156     int i, j;
157     size_t total = 0;
158 
159     *outl = 0;
160     if (inl <= 0)
161         return 0;
162     OPENSSL_assert(ctx->length <= (int)sizeof(ctx->enc_data));
163     if (ctx->length - ctx->num > inl) {
164         memcpy(&(ctx->enc_data[ctx->num]), in, inl);
165         ctx->num += inl;
166         return 1;
167     }
168     if (ctx->num != 0) {
169         i = ctx->length - ctx->num;
170         memcpy(&(ctx->enc_data[ctx->num]), in, i);
171         in += i;
172         inl -= i;
173         j = evp_encodeblock_int(ctx, out, ctx->enc_data, ctx->length);
174         ctx->num = 0;
175         out += j;
176         total = j;
177         if ((ctx->flags & EVP_ENCODE_CTX_NO_NEWLINES) == 0) {
178             *(out++) = '\n';
179             total++;
180         }
181         *out = '\0';
182     }
183     while (inl >= ctx->length && total <= INT_MAX) {
184         j = evp_encodeblock_int(ctx, out, in, ctx->length);
185         in += ctx->length;
186         inl -= ctx->length;
187         out += j;
188         total += j;
189         if ((ctx->flags & EVP_ENCODE_CTX_NO_NEWLINES) == 0) {
190             *(out++) = '\n';
191             total++;
192         }
193         *out = '\0';
194     }
195     if (total > INT_MAX) {
196         /* Too much output data! */
197         *outl = 0;
198         return 0;
199     }
200     if (inl != 0)
201         memcpy(&(ctx->enc_data[0]), in, inl);
202     ctx->num = inl;
203     *outl = total;
204 
205     return 1;
206 }
207 
208 void EVP_EncodeFinal(EVP_ENCODE_CTX *ctx, unsigned char *out, int *outl)
209 {
210     unsigned int ret = 0;
211 
212     if (ctx->num != 0) {
213         ret = evp_encodeblock_int(ctx, out, ctx->enc_data, ctx->num);
214         if ((ctx->flags & EVP_ENCODE_CTX_NO_NEWLINES) == 0)
215             out[ret++] = '\n';
216         out[ret] = '\0';
217         ctx->num = 0;
218     }
219     *outl = ret;
220 }
221 
222 static int evp_encodeblock_int(EVP_ENCODE_CTX *ctx, unsigned char *t,
223     const unsigned char *f, int dlen)
224 {
225     int i, ret = 0;
226     unsigned long l;
227     const unsigned char *table;
228 
229     if (ctx != NULL && (ctx->flags & EVP_ENCODE_CTX_USE_SRP_ALPHABET) != 0)
230         table = srpdata_bin2ascii;
231     else
232         table = data_bin2ascii;
233 
234     for (i = dlen; i > 0; i -= 3) {
235         if (i >= 3) {
236             l = (((unsigned long)f[0]) << 16L) | (((unsigned long)f[1]) << 8L) | f[2];
237             *(t++) = conv_bin2ascii(l >> 18L, table);
238             *(t++) = conv_bin2ascii(l >> 12L, table);
239             *(t++) = conv_bin2ascii(l >> 6L, table);
240             *(t++) = conv_bin2ascii(l, table);
241         } else {
242             l = ((unsigned long)f[0]) << 16L;
243             if (i == 2)
244                 l |= ((unsigned long)f[1] << 8L);
245 
246             *(t++) = conv_bin2ascii(l >> 18L, table);
247             *(t++) = conv_bin2ascii(l >> 12L, table);
248             *(t++) = (i == 1) ? '=' : conv_bin2ascii(l >> 6L, table);
249             *(t++) = '=';
250         }
251         ret += 4;
252         f += 3;
253     }
254 
255     *t = '\0';
256     return ret;
257 }
258 
259 int EVP_EncodeBlock(unsigned char *t, const unsigned char *f, int dlen)
260 {
261     return evp_encodeblock_int(NULL, t, f, dlen);
262 }
263 
264 void EVP_DecodeInit(EVP_ENCODE_CTX *ctx)
265 {
266     /* Only ctx->num and ctx->flags are used during decoding. */
267     ctx->num = 0;
268     ctx->length = 0;
269     ctx->line_num = 0;
270     ctx->flags = 0;
271 }
272 
273 /*-
274  * -1 for error
275  *  0 for last line
276  *  1 for full line
277  *
278  * Note: even though EVP_DecodeUpdate attempts to detect and report end of
279  * content, the context doesn't currently remember it and will accept more data
280  * in the next call. Therefore, the caller is responsible for checking and
281  * rejecting a 0 return value in the middle of content.
282  *
283  * Note: even though EVP_DecodeUpdate has historically tried to detect end of
284  * content based on line length, this has never worked properly. Therefore,
285  * we now return 0 when one of the following is true:
286  *   - Padding or B64_EOF was detected and the last block is complete.
287  *   - Input has zero-length.
288  * -1 is returned if:
289  *   - Invalid characters are detected.
290  *   - There is extra trailing padding, or data after padding.
291  *   - B64_EOF is detected after an incomplete base64 block.
292  */
293 int EVP_DecodeUpdate(EVP_ENCODE_CTX *ctx, unsigned char *out, int *outl,
294     const unsigned char *in, int inl)
295 {
296     int seof = 0, eof = 0, rv = -1, ret = 0, i, v, tmp, n, decoded_len;
297     unsigned char *d;
298     const unsigned char *table;
299 
300     n = ctx->num;
301     d = ctx->enc_data;
302 
303     if (n > 0 && d[n - 1] == '=') {
304         eof++;
305         if (n > 1 && d[n - 2] == '=')
306             eof++;
307     }
308 
309     /* Legacy behaviour: an empty input chunk signals end of input. */
310     if (inl == 0) {
311         rv = 0;
312         goto end;
313     }
314 
315     if ((ctx->flags & EVP_ENCODE_CTX_USE_SRP_ALPHABET) != 0)
316         table = srpdata_ascii2bin;
317     else
318         table = data_ascii2bin;
319 
320     for (i = 0; i < inl; i++) {
321         tmp = *(in++);
322         v = conv_ascii2bin(tmp, table);
323         if (v == B64_ERROR) {
324             rv = -1;
325             goto end;
326         }
327 
328         if (tmp == '=') {
329             eof++;
330         } else if (eof > 0 && B64_BASE64(v)) {
331             /* More data after padding. */
332             rv = -1;
333             goto end;
334         }
335 
336         if (eof > 2) {
337             rv = -1;
338             goto end;
339         }
340 
341         if (v == B64_EOF) {
342             seof = 1;
343             goto tail;
344         }
345 
346         /* Only save valid base64 characters. */
347         if (B64_BASE64(v)) {
348             if (n >= 64) {
349                 /*
350                  * We increment n once per loop, and empty the buffer as soon as
351                  * we reach 64 characters, so this can only happen if someone's
352                  * manually messed with the ctx. Refuse to write any more data.
353                  */
354                 rv = -1;
355                 goto end;
356             }
357             OPENSSL_assert(n < (int)sizeof(ctx->enc_data));
358             d[n++] = tmp;
359         }
360 
361         if (n == 64) {
362             decoded_len = evp_decodeblock_int(ctx, out, d, n, eof);
363             n = 0;
364             if (decoded_len < 0 || (decoded_len == 0 && eof > 0)) {
365                 rv = -1;
366                 goto end;
367             }
368             ret += decoded_len;
369             out += decoded_len;
370         }
371     }
372 
373     /*
374      * Legacy behaviour: if the current line is a full base64-block (i.e., has
375      * 0 mod 4 base64 characters), it is processed immediately. We keep this
376      * behaviour as applications may not be calling EVP_DecodeFinal properly.
377      */
378 tail:
379     if (n > 0) {
380         if ((n & 3) == 0) {
381             decoded_len = evp_decodeblock_int(ctx, out, d, n, eof);
382             n = 0;
383             if (decoded_len < 0 || (decoded_len == 0 && eof > 0)) {
384                 rv = -1;
385                 goto end;
386             }
387             ret += decoded_len;
388         } else if (seof) {
389             /* EOF in the middle of a base64 block. */
390             rv = -1;
391             goto end;
392         }
393     }
394 
395     rv = seof || (n == 0 && eof) ? 0 : 1;
396 end:
397     /* Legacy behaviour. This should probably rather be zeroed on error. */
398     *outl = ret;
399     ctx->num = n;
400     return rv;
401 }
402 
403 static int evp_decodeblock_int(EVP_ENCODE_CTX *ctx, unsigned char *t,
404     const unsigned char *f, int n,
405     int eof)
406 {
407     int i, ret = 0, a, b, c, d;
408     unsigned long l;
409     const unsigned char *table;
410 
411     if (eof < -1 || eof > 2)
412         return -1;
413 
414     if (ctx != NULL && (ctx->flags & EVP_ENCODE_CTX_USE_SRP_ALPHABET) != 0)
415         table = srpdata_ascii2bin;
416     else
417         table = data_ascii2bin;
418 
419     /* trim whitespace from the start of the line. */
420     while ((n > 0) && (conv_ascii2bin(*f, table) == B64_WS)) {
421         f++;
422         n--;
423     }
424 
425     /*
426      * strip off stuff at the end of the line ascii2bin values B64_WS,
427      * B64_EOLN, B64_EOLN and B64_EOF
428      */
429     while ((n > 3) && (B64_NOT_BASE64(conv_ascii2bin(f[n - 1], table))))
430         n--;
431 
432     if (n % 4 != 0)
433         return -1;
434     if (n == 0)
435         return 0;
436 
437     /* all 4-byte blocks except the last one do not have padding. */
438     for (i = 0; i < n - 4; i += 4) {
439         a = conv_ascii2bin(*(f++), table);
440         b = conv_ascii2bin(*(f++), table);
441         c = conv_ascii2bin(*(f++), table);
442         d = conv_ascii2bin(*(f++), table);
443         if ((a | b | c | d) & 0x80)
444             return -1;
445         l = ((((unsigned long)a) << 18L) | (((unsigned long)b) << 12L) | (((unsigned long)c) << 6L) | (((unsigned long)d)));
446         *(t++) = (unsigned char)(l >> 16L) & 0xff;
447         *(t++) = (unsigned char)(l >> 8L) & 0xff;
448         *(t++) = (unsigned char)(l) & 0xff;
449         ret += 3;
450     }
451 
452     /* process the last block that may have padding. */
453     a = conv_ascii2bin(*(f++), table);
454     b = conv_ascii2bin(*(f++), table);
455     c = conv_ascii2bin(*(f++), table);
456     d = conv_ascii2bin(*(f++), table);
457     if ((a | b | c | d) & 0x80)
458         return -1;
459     l = ((((unsigned long)a) << 18L) | (((unsigned long)b) << 12L) | (((unsigned long)c) << 6L) | (((unsigned long)d)));
460 
461     if (eof == -1)
462         eof = (c == '=') + (d == '=');
463 
464     switch (eof) {
465     case 2:
466         *(t++) = (unsigned char)(l >> 16L) & 0xff;
467         break;
468     case 1:
469         *(t++) = (unsigned char)(l >> 16L) & 0xff;
470         *(t++) = (unsigned char)(l >> 8L) & 0xff;
471         break;
472     case 0:
473         *(t++) = (unsigned char)(l >> 16L) & 0xff;
474         *(t++) = (unsigned char)(l >> 8L) & 0xff;
475         *(t++) = (unsigned char)(l) & 0xff;
476         break;
477     }
478     ret += 3 - eof;
479 
480     return ret;
481 }
482 
/*
 * One-shot decoder: decode n base64 characters from f into t using the
 * standard alphabet (no context). Padding is not subtracted here (eof is
 * passed as 0), so each '=' in the last group contributes a zero byte that
 * is included in the returned length. Returns the byte count or -1 on error.
 */
int EVP_DecodeBlock(unsigned char *t, const unsigned char *f, int n)
{
    const int no_padding_adjust = 0;

    return evp_decodeblock_int(NULL, t, f, n, no_padding_adjust);
}
487 
488 int EVP_DecodeFinal(EVP_ENCODE_CTX *ctx, unsigned char *out, int *outl)
489 {
490     int i;
491 
492     *outl = 0;
493     if (ctx->num != 0) {
494         i = evp_decodeblock_int(ctx, out, ctx->enc_data, ctx->num, -1);
495         if (i < 0)
496             return -1;
497         ctx->num = 0;
498         *outl = i;
499         return 1;
500     } else
501         return 1;
502 }
503