xref: /freebsd/crypto/openssl/crypto/evp/encode.c (revision f25b8c9fb4f58cf61adb47d7570abe7caa6d385d)
1 /*
2  * Copyright 1995-2025 The OpenSSL Project Authors. All Rights Reserved.
3  *
4  * Licensed under the Apache License 2.0 (the "License").  You may not use
5  * this file except in compliance with the License.  You can obtain a copy
6  * in the file LICENSE in the source distribution or at
7  * https://www.openssl.org/source/license.html
8  */
9 
10 #include <stdio.h>
11 #include <limits.h>
12 #include "internal/cryptlib.h"
13 #include <openssl/evp.h>
14 #include "crypto/evp.h"
15 #include "evp_local.h"
16 
/*
 * Forward declarations of the file-local helpers that are called before
 * their definitions further down in this file.
 */
static unsigned char conv_ascii2bin(unsigned char a,
    const unsigned char *table);
static int evp_encodeblock_int(EVP_ENCODE_CTX *ctx, unsigned char *t,
    const unsigned char *f, int dlen);
static int evp_decodeblock_int(EVP_ENCODE_CTX *ctx, unsigned char *t,
    const unsigned char *f, int n, int eof);
23 
/*
 * Look up the output character for the low six bits of a in table.
 *
 * One definition serves both ASCII and CHARSET_EBCDIC builds. On EBCDIC
 * systems we assume that PEM encoded files are EBCDIC files (i.e., printable
 * text files); they are converted while decoding. When encoding, output is
 * EBCDIC (text) format again, and no conversion is needed here because the
 * underlying text string data_bin2ascii[] is already EBCDIC.
 */
#define conv_bin2ascii(a, table) ((table)[(a) & 0x3f])
35 
/*-
 * Layout of the encoded output:
 * 64 char lines
 * pad input with 0
 * left over chars are set to =
 * 1 byte  => xx==
 * 2 bytes => xxx=
 * 3 bytes => xxxx
 */
/* Input bytes that encode to one 64 character line: 64 / 4 * 3 = 48. */
#define BIN_PER_LINE (64 / 4 * 3)
/* Four-character base64 groups in one 64 character line: 64 / 4 = 16. */
#define CHUNKS_PER_LINE (64 / 4)
/* 64 characters plus one more (65) - presumably the line terminator. */
#define CHAR_PER_LINE (64 + 1)
47 
/*
 * Standard base64 alphabet: entry i is the character that encodes the 6-bit
 * value i. The 65th byte is the string's terminating NUL.
 */
static const unsigned char data_bin2ascii[65] =
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "0123456789+/";
49 
/*
 * SRP uses a different base64 alphabet: digits first, then upper case, lower
 * case, '.' and '/'. Entry i is the character that encodes the 6-bit value i.
 */
static const unsigned char srpdata_bin2ascii[65] =
    "0123456789"
    "ABCDEFGHIJKLMNOPQRSTUVWXYZ"
    "abcdefghijklmnopqrstuvwxyz"
    "./";
52 
/*-
 * Marker values stored in the ascii2bin tables below for characters that are
 * not base64 digits (digits map to 0x00..0x3F):
 *
 * 0xF0 is a EOLN
 * 0xF1 is ignore but next needs to be 0xF0 (for \r\n processing).
 * 0xF2 is EOF
 * 0xE0 is ignore at start of line.
 * 0xFF is error
 */

#define B64_EOLN 0xF0
#define B64_CR 0xF1
#define B64_EOF 0xF2
#define B64_WS 0xE0
#define B64_ERROR 0xFF
/*
 * True for the marker values B64_WS, B64_EOLN, B64_CR and B64_EOF, which all
 * become 0xF3 once the bits in 0x13 are set. B64_ERROR (0xFF) does not match,
 * so B64_BASE64() is also true for it; callers test for B64_ERROR separately.
 */
#define B64_NOT_BASE64(a) (((a) | 0x13) == 0xF3)
#define B64_BASE64(a) (!B64_NOT_BASE64(a))
68 
/*
 * Decode table for the standard base64 alphabet, indexed by 7-bit character
 * code. Each entry is either the 6-bit value of the character or one of the
 * B64_* marker values (0xE0, 0xF0, 0xF1, 0xF2, 0xFF). '=' decodes to 0x00.
 */
static const unsigned char data_ascii2bin[128] = {
    /* 0x00 - 0x07 */
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    /* 0x08 - 0x0F: tab, line feed and carriage return live here */
    0xFF, 0xE0, 0xF0, 0xFF, 0xFF, 0xF1, 0xFF, 0xFF,
    /* 0x10 - 0x17 */
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    /* 0x18 - 0x1F */
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    /* 0x20 - 0x27: space first */
    0xE0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    /* 0x28 - 0x2F: '+', '-' and '/' */
    0xFF, 0xFF, 0xFF, 0x3E, 0xFF, 0xF2, 0xFF, 0x3F,
    /* 0x30 - 0x37: '0' .. '7' */
    0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A, 0x3B,
    /* 0x38 - 0x3F: '8', '9' and '=' */
    0x3C, 0x3D, 0xFF, 0xFF, 0xFF, 0x00, 0xFF, 0xFF,
    /* 0x40 - 0x47: '@', 'A' .. 'G' */
    0xFF, 0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06,
    /* 0x48 - 0x4F: 'H' .. 'O' */
    0x07, 0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E,
    /* 0x50 - 0x57: 'P' .. 'W' */
    0x0F, 0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16,
    /* 0x58 - 0x5F: 'X' .. 'Z' */
    0x17, 0x18, 0x19, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    /* 0x60 - 0x67: '`', 'a' .. 'g' */
    0xFF, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20,
    /* 0x68 - 0x6F: 'h' .. 'o' */
    0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27, 0x28,
    /* 0x70 - 0x77: 'p' .. 'w' */
    0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30,
    /* 0x78 - 0x7F: 'x' .. 'z' */
    0x31, 0x32, 0x33, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
};
199 
/*
 * Decode table for the SRP base64 alphabet, indexed by 7-bit character code.
 * Each entry is either the 6-bit value of the character or one of the B64_*
 * marker values (0xE0, 0xF0, 0xF1, 0xF2, 0xFF). '=' decodes to 0x00.
 */
static const unsigned char srpdata_ascii2bin[128] = {
    /* 0x00 - 0x07 */
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    /* 0x08 - 0x0F: tab, line feed and carriage return live here */
    0xFF, 0xE0, 0xF0, 0xFF, 0xFF, 0xF1, 0xFF, 0xFF,
    /* 0x10 - 0x17 */
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    /* 0x18 - 0x1F */
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    /* 0x20 - 0x27: space first */
    0xE0, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    /* 0x28 - 0x2F: '-', '.' and '/' */
    0xFF, 0xFF, 0xFF, 0xFF, 0xFF, 0xF2, 0x3E, 0x3F,
    /* 0x30 - 0x37: '0' .. '7' */
    0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
    /* 0x38 - 0x3F: '8', '9' and '=' */
    0x08, 0x09, 0xFF, 0xFF, 0xFF, 0x00, 0xFF, 0xFF,
    /* 0x40 - 0x47: '@', 'A' .. 'G' */
    0xFF, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F, 0x10,
    /* 0x48 - 0x4F: 'H' .. 'O' */
    0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17, 0x18,
    /* 0x50 - 0x57: 'P' .. 'W' */
    0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F, 0x20,
    /* 0x58 - 0x5F: 'X' .. 'Z' */
    0x21, 0x22, 0x23, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
    /* 0x60 - 0x67: '`', 'a' .. 'g' */
    0xFF, 0x24, 0x25, 0x26, 0x27, 0x28, 0x29, 0x2A,
    /* 0x68 - 0x6F: 'h' .. 'o' */
    0x2B, 0x2C, 0x2D, 0x2E, 0x2F, 0x30, 0x31, 0x32,
    /* 0x70 - 0x77: 'p' .. 'w' */
    0x33, 0x34, 0x35, 0x36, 0x37, 0x38, 0x39, 0x3A,
    /* 0x78 - 0x7F: 'x' .. 'z' */
    0x3B, 0x3C, 0x3D, 0xFF, 0xFF, 0xFF, 0xFF, 0xFF,
};
330 
331 #ifndef CHARSET_EBCDIC
332 static unsigned char conv_ascii2bin(unsigned char a, const unsigned char *table)
333 {
334     if (a & 0x80)
335         return B64_ERROR;
336     return table[a];
337 }
338 #else
339 static unsigned char conv_ascii2bin(unsigned char a, const unsigned char *table)
340 {
341     a = os_toascii[a];
342     if (a & 0x80)
343         return B64_ERROR;
344     return table[a];
345 }
346 #endif
347 
348 EVP_ENCODE_CTX *EVP_ENCODE_CTX_new(void)
349 {
350     return OPENSSL_zalloc(sizeof(EVP_ENCODE_CTX));
351 }
352 
/* Release a context by handing ctx straight to OPENSSL_free(). */
void EVP_ENCODE_CTX_free(EVP_ENCODE_CTX *ctx)
{
    OPENSSL_free(ctx);
}
357 
358 int EVP_ENCODE_CTX_copy(EVP_ENCODE_CTX *dctx, const EVP_ENCODE_CTX *sctx)
359 {
360     memcpy(dctx, sctx, sizeof(EVP_ENCODE_CTX));
361 
362     return 1;
363 }
364 
/*
 * Return ctx->num, the count of bytes currently held in ctx->enc_data and
 * waiting to be processed.
 */
int EVP_ENCODE_CTX_num(EVP_ENCODE_CTX *ctx)
{
    return ctx->num;
}
369 
/*
 * Replace ctx->flags with flags. The previous value is overwritten, not
 * OR-ed. The flags consulted in this file are EVP_ENCODE_CTX_NO_NEWLINES and
 * EVP_ENCODE_CTX_USE_SRP_ALPHABET.
 */
void evp_encode_ctx_set_flags(EVP_ENCODE_CTX *ctx, unsigned int flags)
{
    ctx->flags = flags;
}
374 
375 void EVP_EncodeInit(EVP_ENCODE_CTX *ctx)
376 {
377     ctx->length = 48;
378     ctx->num = 0;
379     ctx->line_num = 0;
380     ctx->flags = 0;
381 }
382 
383 int EVP_EncodeUpdate(EVP_ENCODE_CTX *ctx, unsigned char *out, int *outl,
384     const unsigned char *in, int inl)
385 {
386     int i, j;
387     size_t total = 0;
388 
389     *outl = 0;
390     if (inl <= 0)
391         return 0;
392     OPENSSL_assert(ctx->length <= (int)sizeof(ctx->enc_data));
393     if (ctx->length - ctx->num > inl) {
394         memcpy(&(ctx->enc_data[ctx->num]), in, inl);
395         ctx->num += inl;
396         return 1;
397     }
398     if (ctx->num != 0) {
399         i = ctx->length - ctx->num;
400         memcpy(&(ctx->enc_data[ctx->num]), in, i);
401         in += i;
402         inl -= i;
403         j = evp_encodeblock_int(ctx, out, ctx->enc_data, ctx->length);
404         ctx->num = 0;
405         out += j;
406         total = j;
407         if ((ctx->flags & EVP_ENCODE_CTX_NO_NEWLINES) == 0) {
408             *(out++) = '\n';
409             total++;
410         }
411         *out = '\0';
412     }
413     while (inl >= ctx->length && total <= INT_MAX) {
414         j = evp_encodeblock_int(ctx, out, in, ctx->length);
415         in += ctx->length;
416         inl -= ctx->length;
417         out += j;
418         total += j;
419         if ((ctx->flags & EVP_ENCODE_CTX_NO_NEWLINES) == 0) {
420             *(out++) = '\n';
421             total++;
422         }
423         *out = '\0';
424     }
425     if (total > INT_MAX) {
426         /* Too much output data! */
427         *outl = 0;
428         return 0;
429     }
430     if (inl != 0)
431         memcpy(&(ctx->enc_data[0]), in, inl);
432     ctx->num = inl;
433     *outl = total;
434 
435     return 1;
436 }
437 
438 void EVP_EncodeFinal(EVP_ENCODE_CTX *ctx, unsigned char *out, int *outl)
439 {
440     unsigned int ret = 0;
441 
442     if (ctx->num != 0) {
443         ret = evp_encodeblock_int(ctx, out, ctx->enc_data, ctx->num);
444         if ((ctx->flags & EVP_ENCODE_CTX_NO_NEWLINES) == 0)
445             out[ret++] = '\n';
446         out[ret] = '\0';
447         ctx->num = 0;
448     }
449     *outl = ret;
450 }
451 
452 static int evp_encodeblock_int(EVP_ENCODE_CTX *ctx, unsigned char *t,
453     const unsigned char *f, int dlen)
454 {
455     int i, ret = 0;
456     unsigned long l;
457     const unsigned char *table;
458 
459     if (ctx != NULL && (ctx->flags & EVP_ENCODE_CTX_USE_SRP_ALPHABET) != 0)
460         table = srpdata_bin2ascii;
461     else
462         table = data_bin2ascii;
463 
464     for (i = dlen; i > 0; i -= 3) {
465         if (i >= 3) {
466             l = (((unsigned long)f[0]) << 16L) | (((unsigned long)f[1]) << 8L) | f[2];
467             *(t++) = conv_bin2ascii(l >> 18L, table);
468             *(t++) = conv_bin2ascii(l >> 12L, table);
469             *(t++) = conv_bin2ascii(l >> 6L, table);
470             *(t++) = conv_bin2ascii(l, table);
471         } else {
472             l = ((unsigned long)f[0]) << 16L;
473             if (i == 2)
474                 l |= ((unsigned long)f[1] << 8L);
475 
476             *(t++) = conv_bin2ascii(l >> 18L, table);
477             *(t++) = conv_bin2ascii(l >> 12L, table);
478             *(t++) = (i == 1) ? '=' : conv_bin2ascii(l >> 6L, table);
479             *(t++) = '=';
480         }
481         ret += 4;
482         f += 3;
483     }
484 
485     *t = '\0';
486     return ret;
487 }
488 
/*
 * Base64-encode dlen bytes from f into t in one call.
 *
 * Delegates to evp_encodeblock_int() with a NULL context, so the standard
 * alphabet is used. Returns that function's result.
 */
int EVP_EncodeBlock(unsigned char *t, const unsigned char *f, int dlen)
{
    return evp_encodeblock_int(NULL, t, f, dlen);
}
493 
494 void EVP_DecodeInit(EVP_ENCODE_CTX *ctx)
495 {
496     /* Only ctx->num and ctx->flags are used during decoding. */
497     ctx->num = 0;
498     ctx->length = 0;
499     ctx->line_num = 0;
500     ctx->flags = 0;
501 }
502 
/*-
 * Decode a chunk of base64 text.
 *
 * ctx   decoder state; ctx->enc_data holds base64 characters that have been
 *       accepted but not yet decoded and ctx->num counts them
 * out   receives the decoded bytes
 * outl  set to the number of bytes written to out by this call
 * in    input text
 * inl   number of bytes available at in
 *
 * Return values:
 * -1 for error
 *  0 for last line
 *  1 for full line
 *
 * Note: even though EVP_DecodeUpdate attempts to detect and report end of
 * content, the context doesn't currently remember it and will accept more data
 * in the next call. Therefore, the caller is responsible for checking and
 * rejecting a 0 return value in the middle of content.
 *
 * Note: even though EVP_DecodeUpdate has historically tried to detect end of
 * content based on line length, this has never worked properly. Therefore,
 * we now return 0 when one of the following is true:
 *   - Padding or B64_EOF was detected and the last block is complete.
 *   - Input has zero-length.
 * -1 is returned if:
 *   - Invalid characters are detected.
 *   - There is extra trailing padding, or data after padding.
 *   - B64_EOF is detected after an incomplete base64 block.
 */
int EVP_DecodeUpdate(EVP_ENCODE_CTX *ctx, unsigned char *out, int *outl,
    const unsigned char *in, int inl)
{
    /*
     * seof: set once a B64_EOF character has been seen
     * eof:  number of '=' padding characters seen so far (at most 2)
     * ret:  bytes written to out so far
     * n:    number of characters currently buffered in d
     */
    int seof = 0, eof = 0, rv = -1, ret = 0, i, v, tmp, n, decoded_len;
    unsigned char *d;
    const unsigned char *table;

    n = ctx->num;
    d = ctx->enc_data;

    /* Recount padding that was already buffered by an earlier call. */
    if (n > 0 && d[n - 1] == '=') {
        eof++;
        if (n > 1 && d[n - 2] == '=')
            eof++;
    }

    /* Legacy behaviour: an empty input chunk signals end of input. */
    if (inl == 0) {
        rv = 0;
        goto end;
    }

    if ((ctx->flags & EVP_ENCODE_CTX_USE_SRP_ALPHABET) != 0)
        table = srpdata_ascii2bin;
    else
        table = data_ascii2bin;

    for (i = 0; i < inl; i++) {
        tmp = *(in++);
        v = conv_ascii2bin(tmp, table);
        if (v == B64_ERROR) {
            rv = -1;
            goto end;
        }

        if (tmp == '=') {
            eof++;
        } else if (eof > 0 && B64_BASE64(v)) {
            /* More data after padding. */
            rv = -1;
            goto end;
        }

        /* At most two padding characters are allowed. */
        if (eof > 2) {
            rv = -1;
            goto end;
        }

        /* Stop consuming input; whatever is buffered is handled at tail. */
        if (v == B64_EOF) {
            seof = 1;
            goto tail;
        }

        /* Only save valid base64 characters. */
        if (B64_BASE64(v)) {
            if (n >= 64) {
                /*
                 * We increment n once per loop, and empty the buffer as soon as
                 * we reach 64 characters, so this can only happen if someone's
                 * manually messed with the ctx. Refuse to write any more data.
                 */
                rv = -1;
                goto end;
            }
            OPENSSL_assert(n < (int)sizeof(ctx->enc_data));
            d[n++] = tmp;
        }

        /* A full 64 character buffer is decoded straight away. */
        if (n == 64) {
            decoded_len = evp_decodeblock_int(ctx, out, d, n, eof);
            n = 0;
            /* Padding was seen but the block produced no data: reject it. */
            if (decoded_len < 0 || (decoded_len == 0 && eof > 0)) {
                rv = -1;
                goto end;
            }
            ret += decoded_len;
            out += decoded_len;
        }
    }

    /*
     * Legacy behaviour: if the current line is a full base64-block (i.e., has
     * 0 mod 4 base64 characters), it is processed immediately. We keep this
     * behaviour as applications may not be calling EVP_DecodeFinal properly.
     */
tail:
    if (n > 0) {
        if ((n & 3) == 0) {
            decoded_len = evp_decodeblock_int(ctx, out, d, n, eof);
            n = 0;
            if (decoded_len < 0 || (decoded_len == 0 && eof > 0)) {
                rv = -1;
                goto end;
            }
            ret += decoded_len;
        } else if (seof) {
            /* EOF in the middle of a base64 block. */
            rv = -1;
            goto end;
        }
    }

    /* 0 once B64_EOF was seen or a padded block has been fully decoded. */
    rv = seof || (n == 0 && eof) ? 0 : 1;
end:
    /* Legacy behaviour. This should probably rather be zeroed on error. */
    *outl = ret;
    /* Characters left in d stay buffered for the next call. */
    ctx->num = n;
    return rv;
}
632 
633 static int evp_decodeblock_int(EVP_ENCODE_CTX *ctx, unsigned char *t,
634     const unsigned char *f, int n,
635     int eof)
636 {
637     int i, ret = 0, a, b, c, d;
638     unsigned long l;
639     const unsigned char *table;
640 
641     if (eof < -1 || eof > 2)
642         return -1;
643 
644     if (ctx != NULL && (ctx->flags & EVP_ENCODE_CTX_USE_SRP_ALPHABET) != 0)
645         table = srpdata_ascii2bin;
646     else
647         table = data_ascii2bin;
648 
649     /* trim whitespace from the start of the line. */
650     while ((n > 0) && (conv_ascii2bin(*f, table) == B64_WS)) {
651         f++;
652         n--;
653     }
654 
655     /*
656      * strip off stuff at the end of the line ascii2bin values B64_WS,
657      * B64_EOLN, B64_EOLN and B64_EOF
658      */
659     while ((n > 3) && (B64_NOT_BASE64(conv_ascii2bin(f[n - 1], table))))
660         n--;
661 
662     if (n % 4 != 0)
663         return -1;
664     if (n == 0)
665         return 0;
666 
667     /* all 4-byte blocks except the last one do not have padding. */
668     for (i = 0; i < n - 4; i += 4) {
669         a = conv_ascii2bin(*(f++), table);
670         b = conv_ascii2bin(*(f++), table);
671         c = conv_ascii2bin(*(f++), table);
672         d = conv_ascii2bin(*(f++), table);
673         if ((a | b | c | d) & 0x80)
674             return -1;
675         l = ((((unsigned long)a) << 18L) | (((unsigned long)b) << 12L) | (((unsigned long)c) << 6L) | (((unsigned long)d)));
676         *(t++) = (unsigned char)(l >> 16L) & 0xff;
677         *(t++) = (unsigned char)(l >> 8L) & 0xff;
678         *(t++) = (unsigned char)(l) & 0xff;
679         ret += 3;
680     }
681 
682     /* process the last block that may have padding. */
683     a = conv_ascii2bin(*(f++), table);
684     b = conv_ascii2bin(*(f++), table);
685     c = conv_ascii2bin(*(f++), table);
686     d = conv_ascii2bin(*(f++), table);
687     if ((a | b | c | d) & 0x80)
688         return -1;
689     l = ((((unsigned long)a) << 18L) | (((unsigned long)b) << 12L) | (((unsigned long)c) << 6L) | (((unsigned long)d)));
690 
691     if (eof == -1)
692         eof = (f[2] == '=') + (f[3] == '=');
693 
694     switch (eof) {
695     case 2:
696         *(t++) = (unsigned char)(l >> 16L) & 0xff;
697         break;
698     case 1:
699         *(t++) = (unsigned char)(l >> 16L) & 0xff;
700         *(t++) = (unsigned char)(l >> 8L) & 0xff;
701         break;
702     case 0:
703         *(t++) = (unsigned char)(l >> 16L) & 0xff;
704         *(t++) = (unsigned char)(l >> 8L) & 0xff;
705         *(t++) = (unsigned char)(l) & 0xff;
706         break;
707     }
708     ret += 3 - eof;
709 
710     return ret;
711 }
712 
/*
 * Decode n base64 characters from f into t in one call.
 *
 * Delegates to evp_decodeblock_int() with a NULL context (standard alphabet)
 * and eof = 0, so no bytes are dropped for '=' padding: a padded final block
 * still contributes three bytes to the output and to the returned count.
 * Returns that function's result (-1 on error).
 */
int EVP_DecodeBlock(unsigned char *t, const unsigned char *f, int n)
{
    return evp_decodeblock_int(NULL, t, f, n, 0);
}
717 
718 int EVP_DecodeFinal(EVP_ENCODE_CTX *ctx, unsigned char *out, int *outl)
719 {
720     int i;
721 
722     *outl = 0;
723     if (ctx->num != 0) {
724         i = evp_decodeblock_int(ctx, out, ctx->enc_data, ctx->num, -1);
725         if (i < 0)
726             return -1;
727         ctx->num = 0;
728         *outl = i;
729         return 1;
730     } else
731         return 1;
732 }
733