diff options
author | Viktor Dukhovni <openssl-users@dukhovni.org> | 2024-08-21 16:43:33 +0200 |
---|---|---|
committer | Tomas Mraz <tomas@openssl.org> | 2024-08-30 15:09:10 +0200 |
commit | 0cd9dd703ea575699b2d3cd74f1b8224447f4352 (patch) | |
tree | 413730537872fb469666510e32900947a1958add | |
parent | fix: ossl_digest_get_approved_nid() returns NID_undef on invalid digest (diff) | |
download | openssl-0cd9dd703ea575699b2d3cd74f1b8224447f4352.tar.xz openssl-0cd9dd703ea575699b2d3cd74f1b8224447f4352.zip |
Improve base64 BIO correctness and error reporting
Also improve related documentation.
- The BIO_FLAGS_BASE64_NO_NL flag did not behave as advertised; only
leading and trailing, but not internal, whitespace was supported:
$ echo 'AA AA' | openssl base64 -A -d | wc -c
0
- Switching from ignored leading input to valid base64 input misbehaved
when the length of the skipped input was one more than the length of
the second and subsequent valid base64 lines in the internal 1k
buffer:
$ printf '#foo\n#bar\nA\nAAA\nAAAA\n' | openssl base64 -d | wc -c
0
- When the underlying BIO is retriable, and a read returns less than
1k of data, some of the already buffered input lines that could have
been decoded and returned were retained internally for a retry by the
caller. This is somewhat surprising, and the new code decodes as many
of the buffered lines as possible. Issue reported by Michał Trojnara.
- After all valid data has been read, the next BIO_read(3) should
return 0 when the input was all valid or -1 if an error was detected.
This now occurs more consistently, but further tests and code
refactoring may be needed to ensure this always happens.
Reviewed-by: Neil Horman <nhorman@openssl.org>
Reviewed-by: Tomas Mraz <tomas@openssl.org>
(Merged from https://github.com/openssl/openssl/pull/25253)
-rw-r--r-- | crypto/evp/bio_b64.c | 126 | ||||
-rw-r--r-- | crypto/evp/encode.c | 2 | ||||
-rw-r--r-- | doc/man3/BIO_f_base64.pod | 51 | ||||
-rw-r--r-- | doc/man3/EVP_EncodeInit.pod | 101 | ||||
-rw-r--r-- | doc/man7/evp.pod | 2 | ||||
-rw-r--r-- | test/bio_base64_test.c | 491 | ||||
-rw-r--r-- | test/build.info | 6 | ||||
-rw-r--r-- | test/recipes/90-test_bio_base64.t | 11 |
8 files changed, 677 insertions, 113 deletions
diff --git a/crypto/evp/bio_b64.c b/crypto/evp/bio_b64.c index 8700315a6b..4bf47c6f24 100644 --- a/crypto/evp/bio_b64.c +++ b/crypto/evp/bio_b64.c @@ -103,9 +103,17 @@ static int b64_free(BIO *a) return 1; } +/* + * Unless `BIO_FLAGS_BASE64_NO_NL` is set, this BIO ignores leading lines that + * aren't exclusively composed of valid Base64 characters (followed by <CRLF> + * or <LF>). Once a valid Base64 line is found, `ctx->start` is set to 0 and + * lines are processed until EOF or the first line that contains invalid Base64 + * characters. In a nod to PEM, lines that start with a '-' (hyphen) are + * treated as a soft EOF, rather than an error. + */ static int b64_read(BIO *b, char *out, int outl) { - int ret = 0, i, ii, j, k, x, n, num, ret_code = 0; + int ret = 0, i, ii, j, k, x, n, num, ret_code; BIO_B64_CTX *ctx; unsigned char *p, *q; BIO *next; @@ -128,7 +136,7 @@ static int b64_read(BIO *b, char *out, int outl) EVP_DecodeInit(ctx->base64); } - /* First check if there are bytes decoded/encoded */ + /* First check if there are buffered bytes already decoded */ if (ctx->buf_len > 0) { OPENSSL_assert(ctx->buf_len >= ctx->buf_off); i = ctx->buf_len - ctx->buf_off; @@ -146,14 +154,17 @@ static int b64_read(BIO *b, char *out, int outl) } } + /* Restore any non-retriable error condition (ctx->cont < 0) */ + ret_code = ctx->cont < 0 ? ctx->cont : 0; + /* - * At this point, we have room of outl bytes and an empty buffer, so we - * should read in some more. + * At this point, we have room of outl bytes and an either an empty buffer, + * or outl == 0, so we'll attempt to read in some more. */ - - ret_code = 0; while (outl > 0) { - if (ctx->cont <= 0) + int again = ctx->cont; + + if (again <= 0) break; i = BIO_read(next, &(ctx->tmp[ctx->tmp_len]), @@ -164,18 +175,22 @@ static int b64_read(BIO *b, char *out, int outl) /* Should we continue next time we are called? 
*/ if (!BIO_should_retry(next)) { - ctx->cont = i; - /* If buffer empty break */ - if (ctx->tmp_len == 0) - break; - /* Fall through and process what we have */ - else - i = 0; + /* Incomplete final Base64 chunk in the decoder is an error */ + if (ctx->tmp_len == 0) { + if (EVP_DecodeFinal(ctx->base64, NULL, &num) < 0) + ret_code = -1; + EVP_DecodeInit(ctx->base64); + } + ctx->cont = ret_code; } - /* else we retry and add more data to buffer */ - else + if (ctx->tmp_len == 0) break; + /* Fall through and process what we have */ + i = 0; + /* But don't loop to top-up even if the buffer is not full! */ + again = 0; } + i += ctx->tmp_len; ctx->tmp_len = i; @@ -204,23 +219,23 @@ static int b64_read(BIO *b, char *out, int outl) } k = EVP_DecodeUpdate(ctx->base64, ctx->buf, &num, p, q - p); - if (k <= 0 && num == 0 && ctx->start) { - EVP_DecodeInit(ctx->base64); - } else { - if (p != ctx->tmp) { - i -= p - ctx->tmp; - for (x = 0; x < i; x++) - ctx->tmp[x] = p[x]; - } - EVP_DecodeInit(ctx->base64); - ctx->start = 0; - break; + EVP_DecodeInit(ctx->base64); + if (k <= 0 && num == 0) { + p = q; + continue; + } + + ctx->start = 0; + if (p != ctx->tmp) { + i -= p - ctx->tmp; + for (x = 0; x < i; x++) + ctx->tmp[x] = p[x]; } - p = q; + break; } /* we fell off the end without starting */ - if (j == i && num == 0) { + if (ctx->start) { /* * Is this is one long chunk?, if so, keep on reading until a * new line. 
@@ -231,18 +246,29 @@ static int b64_read(BIO *b, char *out, int outl) ctx->tmp_nl = 1; ctx->tmp_len = 0; } - } else if (p != q) { /* finished on a '\n' */ + } else if (p != q) { + /* Retain partial line at end of buffer */ n = q - p; for (ii = 0; ii < n; ii++) ctx->tmp[ii] = p[ii]; ctx->tmp_len = n; + } else { + /* All we have is newline terminated non-start data */ + ctx->tmp_len = 0; } - /* else finished on a '\n' */ - continue; + /* + * Try to read more if possible, otherwise we can't make + * progress unless the underlying BIO is retriable and may + * produce more data next time we're called. + */ + if (again > 0) + continue; + else + break; } else { ctx->tmp_len = 0; } - } else if (i < B64_BLOCK_SIZE && ctx->cont > 0) { + } else if (i < B64_BLOCK_SIZE && again > 0) { /* * If buffer isn't full and we can retry then restart to read in * more data. @@ -250,35 +276,9 @@ static int b64_read(BIO *b, char *out, int outl) continue; } - if ((BIO_get_flags(b) & BIO_FLAGS_BASE64_NO_NL) != 0) { - int z, jj; - - jj = i & ~3; /* process per 4 */ - z = EVP_DecodeBlock(ctx->buf, ctx->tmp, jj); - if (jj > 2) { - if (ctx->tmp[jj - 1] == '=') { - z--; - if (ctx->tmp[jj - 2] == '=') - z--; - } - } - /* - * z is now number of output bytes and jj is the number consumed - */ - if (jj != i) { - memmove(ctx->tmp, &ctx->tmp[jj], i - jj); - ctx->tmp_len = i - jj; - } - ctx->buf_len = 0; - if (z > 0) { - ctx->buf_len = z; - } - i = z; - } else { - i = EVP_DecodeUpdate(ctx->base64, ctx->buf, &ctx->buf_len, - ctx->tmp, i); - ctx->tmp_len = 0; - } + i = EVP_DecodeUpdate(ctx->base64, ctx->buf, &ctx->buf_len, + ctx->tmp, i); + ctx->tmp_len = 0; /* * If eof or an error was signalled, then the condition * 'ctx->cont <= 0' will prevent b64_read() from reading @@ -289,7 +289,7 @@ static int b64_read(BIO *b, char *out, int outl) ctx->buf_off = 0; if (i < 0) { - ret_code = 0; + ret_code = ctx->start ? 
0 : i; ctx->buf_len = 0; break; } diff --git a/crypto/evp/encode.c b/crypto/evp/encode.c index 2c047fa039..f8b344766e 100644 --- a/crypto/evp/encode.c +++ b/crypto/evp/encode.c @@ -443,7 +443,7 @@ static int evp_decodeblock_int(EVP_ENCODE_CTX *ctx, unsigned char *t, b = conv_ascii2bin(*(f++), table); c = conv_ascii2bin(*(f++), table); d = conv_ascii2bin(*(f++), table); - if ((a & 0x80) || (b & 0x80) || (c & 0x80) || (d & 0x80)) + if ((a | b | c | d) & 0x80) return -1; l = ((((unsigned long)a) << 18L) | (((unsigned long)b) << 12L) | diff --git a/doc/man3/BIO_f_base64.pod b/doc/man3/BIO_f_base64.pod index 7d10df933c..67d65da2e2 100644 --- a/doc/man3/BIO_f_base64.pod +++ b/doc/man3/BIO_f_base64.pod @@ -25,11 +25,28 @@ For writing, by default output is divided to lines of length 64 characters and there is a newline at the end of output. This behavior can be changed with B<BIO_FLAGS_BASE64_NO_NL> flag. -For reading, first line should be at most 1024 bytes long including newline -unless the flag B<BIO_FLAGS_BASE64_NO_NL> is set. -Further input lines can be of any length (i.e., newlines may appear anywhere +For reading, the first line of base64 content should be at most 1024 bytes long +including newline unless the flag B<BIO_FLAGS_BASE64_NO_NL> is set. +Subsequent input lines can be of any length (i.e., newlines may appear anywhere in the input) and a newline at the end of input is not needed. +Also when reading, unless the flag B<BIO_FLAGS_BASE64_NO_NL> is set, initial +lines that contain non-base64 content (whitespace is tolerated and ignored) are +skipped, as are lines longer than 1024 bytes. +Decoding starts with the first line that is shorter than 1024 bytes (including +the newline) and consists of only (at least one) valid base64 characters plus +optional whitespace. 
+Decoding stops when base64 padding is encountered, a soft end-of-input +character (B<->, see L<EVP_DecodeUpdate(3)>) occurs as the first byte after a +complete group of 4 valid base64 characters is decoded, or when an error occurs +(e.g. due to input characters other than valid base64 or whitespace). + +If decoding stops as a result of an error, the first L<BIO_read(3)> that +returns no decoded data will typically return a negative result, rather +than 0 (which indicates normal end of input). +However, a negative return value can also occur if the underlying BIO +supports retries, see L<BIO_should_read(3)> and L<BIO_set_mem_eof_return(3)>. + BIO_flush() on a base64 BIO that is being written through is used to signal that no more data is to be encoded: this is used to flush the final block through the BIO. @@ -64,7 +81,7 @@ to standard output: BIO_free_all(b64); -Read Base64 encoded data from standard input and write the decoded +Read base64 encoded data from standard input and write the decoded data to standard output: BIO *bio, *b64, *bio_out; @@ -83,16 +100,30 @@ data to standard output: =head1 BUGS -On decoding, if the flag B<BIO_FLAGS_BASE64_NO_NL> is not set and -the first 1024 bytes of input do not include a newline character -the first two lines of input are ignored. +The hyphen character (B<->) is treated as an ad hoc soft end-of-input +character when it occurs at the start of a base64 group of 4 encoded +characters. -The ambiguity of EOF in base64 encoded data can cause additional -data following the base64 encoded block to be misinterpreted. +This heuristic works to detect the ends of base64 blocks in PEM or +multi-part MIME, provided there are no stray hyphens in the middle +input. +But it is just a heuristic, and sufficiently unusual input could produce +unexpected results. 
-There should be some way of specifying a test that the BIO can perform +There should perhaps be some way of specifying a test that the BIO can perform to reliably determine EOF (for example a MIME boundary). +It may be possible for L<BIO_read(3)> to return zero, rather than -1, even if +an error has been detected, more tests are needed to cover all the potential +error paths. + +=head1 SEE ALSO + +L<BIO_read(3)>, +L<BIO_should_read(3)>, +L<BIO_set_mem_eof_return(3)>, +L<EVP_DecodeUpdate(3)>. + =head1 COPYRIGHT Copyright 2000-2022 The OpenSSL Project Authors. All Rights Reserved. diff --git a/doc/man3/EVP_EncodeInit.pod b/doc/man3/EVP_EncodeInit.pod index 2d6e4e8cbf..94a9a3b345 100644 --- a/doc/man3/EVP_EncodeInit.pod +++ b/doc/man3/EVP_EncodeInit.pod @@ -5,7 +5,7 @@ EVP_ENCODE_CTX_new, EVP_ENCODE_CTX_free, EVP_ENCODE_CTX_copy, EVP_ENCODE_CTX_num, EVP_EncodeInit, EVP_EncodeUpdate, EVP_EncodeFinal, EVP_EncodeBlock, EVP_DecodeInit, EVP_DecodeUpdate, EVP_DecodeFinal, -EVP_DecodeBlock - EVP base 64 encode/decode routines +EVP_DecodeBlock - EVP base64 encode/decode routines =head1 SYNOPSIS @@ -29,10 +29,11 @@ EVP_DecodeBlock - EVP base 64 encode/decode routines =head1 DESCRIPTION -The EVP encode routines provide a high-level interface to base 64 encoding and -decoding. Base 64 encoding converts binary data into a printable form that uses +The EVP encode routines provide a high-level interface to base64 encoding and +decoding. +Base64 encoding converts binary data into a printable form that uses the characters A-Z, a-z, 0-9, "+" and "/" to represent the data. For every 3 -bytes of binary data provided 4 bytes of base 64 encoded data will be produced +bytes of binary data provided 4 bytes of base64 encoded data will be produced plus some occasional newlines (see below). If the input data length is not a multiple of 3 then the output data will be padded at the end using the "=" character. 
@@ -44,7 +45,8 @@ EVP_ENCODE_CTX_free() cleans up an encode/decode context B<ctx> and frees up the space allocated to it. If the argument is NULL, nothing is done. Encoding of binary data is performed in blocks of 48 input bytes (or less for -the final block). For each 48 byte input block encoded 64 bytes of base 64 data +the final block). +For each 48 byte input block encoded 64 bytes of base64 data is output plus an additional newline character (i.e. 65 bytes in total). The final block (which may be less than 48 bytes) will output 4 bytes for every 3 bytes of input. If the data length is not divisible by 3 then a full 4 bytes is @@ -93,37 +95,62 @@ the data generated I<without> the NUL terminator is returned from the function. EVP_DecodeInit() initialises B<ctx> for the start of a new decoding operation. -EVP_DecodeUpdate() decodes B<inl> characters of data found in the buffer pointed -to by B<in>. The output is stored in the buffer B<out> and the number of bytes -output is stored in B<*outl>. It is the caller's responsibility to ensure that -the buffer at B<out> is sufficiently large to accommodate the output data. This -function will attempt to decode as much data as possible in 4 byte chunks. Any -whitespace, newline or carriage return characters are ignored. Any partial chunk -of unprocessed data (1, 2 or 3 bytes) that remains at the end will be held in -the B<ctx> object and processed by a subsequent call to EVP_DecodeUpdate(). If -any illegal base 64 characters are encountered or if the base 64 padding -character "=" is encountered in the middle of the data then the function returns --1 to indicate an error. A return value of 0 or 1 indicates successful -processing of the data. A return value of 0 additionally indicates that the last -input data characters processed included the base 64 padding character "=" and -therefore no more non-padding character data is expected to be processed. 
For -every 4 valid base 64 bytes processed (ignoring whitespace, carriage returns and -line feeds), 3 bytes of binary output data will be produced (or less at the end -of the data where the padding character "=" has been used). - -EVP_DecodeFinal() must be called at the end of a decoding operation. If there -is any unprocessed data still in B<ctx> then the input data must not have been -a multiple of 4 and therefore an error has occurred. The function will return -1 -in this case. Otherwise the function returns 1 on success. - -EVP_DecodeBlock() will decode the block of B<n> characters of base 64 data -contained in B<f> and store the result in B<t>. Any leading whitespace will be -trimmed as will any trailing whitespace, newlines, carriage returns or EOF -characters. After such trimming the length of the data in B<f> must be divisible -by 4. For every 4 input bytes exactly 3 output bytes will be produced. The -output will be padded with 0 bits if necessary to ensure that the output is -always 3 bytes for every 4 input bytes. This function will return the length of -the data decoded or -1 on error. +EVP_DecodeUpdate() decodes B<inl> characters of data found in the buffer +pointed to by B<in>. +The output is stored in the buffer B<out> and the number of bytes output is +stored in B<*outl>. +It is the caller's responsibility to ensure that the buffer at B<out> is +sufficiently large to accommodate the output data. +This function will attempt to decode as much data as possible in chunks of up +to 80 base64 characters at a time. +Residual input shorter than the internal chunk size will be buffered in B<ctx> +if its length is not a multiple of 4 (including any padding), to be processed +in future calls to EVP_DecodeUpdate() or EVP_DecodeFinal(). +If the final chunk length is a multiple of 4, it is decoded immediately and +not buffered. + +Any whitespace, newline or carriage return characters are ignored. 
+For compatibility with B<PEM>, the B<-> (hyphen) character is treated as a soft +end-of-input, subsequent bytes are not buffered, and the return value will be +0 to indicate that the end of the base64 input has been detected. +The soft end-of-input, if present, MUST occur after a multiple of 4 valid base64 +input bytes. +The soft end-of-input condition is not remembered in B<ctx>, it is up to the +caller to avoid further calls to EVP_DecodeUpdate() after a 0 or negative +(error) return. + +If any invalid base64 characters are encountered or if the base64 padding +character (B<=>) is encountered in the middle of the data then +EVP_DecodeUpdate() returns -1 to indicate an error. +A return value of 0 or 1 indicates successful processing of the data. +A return value of 0 additionally indicates that the last 4 bytes processed +ended with base64 padding (B<=>), or that the next 4 byte group starts with the +soft end-of-input (B<->) character, and therefore no more input data is +expected to be processed. + +For every 4 valid base64 bytes processed (ignoring whitespace, carriage returns +and line feeds), 3 bytes of binary output data will be produced (except at the +end of data terminated with one or two padding characters). + +EVP_DecodeFinal() should be called at the end of a decoding operation, +but it will never decode additional data. If there is no residual data +it will return 1 to indicate success. If there is residual data, its +length is not a multiple of 4, i.e. it was not properly padded, -1 is +is returned in that case to indicate an error. + +EVP_DecodeBlock() will decode the block of B<n> characters of base64 data +contained in B<f> and store the result in B<t>. +Any leading whitespace will be trimmed as will any trailing whitespace, +newlines, carriage returns or EOF characters. +Internal whitespace MUST NOT be present. 
+After trimming the data in B<f> MUST consist entirely of valid base64 +characters or padding (only at the tail of the input) and its length MUST be +divisible by 4. +For every 4 input bytes exactly 3 output bytes will be produced. +Padding bytes (B<=>) (even if internal) are decoded to 6 zero bits, the caller +is responsible for taking trailing padding into account, by ignoring as many +bytes at the tail of the returned output. +EVP_DecodeBlock() will return the length of the data decoded or -1 on error. =head1 RETURN VALUES @@ -139,7 +166,7 @@ EVP_EncodeBlock() returns the number of bytes encoded excluding the NUL terminator. EVP_DecodeUpdate() returns -1 on error and 0 or 1 on success. If 0 is returned -then no more non-padding base 64 characters are expected. +then no more non-padding base64 characters are expected. EVP_DecodeFinal() returns -1 on error or 1 on success. diff --git a/doc/man7/evp.pod b/doc/man7/evp.pod index 44d385655e..09c4e7a0e8 100644 --- a/doc/man7/evp.pod +++ b/doc/man7/evp.pod @@ -63,7 +63,7 @@ implementation. However, new applications should not typically use this (preferr PBKDF2 from PCKS#5). The L<B<EVP_Encode>I<XXX>|EVP_EncodeInit(3)> and -L<B<EVP_Decode>I<XXX>|EVP_EncodeInit(3)> functions implement base 64 encoding +L<B<EVP_Decode>I<XXX>|EVP_EncodeInit(3)> functions implement base64 encoding and decoding. All the symmetric algorithms (ciphers), digests and asymmetric algorithms diff --git a/test/bio_base64_test.c b/test/bio_base64_test.c new file mode 100644 index 0000000000..0acb50503c --- /dev/null +++ b/test/bio_base64_test.c @@ -0,0 +1,491 @@ +/* + * Copyright 2024 The OpenSSL Project Authors. All Rights Reserved. + * + * Licensed under the Apache License 2.0 (the "License"). You may not use + * this file except in compliance with the License. 
You can obtain a copy + * in the file LICENSE in the source distribution or at + * https://www.openssl.org/source/license.html + */ +#include <stdio.h> +#include <string.h> +#include <openssl/bio.h> +#include <openssl/evp.h> +#include <openssl/rand.h> + +#include "testutil.h" + +/* 2047 bytes of "#ooooooooo..." + NUL terminator */ +static char gunk[2048]; + +typedef struct { + char *prefix; + char *encoded; + unsigned bytes; + int trunc; + char *suffix; + int retry; + int no_nl; +} test_case; + +#define BUFMAX 0xa0000 /* Encode at most 640kB. */ +#define sEOF "-EOF" /* '-' as in PEM and MIME boundaries */ +#define junk "#foo" /* Skipped initial content */ + +#define EOF_RETURN (-1729) /* Distinct from -1, etc., internal results */ +#define NLEN 6 +#define NVAR 5 +/* + * Junk suffixed variants don't make sense with padding or truncated groups + * because we will typically stop with an error before seeing the suffix, but + * with retriable BIOs may never look at the suffix after detecting padding. 
+ */ +#define NPAD 6 +#define NVARPAD (NVAR * NPAD - NPAD + 1) + +static char *prefixes[NVAR] = { "", junk, gunk, "", "" }; +static char *suffixes[NVAR] = { "", "", "", sEOF, junk }; +static unsigned lengths[6] = { 0, 3, 48, 192, 768, 1536 }; +static unsigned linelengths[] = { + 4, 8, 16, 28, 40, 64, 80, 128, 256, 512, 1023, 0 +}; +static unsigned wscnts[] = { 0, 1, 2, 4, 8, 16, 0xFFFF }; + +/* Generate `len` random octets */ +static unsigned char *genbytes(unsigned len) +{ + unsigned char *buf = NULL; + + if (len > 0 && len <= BUFMAX && (buf = OPENSSL_malloc(len)) != NULL) + RAND_bytes(buf, len); + + return buf; +} + +/* Append one base64 codepoint, adding newlines after every `llen` bytes */ +static int memout(BIO *mem, char c, int llen, int *pos) +{ + if (BIO_write(mem, &c, 1) != 1) + return 0; + if (++*pos == llen) { + *pos = 0; + c = '\n'; + if (BIO_write(mem, &c, 1) != 1) + return 0; + } + return 1; +} + +/* Encode and append one 6-bit slice, randomly prepending some whitespace */ +static int memoutws(BIO *mem, char c, unsigned wscnt, unsigned llen, int *pos) +{ + if (wscnt > 0 + && (test_random() % llen) < wscnt + && memout(mem, ' ', llen, pos) == 0) + return 0; + return memout(mem, c, llen, pos); +} + +/* + * Encode an octet string in base64, approximately `llen` bytes per line, + * with up to roughly `wscnt` additional space characters inserted at random + * before some of the base64 code points. 
+ */ +static int encode(unsigned const char *buf, unsigned buflen, char *encoded, + int trunc, unsigned llen, unsigned wscnt, BIO *mem) +{ + static const unsigned char b64[65] = + "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789+/"; + int pos = 0; + char nl = '\n'; + + if (buflen < 0) + return 0; + + /* Use a verbatim encoding when provided */ + if (encoded != NULL) { + int elen = strlen(encoded); + + return BIO_write(mem, encoded, elen) == elen; + } + + /* Encode full 3-octet groups */ + while (buflen > 2) { + unsigned long v = buf[0] << 16 | buf[1] << 8 | buf[2]; + + if (memoutws(mem, b64[v >> 18], wscnt, llen, &pos) == 0 + || memoutws(mem, b64[(v >> 12) & 0x3f], wscnt, llen, &pos) == 0 + || memoutws(mem, b64[(v >> 6) & 0x3f], wscnt, llen, &pos) == 0 + || memoutws(mem, b64[v & 0x3f], wscnt, llen, &pos) == 0) + return 0; + buf += 3; + buflen -= 3; + } + + /* Encode and pad final 1 or 2 octet group */ + if (buflen == 2) { + unsigned long v = buf[0] << 8 | buf[1]; + + if (memoutws(mem, b64[(v >> 10) & 0x3f], wscnt, llen, &pos) == 0 + || memoutws(mem, b64[(v >> 4) & 0x3f], wscnt, llen, &pos) == 0 + || memoutws(mem, b64[(v & 0xf) << 2], wscnt, llen, &pos) == 0 + || memoutws(mem, '=', wscnt, llen, &pos) == 0) + return 0; + } else if (buflen == 1) { + unsigned long v = buf[0]; + + if (memoutws(mem, b64[v >> 2], wscnt, llen, &pos) == 0 + || memoutws(mem, b64[(v & 0x3) << 4], wscnt, llen, &pos) == 0 + || memoutws(mem, '=', wscnt, llen, &pos) == 0 + || memoutws(mem, '=', wscnt, llen, &pos) == 0) + return 0; + } + + while (trunc-- > 0) + if (memoutws(mem, 'A', wscnt, llen, &pos) == 0) + return 0; + + /* Terminate last line */ + if (pos > 0 && BIO_write(mem, &nl, 1) != 1) + return 0; + + return 1; +} + +static int genb64(char *prefix, char *suffix, unsigned const char *buf, + unsigned buflen, int trunc, char *encoded, unsigned llen, + unsigned wscnt, char **out) +{ + int preflen = strlen(prefix); + int sufflen = strlen(suffix); + int outlen; + char newline = 
'\n'; + BUF_MEM *bptr; + BIO *mem = BIO_new(BIO_s_mem()); + + if (mem == NULL) + return -1; + + if ((*prefix && (BIO_write(mem, prefix, preflen) != preflen + || BIO_write(mem, &newline, 1) != 1)) + || encode(buf, buflen, encoded, trunc, llen, wscnt, mem) <= 0 + || (*suffix && (BIO_write(mem, suffix, sufflen) != sufflen + || BIO_write(mem, &newline, 1) != 1))) { + BIO_free(mem); + return -1; + } + + /* Orphan the memory BIO's data buffer */ + BIO_get_mem_ptr(mem, &bptr); + *out = bptr->data; + outlen = bptr->length; + bptr->data = NULL; + (void) BIO_set_close(mem, BIO_NOCLOSE); + BIO_free(mem); + BUF_MEM_free(bptr); + + return outlen; +} + +static int test_bio_base64_run(test_case *t, int llen, int wscnt) +{ + unsigned char *raw; + unsigned char *out; + unsigned out_len; + char *encoded = NULL; + int elen; + BIO *bio, *b64; + int n, n1, n2; + int ret; + + /* + * Pre-encoded data always encodes NUL octets. If all we care about is the + * length, and not the payload, use random bytes. + */ + if (t->encoded != NULL) + raw = OPENSSL_zalloc(t->bytes); + else + raw = genbytes(t->bytes); + + if (raw == NULL && t->bytes > 0) { + TEST_error("out of memory"); + return -1; + } + + out_len = t->bytes + 1024; + out = OPENSSL_malloc(out_len); + if (out == NULL) { + OPENSSL_free(raw); + TEST_error("out of memory"); + return -1; + } + + elen = genb64(t->prefix, t->suffix, raw, t->bytes, t->trunc, t->encoded, + llen, wscnt, &encoded); + if (elen < 0 || (bio = BIO_new(BIO_s_mem())) == NULL) { + OPENSSL_free(raw); + OPENSSL_free(out); + OPENSSL_free(encoded); + TEST_error("out of memory"); + return -1; + } + if (t->retry) + BIO_set_mem_eof_return(bio, EOF_RETURN); + else + BIO_set_mem_eof_return(bio, 0); + + /* + * When the input is long enough, and the source bio is retriable, exercise + * retries by writting the input to the underlying BIO in two steps (1024 + * bytes, then the rest) and trying to decode some data after each write. 
+ */ + n1 = elen; + if (t->retry) + n1 = elen / 2; + if (n1 > 0) + BIO_write(bio, encoded, n1); + + b64 = BIO_new(BIO_f_base64()); + if (t->no_nl) + BIO_set_flags(b64, BIO_FLAGS_BASE64_NO_NL); + BIO_push(b64, bio); + + n = BIO_read(b64, out, out_len); + + if (n1 < elen) { + /* Append the rest of the input, and read again */ + BIO_write(bio, encoded + n1, elen - n1); + if (n > 0) { + n2 = BIO_read(b64, out + n, out_len - n); + if (n2 > 0) + n += n2; + } else if (n == EOF_RETURN) { + n = BIO_read(b64, out, out_len); + } + } + + /* Turn retry-related negative results to normal (0) EOF */ + if (n < 0 && n == EOF_RETURN) + n = 0; + + /* Turn off retries */ + if (t->retry) + BIO_set_mem_eof_return(bio, 0); + + if (n < (int) out_len) + /* Perform the last read, checking its result */ + ret = BIO_read(b64, out + n, out_len - n); + else { + /* Should not happen, given extra space in out_len */ + TEST_error("Unexpectedly long decode output"); + ret = -1; + } + + /* + * Expect an error to be detected with: + * + * - truncated groups, + * - non-base64 suffixes (other than soft EOF) for non-empty or oneline + * input + * - non-base64 prefixes in NO_NL mode + * + * Otherwise, check the decoded content + */ + if (t->trunc > 0 + || ((t->bytes > 0 || t->no_nl) && *t->suffix && *t->suffix != '-') + || (t->no_nl && *t->prefix)) { + if ((ret = ret < 0 ? 0 : -1) != 0) + TEST_error("Final read result was non-negative"); + } else if (ret != 0 + || n != (int) t->bytes + || (n > 0 && memcmp(raw, out, n) != 0)) { + TEST_error("Failed to decode expected data"); + ret = -1; + } + + BIO_free_all(b64); + OPENSSL_free(out); + OPENSSL_free(raw); + OPENSSL_free(encoded); + + return ret; +} + +static int generic_case(test_case *t, int verbose) +{ + unsigned *llen; + unsigned *wscnt; + int ok = 1; + + for (llen = linelengths; *llen > 0; ++llen) { + for (wscnt = wscnts; *wscnt >= 0 && *wscnt * 2 < *llen; ++wscnt) { + int extra = t->no_nl ? 
64 : 0; + + /* + * Use a longer line for NO_NL tests, in particular, eventually + * exceeding 1k bytes. + */ + if (test_bio_base64_run(t, *llen + extra, *wscnt) != 0) + ok = 0; + + if (verbose) { + fprintf(stderr, "bio_base64_test: ok=%d", ok); + if (*t->prefix) + fprintf(stderr, ", prefix='%s'", t->prefix); + if (t->encoded) + fprintf(stderr, ", data='%s'", t->encoded); + else + fprintf(stderr, ", datalen=%u", t->bytes); + if (t->trunc) + fprintf(stderr, ", trunc=%d", t->trunc); + if (*t->suffix) + fprintf(stderr, ", suffix='%s'", t->suffix); + fprintf(stderr, ", linelen=%u", *llen); + fprintf(stderr, ", wscount=%u", *wscnt); + if (t->retry) + fprintf(stderr, ", retriable"); + if (t->no_nl) + fprintf(stderr, ", oneline"); + fputc('\n', stderr); + } + + /* For verbatim input no effect from varying llen or wscnt */ + if (t->encoded) + return ok; + } + /* + * Longer 'llen' has no effect once we're sure to not have multiple + * lines of data + */ + if (*llen > t->bytes + (t->bytes >> 1)) + break; + } + return ok; +} + +static int quotrem(int i, unsigned int m, int *q) +{ + *q = i / m; + return i - *q * m; +} + +static int test_bio_base64_generated(int idx) +{ + test_case t; + int variant; + int lencase; + int padcase; + int q = idx; + + lencase = quotrem(q, NLEN, &q); + variant = quotrem(q, NVARPAD, &q); + padcase = quotrem(variant, NPAD, &variant); + t.retry = quotrem(q, 2, &q); + t.no_nl = quotrem(q, 2, &q); + + if (q != 0) { + fprintf(stderr, "Test index out of range: %d", idx); + return 0; + } + + t.prefix = prefixes[variant]; + t.encoded = NULL; + t.bytes = lengths[lencase]; + t.trunc = 0; + if (padcase && padcase < 3) + t.bytes += padcase; + else if (padcase >= 3) + t.trunc = padcase - 2; + t.suffix = suffixes[variant]; + + if (padcase != 0 && (*t.suffix && *t.suffix != '-')) { + TEST_error("Unexpected suffix test after padding"); + return 0; + } + + return generic_case(&t, 0); +} + +static int test_bio_base64_corner_case_bug(int idx) +{ + test_case t; + int q = 
idx; + + t.retry = quotrem(q, 2, &q); + t.no_nl = quotrem(q, 2, &q); + + if (q != 0) { + fprintf(stderr, "Test index out of range: %d", idx); + return 0; + } + + /* 9 bytes of skipped non-base64 input + newline */ + t.prefix = "#foo\n#bar"; + + /* 9 bytes on 2nd and subsequent lines */ + t.encoded = "A\nAAA\nAAAA\n"; + t.suffix = ""; + + /* Expected decode length */ + t.bytes = 6; + t.trunc = 0; /* ignored */ + + return generic_case(&t, 0); +} + +int setup_tests(void) +{ + int numidx; + + memset(gunk, 'o', sizeof(gunk)); + gunk[0] = '#'; + gunk[sizeof(gunk) - 1] = '\0'; + + /* + * Test 5 variants of prefix or suffix + * + * - both empty + * - short junk prefix + * - long gunk prefix (> internal BIO 1k buffer size), + * - soft EOF suffix + * - junk suffix (expect to detect an error) + * + * For 6 input lengths of randomly generated raw input: + * + * 0, 3, 48, 192, 768 and 1536 + * + * corresponding to encoded lengths (plus linebreaks and ignored + * whitespace) of: + * + * 0, 4, 64, 256, 1024 and 2048 + * + * Followed by zero, one or two additional bytes that may involve padding, + * or else (truncation) 1, 2 or 3 bytes with missing padding. + * Only the the first four variants make sense with padding or truncated + * groups. + * + * With two types of underlying BIO + * + * - Non-retriable underlying BIO + * - Retriable underlying BIO + * + * And with/without the BIO_FLAGS_BASE64_NO_NL flag, where now an error is + * expected with the junk and gunk prefixes, however, but the "soft EOF" + * suffix is still accepted. + * + * Internally, each test may loop over a range of encoded line lengths and + * whitespace average "densities". + */ + numidx = NLEN * (NVAR * NPAD - NPAD + 1) * 2 * 2; + ADD_ALL_TESTS(test_bio_base64_generated, numidx); + + /* + * Corner case in original code that skips ignored input, when the ignored + * length is one byte longer than the total of the second and later lines + * of valid input in the first 1k bytes of input. 
No content variants, + * just BIO retry status and oneline flags vary. + */ + numidx = 2 * 2; + ADD_ALL_TESTS(test_bio_base64_corner_case_bug, numidx); + + return 1; +} diff --git a/test/build.info b/test/build.info index 5ec0b93ca0..991a0039b8 100644 --- a/test/build.info +++ b/test/build.info @@ -49,7 +49,7 @@ IF[{- !$disabled{tests} -}] ssl_test_ctx_test ssl_test x509aux cipherlist_test asynciotest \ bio_callback_test bio_memleak_test bio_core_test bio_dgram_test param_build_test \ bioprinttest sslapitest ssl_handshake_rtt_test dtlstest sslcorrupttest \ - bio_enc_test pkey_meth_test pkey_meth_kdf_test evp_kdf_test uitest \ + bio_base64_test bio_enc_test pkey_meth_test pkey_meth_kdf_test evp_kdf_test uitest \ cipherbytes_test threadstest_fips threadpool_test \ asn1_encode_test asn1_decode_test asn1_string_table_test asn1_stable_parse_test \ x509_time_test x509_dup_cert_test x509_check_cert_pkey_test \ @@ -582,6 +582,10 @@ IF[{- !$disabled{tests} -}] INCLUDE[sslcorrupttest]=../include ../apps/include DEPEND[sslcorrupttest]=../libcrypto ../libssl libtestutil.a + SOURCE[bio_base64_test]=bio_base64_test.c + INCLUDE[bio_base64_test]=../include ../apps/include + DEPEND[bio_base64_test]=../libcrypto libtestutil.a + SOURCE[bio_enc_test]=bio_enc_test.c INCLUDE[bio_enc_test]=../include ../apps/include DEPEND[bio_enc_test]=../libcrypto libtestutil.a diff --git a/test/recipes/90-test_bio_base64.t b/test/recipes/90-test_bio_base64.t new file mode 100644 index 0000000000..9e21bd570d --- /dev/null +++ b/test/recipes/90-test_bio_base64.t @@ -0,0 +1,11 @@ +#! /usr/bin/env perl +# Copyright 2024 The OpenSSL Project Authors. All Rights Reserved. +# +# Licensed under the Apache License 2.0 (the "License"). You may not use +# this file except in compliance with the License. 
You can obtain a copy +# in the file LICENSE in the source distribution or at +# https://www.openssl.org/source/license.html + +use OpenSSL::Test::Simple; + +simple_test("test_bio_base64", "bio_base64_test", "bio_base64"); |