diff options
author | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2022-02-12 15:38:36 +0100 |
---|---|---|
committer | Jussi Kivilinna <jussi.kivilinna@iki.fi> | 2022-03-08 19:00:31 +0100 |
commit | 15df88d135ba08797c2aaf1023c6c606aed49943 (patch) | |
tree | ac1f1b87b45e4cf1e2560723dd25c10aeb3a3248 /common | |
parent | g10/cipher-aead: add fast path for avoid memcpy when AEAD encrypting (diff) | |
download | gnupg2-15df88d135ba08797c2aaf1023c6c606aed49943.tar.xz gnupg2-15df88d135ba08797c2aaf1023c6c606aed49943.zip |
iobuf: add zerocopy optimization for iobuf_read
* common/iobuf.h (iobuf_struct): Add 'e_d' substructure and members.
* common/iobuf.c (IOBUF_ZEROCOPY_THRESHOLD_SIZE): New.
(iobuf_alloc): Clear 'iobuf->e_d'.
(underflow_target): Use 'iobuf->e_d' when configured to bypass copying
through 'iobuf->d.buf'.
(iobuf_read): Configure 'iobuf->e_d' for 'underflow' if 'iobuf->d.buf'
is empty and the remaining external buffer is at least the threshold size.
--
Zero-copy operation in iobuf_read() and underflow() allows bypassing
'iobuf->d.buf' for greater performance. This mainly helps OCB
performance, where additional memory copies through the iobuf stack
can take a significant portion of program time.
GnuPG-bug-id: T5828
Signed-off-by: Jussi Kivilinna <jussi.kivilinna@iki.fi>
Diffstat (limited to 'common')
-rw-r--r-- | common/iobuf.c | 112 | ||||
-rw-r--r-- | common/iobuf.h | 20 |
2 files changed, 118 insertions, 14 deletions
diff --git a/common/iobuf.c b/common/iobuf.c index 822eabe1a..49e03da22 100644 --- a/common/iobuf.c +++ b/common/iobuf.c @@ -66,6 +66,10 @@ the number of filters in a chain. */ #define MAX_NESTING_FILTER 64 +/* The threshold for switching to use external buffers directly + instead of the internal buffers. */ +#define IOBUF_ZEROCOPY_THRESHOLD_SIZE 1024 + /*-- End configurable part. --*/ /* The size of the iobuffers. This can be changed using the @@ -1196,6 +1200,10 @@ iobuf_alloc (int use, size_t bufsize) a->use = use; a->d.buf = xmalloc (bufsize); a->d.size = bufsize; + a->e_d.buf = NULL; + a->e_d.len = 0; + a->e_d.used = 0; + a->e_d.preferred = 0; a->no = ++number; a->subno = 0; a->real_fname = NULL; @@ -1861,12 +1869,15 @@ underflow_target (iobuf_t a, int clear_pending_eof, size_t target) assert (a->use == IOBUF_INPUT); + a->e_d.used = 0; + /* If there is still some buffered data, then move it to the start of the buffer and try to fill the end of the buffer. (This is useful if we are called from iobuf_peek().) */ assert (a->d.start <= a->d.len); a->d.len -= a->d.start; - memmove (a->d.buf, &a->d.buf[a->d.start], a->d.len); + if (a->d.len) + memmove (a->d.buf, &a->d.buf[a->d.start], a->d.len); a->d.start = 0; if (a->d.len < target && a->filter_eof) @@ -1917,23 +1928,57 @@ underflow_target (iobuf_t a, int clear_pending_eof, size_t target) { /* Be careful to account for any buffered data. */ len = a->d.size - a->d.len; - if (DBG_IOBUF) - log_debug ("iobuf-%d.%d: underflow: A->FILTER (%lu bytes)\n", - a->no, a->subno, (ulong) len); + + if (a->e_d.preferred && a->d.len < IOBUF_ZEROCOPY_THRESHOLD_SIZE + && (IOBUF_ZEROCOPY_THRESHOLD_SIZE - a->d.len) < len) + { + if (DBG_IOBUF) + log_debug ("iobuf-%d.%d: limit buffering as external drain is " + "preferred\n", a->no, a->subno); + len = IOBUF_ZEROCOPY_THRESHOLD_SIZE - a->d.len; + } + if (len == 0) /* There is no space for more data. Don't bother calling A->FILTER. 
*/ rc = 0; else - rc = a->filter (a->filter_ov, IOBUFCTRL_UNDERFLOW, a->chain, - &a->d.buf[a->d.len], &len); + { + /* If no buffered data and drain buffer has been setup, and drain + * buffer is largish, read data directly to drain buffer. */ + if (a->d.len == 0 + && a->e_d.buf + && a->e_d.len >= IOBUF_ZEROCOPY_THRESHOLD_SIZE) + { + len = a->e_d.len; + + if (DBG_IOBUF) + log_debug ("iobuf-%d.%d: underflow: A->FILTER (%lu bytes, to external drain)\n", + a->no, a->subno, (ulong)len); + + rc = a->filter (a->filter_ov, IOBUFCTRL_UNDERFLOW, a->chain, + a->e_d.buf, &len); + a->e_d.used = len; + len = 0; + } + else + { + if (DBG_IOBUF) + log_debug ("iobuf-%d.%d: underflow: A->FILTER (%lu bytes)\n", + a->no, a->subno, (ulong)len); + + rc = a->filter (a->filter_ov, IOBUFCTRL_UNDERFLOW, a->chain, + &a->d.buf[a->d.len], &len); + } + } a->d.len += len; if (DBG_IOBUF) - log_debug ("iobuf-%d.%d: A->FILTER() returned rc=%d (%s), read %lu bytes\n", + log_debug ("iobuf-%d.%d: A->FILTER() returned rc=%d (%s), read %lu bytes%s\n", a->no, a->subno, rc, rc == 0 ? "ok" : rc == -1 ? "EOF" : gpg_strerror (rc), - (ulong) len); + (ulong)(a->e_d.used ? a->e_d.used : len), + a->e_d.used ? " (to external buffer)" : ""); /* if( a->no == 1 ) */ /* log_hexdump (" data:", a->d.buf, len); */ @@ -1954,7 +1999,8 @@ underflow_target (iobuf_t a, int clear_pending_eof, size_t target) a->filter = NULL; a->filter_eof = 1; - if (clear_pending_eof && a->d.len == 0 && a->chain) + if (clear_pending_eof && a->d.len == 0 && a->e_d.used == 0 + && a->chain) /* We don't need to keep this filter around at all: - we got an EOF @@ -1976,7 +2022,7 @@ underflow_target (iobuf_t a, int clear_pending_eof, size_t target) return -1; } - else if (a->d.len == 0) + else if (a->d.len == 0 && a->e_d.used == 0) /* We can't unlink this filter (it is the only one in the pipeline), but we can immediately return EOF. 
*/ return -1; @@ -1986,13 +2032,15 @@ underflow_target (iobuf_t a, int clear_pending_eof, size_t target) { a->error = rc; - if (a->d.len == 0) + if (a->d.len == 0 && a->e_d.used == 0) /* There is no buffered data. Immediately return EOF. */ return -1; } } assert (a->d.start <= a->d.len); + if (a->e_d.used > 0) + return 0; if (a->d.start < a->d.len) return a->d.buf[a->d.start++]; @@ -2104,6 +2152,12 @@ iobuf_read (iobuf_t a, void *buffer, unsigned int buflen) return n; } + a->e_d.buf = NULL; + a->e_d.len = 0; + + /* Hint for how full to fill iobuf internal drain buffer. */ + a->e_d.preferred = (buf && buflen >= IOBUF_ZEROCOPY_THRESHOLD_SIZE); + n = 0; do { @@ -2125,16 +2179,46 @@ iobuf_read (iobuf_t a, void *buffer, unsigned int buflen) underflow to read more data into the filter's internal buffer. */ { + if (buf && n < buflen) + { + /* Setup external drain buffer for faster moving of data + * (avoid memcpy). */ + a->e_d.buf = buf; + a->e_d.len = (buflen - n) / IOBUF_ZEROCOPY_THRESHOLD_SIZE + * IOBUF_ZEROCOPY_THRESHOLD_SIZE; + if (a->e_d.len == 0) + a->e_d.buf = NULL; + if (a->e_d.buf && DBG_IOBUF) + log_debug ("iobuf-%d.%d: reading to external buffer, %lu bytes\n", + a->no, a->subno, (ulong)a->e_d.len); + } + if ((c = underflow (a, 1)) == -1) /* EOF. If we managed to read something, don't return EOF now. */ { + a->e_d.buf = NULL; + a->e_d.len = 0; a->nbytes += n; return n ? n : -1 /*EOF*/; } - if (buf) - *buf++ = c; - n++; + + if (a->e_d.buf && a->e_d.used > 0) + { + /* Drain buffer was used, 'c' only contains return code + * 0 or -1. */ + n += a->e_d.used; + buf += a->e_d.used; + } + else + { + if (buf) + *buf++ = c; + n++; + } + + a->e_d.buf = NULL; + a->e_d.len = 0; } } while (n < buflen); diff --git a/common/iobuf.h b/common/iobuf.h index a3d9bd547..f527fbf16 100644 --- a/common/iobuf.h +++ b/common/iobuf.h @@ -202,6 +202,26 @@ struct iobuf_struct byte *buf; } d; + /* A external drain buffer for reading/writting data skipping internal + draint buffer D.BUF. 
This allows zerocopy operation reducing + processing overhead across filter stack. + + Used when by iobuf_read/iobuf_write when internal buffer has been + depleted and remaining external buffer length is large enough. + */ + struct + { + /* The external buffer provided by iobuf_read/iobuf_write caller. */ + byte *buf; + /* The number of bytes in the external buffer. */ + size_t len; + /* The number of bytes that were consumed from the external buffer. */ + size_t used; + /* Gives hint for processing that the external buffer is preferred and + that internal buffer should be consumed early. */ + int preferred; + } e_d; + /* When FILTER is called to read some data, it may read some data and then return EOF. We can't return the EOF immediately. Instead, we note that we observed the EOF and when the buffer is |