From 70d558199cf2b783f894a2769df44884a5ba8719 Mon Sep 17 00:00:00 2001 From: Zbigniew Jędrzejewski-Szmek Date: Thu, 16 May 2019 13:12:37 +0200 Subject: basic/escape: add truncation to xescape too This does for ASCII and non-unicode encodings what utf8_escape_non_printable_full() does for utf8-based encodings. --- src/basic/escape.c | 64 +++++++++++++++++++++++++++++++++++++++++++----------- 1 file changed, 51 insertions(+), 13 deletions(-) (limited to 'src/basic/escape.c') diff --git a/src/basic/escape.c b/src/basic/escape.c index 5f715156fb..77e86a58f3 100644 --- a/src/basic/escape.c +++ b/src/basic/escape.c @@ -368,33 +368,71 @@ int cunescape(const char *s, UnescapeFlags flags, char **ret) { return cunescape_length(s, strlen(s), flags, ret); } -char *xescape(const char *s, const char *bad) { - char *r, *t; +char *xescape_full(const char *s, const char *bad, size_t console_width, bool eight_bits) { + char *ans, *t, *prev, *prev2; const char *f; - /* Escapes all chars in bad, in addition to \ and all special - * chars, in \xFF style escaping. May be reversed with - * cunescape(). */ + /* Escapes all chars in bad, in addition to \ and all special chars, in \xFF style escaping. May be + * reversed with cunescape(). If eight_bits is true, characters >= 127 are let through unchanged. + * This corresponds to non-ASCII printable characters in pre-unicode encodings. + * + * If console_width is reached, output is truncated and "..." is appended. */ - r = new(char, strlen(s) * 4 + 1); - if (!r) + if (console_width == 0) + return strdup(""); + + ans = new(char, MIN(strlen(s), console_width) * 4 + 1); + if (!ans) return NULL; - for (f = s, t = r; *f; f++) { + memset(ans, '_', MIN(strlen(s), console_width) * 4); + ans[MIN(strlen(s), console_width) * 4] = 0; + + for (f = s, t = prev = prev2 = ans; ; f++) { + char *tmp_t = t; + + if (!*f) { + *t = 0; + return ans; + } + + if ((unsigned char) *f < ' ' || (!eight_bits && (unsigned char) *f >= 127) || + *f == '\\' || strchr(bad, *f)) { + if ((size_t) (t - ans) + 4 > console_width) + break; - if ((*f < ' ') || (*f >= 127) || - (*f == '\\') || strchr(bad, *f)) { *(t++) = '\\'; *(t++) = 'x'; *(t++) = hexchar(*f >> 4); *(t++) = hexchar(*f); - } else + } else { + if ((size_t) (t - ans) + 1 > console_width) + break; + *(t++) = *f; + } + + /* We might need to go back two cycles to fit three dots, so remember two positions */ + prev2 = prev; + prev = tmp_t; } - *t = 0; + /* We can just write where we want, since chars are one-byte */ + size_t c = MIN(console_width, 3u); /* If the console is too narrow, write fewer dots */ + size_t off; + if (console_width - c >= (size_t) (t - ans)) + off = (size_t) (t - ans); + else if (console_width - c >= (size_t) (prev - ans)) + off = (size_t) (prev - ans); + else if (console_width - c >= (size_t) (prev2 - ans)) + off = (size_t) (prev2 - ans); + else + off = console_width - c; + assert(off <= (size_t) (t - ans)); - return r; + memcpy(ans + off, "...", c); + ans[off + c] = '\0'; + return ans; } char *octescape(const char *s, size_t len) { -- cgit v1.2.3